From 7fd51c1148ead302d85f34e065c132f973d9da34 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 30 Jun 2025 09:50:18 -0700 Subject: [PATCH 01/12] upgrade torch_tensorrt version from 2.8.0.dev to 2.9.0.dev --- .github/workflows/docgen.yml | 12 ++++++------ MODULE.bazel | 2 +- cpp/include/torch_tensorrt/macros.h | 2 +- docker/Dockerfile | 4 ++-- docker/dist-build.sh | 2 +- py/requirements.txt | 2 +- pyproject.toml | 12 ++++++------ version.txt | 2 +- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml index fc6afa8d0d..a943efe302 100644 --- a/.github/workflows/docgen.yml +++ b/.github/workflows/docgen.yml @@ -14,12 +14,12 @@ jobs: if: ${{ ! contains(github.actor, 'pytorchbot') }} environment: pytorchbot-env container: - image: docker.io/pytorch/manylinux2_28-builder:cuda12.8 + image: docker.io/pytorch/manylinux2_28-builder:cuda12.9 options: --gpus all env: - CUDA_HOME: /usr/local/cuda-12.8 - VERSION_SUFFIX: cu128 - CU_VERSION: cu128 + CUDA_HOME: /usr/local/cuda-12.9 + VERSION_SUFFIX: cu129 + CU_VERSION: cu129 CHANNEL: nightly CI_BUILD: 1 steps: @@ -35,14 +35,14 @@ jobs: - name: Install base deps run: | python3 -m pip install pip --upgrade - python3 -m pip install pyyaml numpy torch --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu128 + python3 -m pip install pyyaml numpy torch --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu129 ./packaging/pre_build_script.sh - name: Get HEAD SHA id: vars run: echo "sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT - name: Build Python Package run: | - python3 -m pip install --pre . --extra-index-url https://download.pytorch.org/whl/nightly/cu128 + python3 -m pip install --pre . --extra-index-url https://download.pytorch.org/whl/nightly/cu129 - name: Generate New Docs run: | cd docsrc diff --git a/MODULE.bazel b/MODULE.bazel index 59196b85be..6087f5dffb 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,6 +1,6 @@ module( name = "torch_tensorrt", - version = "2.8.0a0", + version = "2.9.0a0", repo_name = "org_pytorch_tensorrt", ) diff --git a/cpp/include/torch_tensorrt/macros.h b/cpp/include/torch_tensorrt/macros.h index bdc25f6cd8..020b94c114 100644 --- a/cpp/include/torch_tensorrt/macros.h +++ b/cpp/include/torch_tensorrt/macros.h @@ -24,7 +24,7 @@ #define STR(x) XSTR(x) #define TORCH_TENSORRT_MAJOR_VERSION 2 -#define TORCH_TENSORRT_MINOR_VERSION 6 +#define TORCH_TENSORRT_MINOR_VERSION 9 #define TORCH_TENSORRT_PATCH_VERSION 0 #define TORCH_TENSORRT_VERSION \ STR(TORCH_TENSORRT_MAJOR_VERSION) \ diff --git a/docker/Dockerfile b/docker/Dockerfile index 23786435c6..b218211e38 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,9 +2,9 @@ # Base image starts with CUDA #TODO: cuda version -ARG BASE_IMG=nvidia/cuda:12.8.0-devel-ubuntu22.04 +ARG BASE_IMG=nvidia/cuda:12.9.0-devel-ubuntu22.04 FROM ${BASE_IMG} as base -ENV BASE_IMG=nvidia/cuda:12.8.0-devel-ubuntu22.04 +ENV BASE_IMG=nvidia/cuda:12.9.0-devel-ubuntu22.04 ARG TENSORRT_VERSION ENV TENSORRT_VERSION=${TENSORRT_VERSION} diff --git a/docker/dist-build.sh b/docker/dist-build.sh index faabedade2..7790c05f82 100755 --- a/docker/dist-build.sh +++ b/docker/dist-build.sh @@ -4,7 +4,7 @@ set -x TOP_DIR=$(cd $(dirname $0); pwd)/.. -BUILD_CMD="python -m pip wheel . --extra-index-url https://download.pytorch.org/whl/nightly/cu128 -w dist" +BUILD_CMD="python -m pip wheel . 
--extra-index-url https://download.pytorch.org/whl/nightly/cu129 -w dist" # TensorRT restricts our pip version cd ${TOP_DIR} \ diff --git a/py/requirements.txt b/py/requirements.txt index 302b7e92af..a34a458938 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -2,7 +2,7 @@ numpy packaging pybind11==2.6.2 --extra-index-url https://download.pytorch.org/whl/nightly/cu129 -torch>=2.8.0.dev,<2.9.0 +torch>=2.9.0.dev,<2.10.0 --extra-index-url https://pypi.ngc.nvidia.com pyyaml dllist \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b45cd96d5d..f32e3296d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ requires = [ "future>=0.18.3", "tensorrt-cu12>=10.11.0,<10.12.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", "tensorrt-cu12>=10.3.0,<10.4.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - "torch>=2.8.0.dev,<2.9.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", + "torch>=2.9.0.dev,<2.10.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", "torch>=2.7.0,<2.8.0; platform_machine == 'aarch64' and 'tegra' in platform_release", "pybind11==2.6.2", "numpy; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", @@ -59,7 +59,7 @@ keywords = [ "inference", ] dependencies = [ - "torch>=2.8.0.dev,<2.9.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", + "torch>=2.9.0.dev,<2.10.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", "torch>=2.7.0,<2.8.0; platform_machine == 'aarch64' and 'tegra' in platform_release", "tensorrt>=10.11.0,<10.12.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", @@ -135,17 +135,17 @@ index-strategy = "unsafe-best-match" [tool.uv.sources] torch = [ - { index = "pytorch-nightly-cu128", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, + { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, { index = "jetson-containers", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release" }, ] torchvision = [ - { index = "pytorch-nightly-cu128", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, + { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, { index = "jetson-containers", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release" }, ] [[tool.uv.index]] -name = "pytorch-nightly-cu128" -url = "https://download.pytorch.org/whl/nightly/cu128" +name = "pytorch-nightly-cu129" +url = "https://download.pytorch.org/whl/nightly/cu129" explicit = false [[tool.uv.index]] diff --git a/version.txt b/version.txt index 11922a5ce1..03e905f0db 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -2.8.0a0 +2.9.0a0 From e80bd10b48e04b3405569c0d4254108c585fca24 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 10 Jul 2025 15:54:39 -0700 Subject: [PATCH 02/12] initial checkin for adding tensorrt_rtx --- MODULE.bazel | 19 ++ core/conversion/conversion.cpp | 2 +- .../conversionctx/ConversionCtx.cpp | 22 +- 
core/conversion/conversionctx/ConversionCtx.h | 2 +- .../converters/impl/quantization.cpp | 2 +- core/util/trt_util.h | 2 +- cpp/src/compile_spec.cpp | 26 +- dev_dep_versions.yml | 1 + py/torch_tensorrt/__init__.py | 80 +++-- .../csrc/register_tensorrt_classes.cpp | 2 +- py/torch_tensorrt/csrc/tensorrt_classes.cpp | 22 +- py/torch_tensorrt/csrc/tensorrt_classes.h | 16 +- py/torch_tensorrt/csrc/torch_tensorrt_py.cpp | 297 ++++++++---------- .../runtime/_PythonTorchTensorRTModule.py | 2 +- py/torch_tensorrt/trt_alias.py | 94 ++++++ pyproject.toml | 4 + setup.py | 31 +- third_party/tensorrt_rtx/archive/BUILD | 58 ++++ third_party/tensorrt_rtx/local/BUILD | 93 ++++++ toolchains/dep_collection/defs.bzl | 2 +- 20 files changed, 527 insertions(+), 250 deletions(-) create mode 100644 py/torch_tensorrt/trt_alias.py create mode 100644 third_party/tensorrt_rtx/archive/BUILD create mode 100644 third_party/tensorrt_rtx/local/BUILD diff --git a/MODULE.bazel b/MODULE.bazel index 6087f5dffb..4fad87365f 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -109,6 +109,16 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + sha256 = "b1222e08f9d473f0bcc06c6a76bf2b1327a106dcee671415c4c46833a105a425", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "http://cuda-repo/release-candidates/Libraries/TensorRT/v10.12/10.12.0.35-51f47a12/12.9-r575/Linux-x64-manylinux_2_28-winjit/tar/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz", + ], +) + http_archive( name = "tensorrt_sbsa", build_file = "@//third_party/tensorrt/archive:BUILD", @@ -136,6 +146,15 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx_win", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + sha256 = "49cf1247ada75faa8d538257b763b1c12b9bbb97fcd7765654c55b3ad16bd680", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "http://cuda-repo/release-candidates/Libraries/TensorRT/v10.12/10.12.0.35-51f47a12/12.9-r575/Windows10-x64-winjit/zip/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip", + ], +) #################################################################################### # Locally installed dependencies (use in cases of custom dependencies or aarch64) #################################################################################### diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp index f8a26e8d77..d84e690563 100644 --- a/core/conversion/conversion.cpp +++ b/core/conversion/conversion.cpp @@ -204,7 +204,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef input "Optimization profile is invalid, please check the input range provided (conversion.AddInputs)"); ctx->cfg->addOptimizationProfile(profile); -#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1) +#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)) if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) { ctx->cfg->setCalibrationProfile(profile); } diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index 2eb363706f..7c8e087fbc 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -31,8 +31,8 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) { if (s.device.device_type == nvinfer1::DeviceType::kDLA) { os << "\n DLACore: " << s.device.dla_core; } - os << "\n Engine Capability: " << s.capability \ - << 
"\n Calibrator Created: " << (s.calibrator != nullptr); + os << "\n Engine Capability: " << s.capability; + // << "\n Calibrator Created: " << (s.calibrator != nullptr); return os; } // clang-format on @@ -67,12 +67,14 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) TORCHTRT_CHECK( builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8"); cfg->setFlag(nvinfer1::BuilderFlag::kINT8); - if (!settings.calibrator) { - LOG_INFO( - "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); - } else { - cfg->setInt8Calibrator(settings.calibrator); - } + // if (!settings.calibrator) { + // LOG_INFO( + // "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes + // obtained from Quantization aware training. For more details, refer to + // https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); + // } else { + // cfg->setInt8Calibrator(settings.calibrator); + // } break; case nvinfer1::DataType::kFLOAT: break; @@ -89,7 +91,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) if (settings.disable_tf32) { cfg->clearFlag(nvinfer1::BuilderFlag::kTF32); } -#if NV_TENSORRT_MAJOR > 7 +#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR > 7) if (settings.sparse_weights) { cfg->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); } @@ -163,7 +165,7 @@ void ConversionCtx::RecordNewITensor(const torch::jit::Value* value, nvinfer1::I } std::string ConversionCtx::SerializeEngine() { -#if NV_TENSORRT_MAJOR > 7 +#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR > 7) auto serialized_network = make_trt(builder->buildSerializedNetwork(*net, *cfg)); if (!serialized_network) { TORCHTRT_THROW_ERROR("Building serialized network failed in TensorRT"); diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h index 8587885eca..e8ed1a686b 100644 --- a/core/conversion/conversionctx/ConversionCtx.h +++ b/core/conversion/conversionctx/ConversionCtx.h @@ -26,7 +26,7 @@ struct BuilderSettings { bool allow_shape_tensors = false; ir::Device device; nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD; - nvinfer1::IInt8Calibrator* calibrator = nullptr; + // nvinfer1::IInt8Calibrator* calibrator = nullptr; uint64_t num_avg_timing_iters = 1; uint64_t workspace_size = 0; uint64_t dla_sram_size = DLA_SRAM_SIZE; diff --git a/core/conversion/converters/impl/quantization.cpp b/core/conversion/converters/impl/quantization.cpp index addf629e6b..759b772f0f 100644 --- a/core/conversion/converters/impl/quantization.cpp +++ b/core/conversion/converters/impl/quantization.cpp @@ -9,7 +9,7 @@ namespace converters { namespace impl { namespace { -#if NV_TENSORRT_MAJOR > 7 +#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR > 7) // clang-format off bool add_qdq(ConversionCtx *ctx, const torch::jit::Node* n, nvinfer1::ITensor* input, nvinfer1::ITensor* scale, std::string& opName) { diff --git a/core/util/trt_util.h b/core/util/trt_util.h index f3df533d8b..a68e00e14d 100644 --- a/core/util/trt_util.h +++ b/core/util/trt_util.h @@ -8,7 +8,7 @@ namespace nvinfer1 { -#if NV_TENSORRT_MAJOR < 8 +#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR < 8) #define TRT_ENGINE_CAPABILITY_STANDARD nvinfer1::EngineCapability::kDEFAULT #define 
TRT_ENGINE_CAPABILITY_SAFETY nvinfer1::EngineCapability::kSAFE_GPU diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 68a25b3912..8a950f3b8a 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -153,19 +153,19 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool internal.partitioning_info.cast_int8_inputs = true; - if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != - internal.convert_info.engine_settings.enabled_precisions.end()) { - internal.partitioning_info.cast_int8_inputs = false; - if (external.ptq_calibrator) { - internal.convert_info.engine_settings.calibrator = external.ptq_calibrator; - } else { - internal.lower_info.unfreeze_module = true; - internal.lower_info.disable_cse = true; - internal.convert_info.engine_settings.calibrator = nullptr; - } - } else { - internal.convert_info.engine_settings.calibrator = nullptr; - } + // if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != + // internal.convert_info.engine_settings.enabled_precisions.end()) { + // internal.partitioning_info.cast_int8_inputs = false; + // if (external.ptq_calibrator) { + // internal.convert_info.engine_settings.calibrator = external.ptq_calibrator; + // } else { + // internal.lower_info.unfreeze_module = true; + // internal.lower_info.disable_cse = true; + // internal.convert_info.engine_settings.calibrator = nullptr; + // } + // } else { + // internal.convert_info.engine_settings.calibrator = nullptr; + // } return internal; } diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml index 492035a76f..5e08228539 100644 --- a/dev_dep_versions.yml +++ b/dev_dep_versions.yml @@ -1,2 +1,3 @@ __cuda_version__: "12.8" __tensorrt_version__: "10.11.0" +__tensorrt_rtx_version__: "1.0.0" diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index 6d79f9b4f3..d392a24f93 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -1,11 +1,14 @@ import ctypes +import logging import os import platform import sys from typing import Dict, List +import torch from torch_tensorrt._version import ( # noqa: F401 __cuda_version__, + __tensorrt_rtx_version__, __tensorrt_version__, __version__, ) @@ -35,55 +38,73 @@ def _find_lib(name: str, paths: List[str]) -> str: raise FileNotFoundError(f"Could not find {name}\n Search paths: {paths}") +import logging + +import torch + try: import tensorrt # noqa: F401 + + from . 
import trt_alias # noqa: F401 + + print(f"You are using {tensorrt.__name__=} {tensorrt.__version__=}") except ImportError: - cuda_version = _parse_semver(__cuda_version__) tensorrt_version = _parse_semver(__tensorrt_version__) + tensorrt_rtx_version = _parse_semver(__tensorrt_rtx_version__) - CUDA_MAJOR = cuda_version["major"] TENSORRT_MAJOR = tensorrt_version["major"] - + TENSORRT_MINOR = tensorrt_version["minor"] + TENSORRT_RTX_MAJOR = tensorrt_rtx_version["major"] + TENSORRT_RTX_MINOR = tensorrt_rtx_version["minor"] + + trt_lib = { + "tensorrt_rtx": { + "win": [ + f"tensorrt_rtx_{TENSORRT_RTX_MAJOR}_{TENSORRT_RTX_MINOR}.dll", + # TODO: lan to verify, comment out for now, as torch-tensorrt don't require onnx parser + # f"tensorrt_onnxparser_rtx_{TENSORRT_RTX_MAJOR}_{TENSORRT_RTX_MINOR}.dll", + ], + "linux": [ + f"libtensorrt_rtx.so.{TENSORRT_RTX_MAJOR}", + # TODO: lan to verify, comment out for now, as torch-tensorrt don't require onnx parser + # f"libtensorrt_onnxparser_rtx.so.{TENSORRT_RTX_MAJOR}", + ], + }, + "tensorrt": { + "win": [ + f"nvinfer_{TENSORRT_MAJOR}.dll", + f"nvinfer_plugin_{TENSORRT_MAJOR}.dll", + ], + "linux": [ + f"libnvinfer.so.{TENSORRT_MAJOR}", + f"libnvinfer_plugin.so.{TENSORRT_MAJOR}", + ], + }, + } if sys.platform.startswith("win"): - WIN_LIBS = [ - "nvinfer.dll", - "nvinfer_plugin.dll", - ] - + WIN_LIBS = trt_lib[tensorrt.__name__]["win"] WIN_PATHS = os.environ["PATH"].split(os.path.pathsep) - for lib in WIN_LIBS: ctypes.CDLL(_find_lib(lib, WIN_PATHS)) elif sys.platform.startswith("linux"): - LINUX_PATHS = ["/usr/local/cuda-12.8/lib64", "/usr/lib", "/usr/lib64"] + LINUX_PATHS = [ + f"/usr/local/cuda-{__cuda_version__}/lib64", + "/usr/lib", + "/usr/lib64", + ] if "LD_LIBRARY_PATH" in os.environ: LINUX_PATHS += os.environ["LD_LIBRARY_PATH"].split(os.path.pathsep) - if platform.uname().processor == "x86_64": LINUX_PATHS += [ "/usr/lib/x86_64-linux-gnu", ] - elif platform.uname().processor == "aarch64": LINUX_PATHS += ["/usr/lib/aarch64-linux-gnu"] - - LINUX_LIBS = [ - f"libnvinfer.so.{TENSORRT_MAJOR}", - f"libnvinfer_plugin.so.{TENSORRT_MAJOR}", - ] - + LINUX_LIBS = trt_lib[tensorrt.__name__]["linux"] for lib in LINUX_LIBS: ctypes.CDLL(_find_lib(lib, LINUX_PATHS)) -import logging - -import torch -from torch_tensorrt._features import ENABLED_FEATURES, _enabled_features_str - -_LOGGER = logging.getLogger(__name__) -_LOGGER.debug(_enabled_features_str()) - def _register_with_torch() -> None: trtorch_dir = os.path.dirname(__file__) @@ -111,6 +132,13 @@ def _register_with_torch() -> None: torch.ops.load_library(linked_file_runtime_full_path) +# note: trt_alias must be imported before enabled features, because enabled features will check tensorrt.plugin availability +from torch_tensorrt._features import ENABLED_FEATURES, _enabled_features_str + +_LOGGER = logging.getLogger(__name__) +_LOGGER.debug(_enabled_features_str()) + + _register_with_torch() from torch_tensorrt._Device import Device # noqa: F401 diff --git a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp index bae61881da..c96b07de35 100644 --- a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp @@ -67,7 +67,7 @@ void RegisterTRTCompileSpec() { .def("_set_precisions", &torch_tensorrt::pyapi::CompileSpec::setPrecisions) .def("_set_device", &torch_tensorrt::pyapi::CompileSpec::setDeviceIntrusive) .def("_set_torch_fallback", &torch_tensorrt::pyapi::CompileSpec::setTorchFallbackIntrusive) - 
.def("_set_ptq_calibrator", &torch_tensorrt::pyapi::CompileSpec::setPTQCalibratorViaHandle) + // .def("_set_ptq_calibrator", &torch_tensorrt::pyapi::CompileSpec::setPTQCalibratorViaHandle) .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify); ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, sparse_weights); diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index bd3aa6b305..14ad3cbba8 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -343,17 +343,17 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.partitioning_info.cast_int8_inputs = true; - if (ptq_calibrator) { - info.convert_info.engine_settings.calibrator = ptq_calibrator; - info.partitioning_info.cast_int8_inputs = false; - } else { - if (info.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != - info.convert_info.engine_settings.enabled_precisions.end()) { - info.partitioning_info.cast_int8_inputs = false; - info.lower_info.unfreeze_module = true; - info.lower_info.disable_cse = true; - } - } + // if (ptq_calibrator) { + // info.convert_info.engine_settings.calibrator = ptq_calibrator; + // info.partitioning_info.cast_int8_inputs = false; + // } else { + // if (info.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != + // info.convert_info.engine_settings.enabled_precisions.end()) { + // info.partitioning_info.cast_int8_inputs = false; + // info.lower_info.unfreeze_module = true; + // info.lower_info.disable_cse = true; + // } + // } info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; info.convert_info.engine_settings.refit = refit; diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.h b/py/torch_tensorrt/csrc/tensorrt_classes.h index 89c5c8661e..7241bd0f55 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.h +++ b/py/torch_tensorrt/csrc/tensorrt_classes.h @@ -140,9 +140,9 @@ struct CompileSpec : torch::CustomClassHolder { } } - int64_t getPTQCalibratorHandle() { - return (int64_t)ptq_calibrator; - } + // int64_t getPTQCalibratorHandle() { + // return (int64_t)ptq_calibrator; + // } void setDeviceIntrusive(const c10::intrusive_ptr& d) { device = *d; @@ -152,9 +152,9 @@ struct CompileSpec : torch::CustomClassHolder { torch_fallback = *fb; } - void setPTQCalibratorViaHandle(int64_t handle) { - ptq_calibrator = (nvinfer1::IInt8Calibrator*)handle; - } + // void setPTQCalibratorViaHandle(int64_t handle) { + // ptq_calibrator = (nvinfer1::IInt8Calibrator*)handle; + // } ADD_FIELD_GET_SET(disable_tf32, bool); ADD_FIELD_GET_SET(sparse_weights, bool); @@ -170,11 +170,11 @@ struct CompileSpec : torch::CustomClassHolder { ADD_FIELD_GET_SET(allow_shape_tensors, bool); ADD_FIELD_GET_SET(device, Device); ADD_FIELD_GET_SET(torch_fallback, TorchFallback); - ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*); + // ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*); std::vector inputs; InputSignature input_signature; - nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; + // nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; std::set enabled_precisions = {}; bool sparse_weights = false; bool disable_tf32 = false; diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp index e32d102f8b..0fa5425625 100644 --- 
diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
index e32d102f8b..0fa5425625 100644
--- a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
+++ b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
@@ -18,134 +18,82 @@
 namespace py = pybind11;
 
 namespace torch_tensorrt {
 namespace pyapi {
 
-template <typename Derived>
-class pyCalibratorTrampoline : public Derived {
- public:
-  using Derived::Derived; // Inherit constructors
-
-  int getBatchSize() const noexcept override {
-    try {
-      PYBIND11_OVERLOAD_PURE_NAME(int, Derived, "get_batch_size", getBatchSize);
-    } catch (std::exception const& e) {
-      LOG_ERROR("Exception caught in get_batch_size" + std::string(e.what()));
-    } catch (...) {
-      LOG_ERROR("Exception caught in get_batch_size");
-    }
-    return -1;
-  }
-
-  bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override {
-    py::gil_scoped_acquire gil{};
-
-    py::function pyGetBatch = torch_tensorrt::pyapi::util::getOverload(static_cast<Derived*>(this), "get_batch");
-    std::vector<const char*> namesVec(names, names + nbBindings);
-    py::object result = pyGetBatch(namesVec);
-    // Copy over into the other data structure.
-    if (!result.is_none() && result.cast<std::vector<size_t>>().size() != 0) {
-      std::memcpy(bindings, result.cast<std::vector<size_t>>().data(), nbBindings * sizeof(void*));
-      return true;
-    }
-    return false;
-  }
-
-  const void* readCalibrationCache(std::size_t& length) noexcept override {
-    py::gil_scoped_acquire gil{};
-
-    py::function pyReadCalibrationCache =
-        torch_tensorrt::pyapi::util::getOverload(static_cast<Derived*>(this), "read_calibration_cache");
-    py::buffer cache = pyReadCalibrationCache();
-    if (!cache.is_none()) {
-      py::buffer_info info = cache.request();
-      length = info.size * info.itemsize;
-      return info.ptr;
-    }
-    return nullptr;
-  }
-
-  void writeCalibrationCache(const void* ptr, std::size_t length) noexcept override {
-    py::gil_scoped_acquire gil{};
-
-    py::function pyWriteCalibrationCache =
-        torch_tensorrt::pyapi::util::getOverload(static_cast<Derived*>(this), "write_calibration_cache");
-
-    py::memoryview cache{py::memoryview::from_buffer(static_cast<const uint8_t*>(ptr), {length}, {sizeof(uint8_t)})};
-    pyWriteCalibrationCache(cache);
-  }
-};
-
-class pyIInt8Calibrator : public pyCalibratorTrampoline<nvinfer1::IInt8Calibrator> {
- public:
-  using Derived = pyCalibratorTrampoline<nvinfer1::IInt8Calibrator>;
-  using Derived::Derived;
-
-  nvinfer1::InterfaceInfo getInterfaceInfo() const noexcept override {
-    return nvinfer1::InterfaceInfo{"PYTHON CALIBRATOR", 1, 0};
-  }
-
-  nvinfer1::CalibrationAlgoType getAlgorithm() noexcept override {
-    try {
-      PYBIND11_OVERLOAD_PURE_NAME(
-          nvinfer1::CalibrationAlgoType, nvinfer1::IInt8Calibrator, "get_algorithm", getAlgorithm);
-    } catch (std::exception const& e) {
-      LOG_ERROR("Exception caught in get_algorithm: " + std::string(e.what()));
-    } catch (...) {
-      LOG_ERROR("Exception caught in get_algorithm");
-    }
-    return {};
-  }
-};
-
-class pyIInt8LegacyCalibrator : public pyCalibratorTrampoline<nvinfer1::IInt8LegacyCalibrator> {
- public:
-  using Derived = pyCalibratorTrampoline<nvinfer1::IInt8LegacyCalibrator>;
-  using Derived::Derived;
-
-  double getQuantile() const noexcept override {
-    try {
-      PYBIND11_OVERLOAD_PURE_NAME(double, nvinfer1::IInt8LegacyCalibrator, "get_quantile", getQuantile);
-    } catch (std::exception const& e) {
-      LOG_ERROR("Exception caught in get_quantile: " + std::string(e.what()));
-    } catch (...) {
-      LOG_ERROR("Exception caught in get_quantile");
-    }
-    return -1.0;
-  }
-
-  double getRegressionCutoff() const noexcept override {
-    try {
-      PYBIND11_OVERLOAD_PURE_NAME(
-          double, nvinfer1::IInt8LegacyCalibrator, "get_regression_cutoff", getRegressionCutoff);
-    } catch (std::exception const& e) {
-      LOG_ERROR("Exception caught in get_regression_cutoff: " + std::string(e.what()));
-    } catch (...) {
-      LOG_ERROR("Exception caught in get_regression_cutoff");
-    }
-    return -1.0;
-  }
-
-  const void* readHistogramCache(std::size_t& length) noexcept override {
-    try {
-      PYBIND11_OVERLOAD_PURE_NAME(
-          const char*, nvinfer1::IInt8LegacyCalibrator, "read_histogram_cache", readHistogramCache, length);
-    } catch (std::exception const& e) {
-      LOG_ERROR("Exception caught in read_histogram_cache" + std::string(e.what()));
-    } catch (...) {
-      LOG_ERROR("Exception caught in read_histogram_cache");
-    }
-    return {};
-  }
-
-  void writeHistogramCache(const void* ptr, std::size_t length) noexcept override {
-    try {
-      PYBIND11_OVERLOAD_PURE_NAME(
-          void, nvinfer1::IInt8LegacyCalibrator, "write_histogram_cache", writeHistogramCache, ptr, length);
-    } catch (std::exception const& e) {
-      LOG_ERROR("Exception caught in write_histogram_cache" + std::string(e.what()));
-    } catch (...) {
-      LOG_ERROR("Exception caught in write_histogram_cache");
-    }
-  }
-};
+// template <typename Derived>
+// CalibrationAlgo
+
+// class pyIInt8Calibrator : public pyCalibratorTrampoline<nvinfer1::IInt8Calibrator> {
+//  public:
+//   using Derived = pyCalibratorTrampoline<nvinfer1::IInt8Calibrator>;
+//   using Derived::Derived;
+
+//   nvinfer1::InterfaceInfo getInterfaceInfo() const noexcept override {
+//     return nvinfer1::InterfaceInfo{"PYTHON CALIBRATOR", 1, 0};
+//   }
+
+//   nvinfer1::CalibrationAlgoType getAlgorithm() noexcept override {
+//     try {
+//       PYBIND11_OVERLOAD_PURE_NAME(
+//           nvinfer1::CalibrationAlgoType, nvinfer1::IInt8Calibrator, "get_algorithm", getAlgorithm);
+//     } catch (std::exception const& e) {
+//       LOG_ERROR("Exception caught in get_algorithm: " + std::string(e.what()));
+//     } catch (...) {
+//       LOG_ERROR("Exception caught in get_algorithm");
+//     }
+//     return {};
+//   }
+// };

+// class pyIInt8LegacyCalibrator : public pyCalibratorTrampoline<nvinfer1::IInt8LegacyCalibrator> {
+//  public:
+//   using Derived = pyCalibratorTrampoline<nvinfer1::IInt8LegacyCalibrator>;
+//   using Derived::Derived;

+//   double getQuantile() const noexcept override {
+//     try {
+//       PYBIND11_OVERLOAD_PURE_NAME(double, nvinfer1::IInt8LegacyCalibrator, "get_quantile", getQuantile);
+//     } catch (std::exception const& e) {
+//       LOG_ERROR("Exception caught in get_quantile: " + std::string(e.what()));
+//     } catch (...) {
+//       LOG_ERROR("Exception caught in get_quantile");
+//     }
+//     return -1.0;
+//   }

+//   double getRegressionCutoff() const noexcept override {
+//     try {
+//       PYBIND11_OVERLOAD_PURE_NAME(
+//           double, nvinfer1::IInt8LegacyCalibrator, "get_regression_cutoff", getRegressionCutoff);
+//     } catch (std::exception const& e) {
+//       LOG_ERROR("Exception caught in get_regression_cutoff: " + std::string(e.what()));
+//     } catch (...) {
+//       LOG_ERROR("Exception caught in get_regression_cutoff");
+//     }
+//     return -1.0;
+//   }

+//   const void* readHistogramCache(std::size_t& length) noexcept override {
+//     try {
+//       PYBIND11_OVERLOAD_PURE_NAME(
+//           const char*, nvinfer1::IInt8LegacyCalibrator, "read_histogram_cache", readHistogramCache, length);
+//     } catch (std::exception const& e) {
+//       LOG_ERROR("Exception caught in read_histogram_cache" + std::string(e.what()));
+//     } catch (...) {
+//       LOG_ERROR("Exception caught in read_histogram_cache");
+//     }
+//     return {};
+//   }

+//   void writeHistogramCache(const void* ptr, std::size_t length) noexcept override {
+//     try {
+//       PYBIND11_OVERLOAD_PURE_NAME(
+//           void, nvinfer1::IInt8LegacyCalibrator, "write_histogram_cache", writeHistogramCache, ptr, length);
+//     } catch (std::exception const& e) {
+//       LOG_ERROR("Exception caught in write_histogram_cache" + std::string(e.what()));
+//     } catch (...) {
+//       LOG_ERROR("Exception caught in write_histogram_cache");
+//     }
+//   }
+// };
 
 void set_device(const int device_id) {
   core::set_device(device_id);
@@ -275,50 +223,50 @@ PYBIND11_MODULE(_C, m) {
       .value("channels_last", TensorFormat::kChannelsLast, "Channels last memory layout (NHWC)")
       .export_values();
 
-  py::enum_<nvinfer1::CalibrationAlgoType>(m, "CalibrationAlgo", py::module_local(), "Type of calibration algorithm")
-      .value("LEGACY_CALIBRATION", nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION)
-      .value("ENTROPY_CALIBRATION", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION)
-      .value("ENTROPY_CALIBRATION_2", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2)
-      .value("MINMAX_CALIBRATION", nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION);
-
-  py::class_<nvinfer1::IInt8Calibrator, pyIInt8Calibrator>(
-      m, "IInt8Calibrator", py::module_local(), "Int8 Calibrator base class")
-      .def(py::init_alias<>()) // Always initialize trampoline class.
-      .def("get_batch_size", &nvinfer1::IInt8Calibrator::getBatchSize, "Get batch size")
-      .def("get_algorithm", &nvinfer1::IInt8Calibrator::getAlgorithm, "Get algorithm");
-
-  py::class_<nvinfer1::IInt8LegacyCalibrator, pyIInt8LegacyCalibrator>(
-      m, "IInt8LegacyCalibrator", py::module_local(), "Int8 Legacy Calibrator class")
-      .def(py::init_alias<>()) // Always initialize trampoline class.
-      .def("get_batch_size", &nvinfer1::IInt8LegacyCalibrator::getBatchSize, "Get batch size")
-      .def("get_algorithm", &nvinfer1::IInt8LegacyCalibrator::getAlgorithm, "Get algorithm");
-
-  py::class_<
-      nvinfer1::IInt8EntropyCalibrator,
-      nvinfer1::IInt8Calibrator,
-      pyCalibratorTrampoline<nvinfer1::IInt8EntropyCalibrator>>(
-      m, "IInt8EntropyCalibrator", py::module_local(), "Int8 Entropy Calibrator class")
-      .def(py::init_alias<>()) // Always initialize trampoline class.
-      .def("get_batch_size", &nvinfer1::IInt8EntropyCalibrator::getBatchSize, "Get batch size")
-      .def("get_algorithm", &nvinfer1::IInt8EntropyCalibrator::getAlgorithm, "Get algorithm");
-
-  py::class_<
-      nvinfer1::IInt8EntropyCalibrator2,
-      nvinfer1::IInt8Calibrator,
-      pyCalibratorTrampoline<nvinfer1::IInt8EntropyCalibrator2>>(
-      m, "IInt8EntropyCalibrator2", py::module_local(), "Int8 Entropy Calibrator2 class")
-      .def(py::init_alias<>()) // Always initialize trampoline class.
-      .def("get_batch_size", &nvinfer1::IInt8EntropyCalibrator2::getBatchSize, "Get batch size")
-      .def("get_algorithm", &nvinfer1::IInt8EntropyCalibrator2::getAlgorithm, "Get algorithm");
-
-  py::class_<
-      nvinfer1::IInt8MinMaxCalibrator,
-      nvinfer1::IInt8Calibrator,
-      pyCalibratorTrampoline<nvinfer1::IInt8MinMaxCalibrator>>(
-      m, "IInt8MinMaxCalibrator", py::module_local(), "Int8 MinMax Calibrator class")
-      .def(py::init_alias<>()) // Always initialize trampoline class.
-      .def("get_batch_size", &nvinfer1::IInt8MinMaxCalibrator::getBatchSize, "Get batch size")
-      .def("get_algorithm", &nvinfer1::IInt8MinMaxCalibrator::getAlgorithm, "Get algorithm");
+  // py::enum_<nvinfer1::CalibrationAlgoType>(m, "CalibrationAlgo", py::module_local(), "Type of calibration algorithm")
+  //     .value("LEGACY_CALIBRATION", nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION)
+  //     .value("ENTROPY_CALIBRATION", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION)
+  //     .value("ENTROPY_CALIBRATION_2", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2)
+  //     .value("MINMAX_CALIBRATION", nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION);
+
+  // py::class_<nvinfer1::IInt8Calibrator, pyIInt8Calibrator>(
+  //     m, "IInt8Calibrator", py::module_local(), "Int8 Calibrator base class")
+  //     .def(py::init_alias<>()) // Always initialize trampoline class.
+  //     .def("get_batch_size", &nvinfer1::IInt8Calibrator::getBatchSize, "Get batch size")
+  //     .def("get_algorithm", &nvinfer1::IInt8Calibrator::getAlgorithm, "Get algorithm");
+
+  // py::class_<nvinfer1::IInt8LegacyCalibrator, pyIInt8LegacyCalibrator>(
+  //     m, "IInt8LegacyCalibrator", py::module_local(), "Int8 Legacy Calibrator class")
+  //     .def(py::init_alias<>()) // Always initialize trampoline class.
+  //     .def("get_batch_size", &nvinfer1::IInt8LegacyCalibrator::getBatchSize, "Get batch size")
+  //     .def("get_algorithm", &nvinfer1::IInt8LegacyCalibrator::getAlgorithm, "Get algorithm");
+
+  // py::class_<
+  //     nvinfer1::IInt8EntropyCalibrator,
+  //     nvinfer1::IInt8Calibrator,
+  //     pyCalibratorTrampoline<nvinfer1::IInt8EntropyCalibrator>>(
+  //     m, "IInt8EntropyCalibrator", py::module_local(), "Int8 Entropy Calibrator class")
+  //     .def(py::init_alias<>()) // Always initialize trampoline class.
+  //     .def("get_batch_size", &nvinfer1::IInt8EntropyCalibrator::getBatchSize, "Get batch size")
+  //     .def("get_algorithm", &nvinfer1::IInt8EntropyCalibrator::getAlgorithm, "Get algorithm");
+
+  // py::class_<
+  //     nvinfer1::IInt8EntropyCalibrator2,
+  //     nvinfer1::IInt8Calibrator,
+  //     pyCalibratorTrampoline<nvinfer1::IInt8EntropyCalibrator2>>(
+  //     m, "IInt8EntropyCalibrator2", py::module_local(), "Int8 Entropy Calibrator2 class")
+  //     .def(py::init_alias<>()) // Always initialize trampoline class.
+  //     .def("get_batch_size", &nvinfer1::IInt8EntropyCalibrator2::getBatchSize, "Get batch size")
+  //     .def("get_algorithm", &nvinfer1::IInt8EntropyCalibrator2::getAlgorithm, "Get algorithm");
+
+  // py::class_<
+  //     nvinfer1::IInt8MinMaxCalibrator,
+  //     nvinfer1::IInt8Calibrator,
+  //     pyCalibratorTrampoline<nvinfer1::IInt8MinMaxCalibrator>>(
+  //     m, "IInt8MinMaxCalibrator", py::module_local(), "Int8 MinMax Calibrator class")
+  //     .def(py::init_alias<>()) // Always initialize trampoline class.
+  //     .def("get_batch_size", &nvinfer1::IInt8MinMaxCalibrator::getBatchSize, "Get batch size")
+  //     .def("get_algorithm", &nvinfer1::IInt8MinMaxCalibrator::getAlgorithm, "Get algorithm");
 
   py::class_<Device>(m, "Device")
       .def(py::init<>())
@@ -362,11 +310,12 @@ PYBIND11_MODULE(_C, m) {
   py::class_<CompileSpec>(ts_sub_mod, "CompileSpec")
       .def(py::init<>())
       .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify)
-      .def("_get_calibrator_handle", &CompileSpec::getPTQCalibratorHandle, "[Internal] gets a handle from a calibrator")
+      //.def("_get_calibrator_handle", &CompileSpec::getPTQCalibratorHandle, "[Internal] gets a handle from a
+      // calibrator")
       .def_readwrite("inputs", &CompileSpec::inputs)
       .def_readwrite("input_signature", &CompileSpec::input_signature)
       .def_readwrite("enabled_precisions", &CompileSpec::enabled_precisions)
-      .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator)
+      // .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator)
       .def_readwrite("refit", &CompileSpec::refit)
       .def_readwrite("sparse_weights", &CompileSpec::sparse_weights)
       .def_readwrite("disable_tf32", &CompileSpec::disable_tf32)
diff --git a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py
index fc76b20141..dda5929d4b 100644
--- a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py
+++ b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py
@@ -430,7 +430,7 @@ def create_output_allocator(self) -> None:
 
     def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
         def run_standard_execution() -> torch.Tensor | Tuple[torch.Tensor, ...]:
-            shape_changed = self.validate_input_shapes(inputs)
+            shape_changed = self.validate_input_shapes(contiguous_inputs)
             (
                 need_cudagraphs_record,
                 can_use_pre_allocated_outputs,
diff --git a/py/torch_tensorrt/trt_alias.py b/py/torch_tensorrt/trt_alias.py
new file mode 100644
index 0000000000..6d0cc885d3
--- /dev/null
+++ b/py/torch_tensorrt/trt_alias.py
@@ -0,0 +1,94 @@
+import importlib
+import importlib.util
+import os
+import sys
+from types import ModuleType
+from typing import Any
+
+
+def is_rtx_gpu() -> bool:
+    try:
+        import torch
+
+        return "RTX" in torch.cuda.get_device_name(0)
+    except ImportError:
+        # fallback to tensorrt
+        return False
+
+
+# TensorRTProxyModule is a proxy module that lets us use the tensorrt_rtx package when an RTX GPU is detected
+class TensorRTProxyModule(ModuleType):
+    def __init__(self, target_module: ModuleType) -> None:
+        spec = importlib.util.spec_from_loader("tensorrt", loader=None)
+        self.__spec__ = spec
+        self.__package__ = target_module.__package__
+        self.__path__ = target_module.__path__
+        self.__file__ = target_module.__file__
+        self.__loader__ = target_module.__loader__
+        self.__version__ = target_module.__version__
+        self._target_module = target_module
+        self._nested_module = None
+        self._package_name: str = ""
+
+        # For RTX: tensorrt.tensorrt -> tensorrt_rtx.tensorrt_rtx
+        # For standard: tensorrt.tensorrt -> tensorrt.tensorrt (no change)
+        if hasattr(target_module, "tensorrt_rtx"):
+            self._nested_module = target_module.tensorrt_rtx
+        elif hasattr(target_module, "tensorrt"):
+            self._nested_module = target_module.tensorrt
+
+        # Set up the nested module structure
+        if self._nested_module:
+            self.tensorrt = self._nested_module
+
+    # __getattr__ is used to get the attribute from the target module
+    def __getattr__(self, name: str) -> Any:
+        # First try to get from the target module
+        try:
+            return getattr(self._target_module, name)
+        except AttributeError:
+            print(f"AttributeError: {name}")
+            # For nested modules like tensorrt.tensorrt
+            if name == "tensorrt" and self._nested_module:
+                return self._nested_module
+            raise
+
+    def __dir__(self) -> list[str]:
+        return dir(self._target_module)
+
+
+def alias_tensorrt() -> None:
+    # Determine package name with env override support for easy testing with tensorrt or tensorrt_rtx
+    # eg: FORCE_TENSORRT_RTX=1 python test.py
+    # eg: FORCE_TENSORRT_STD=1 python test.py
+    use_rtx = False
+    if os.environ.get("FORCE_TENSORRT_RTX", "0") == "1":
+        use_rtx = True
+    elif os.environ.get("FORCE_TENSORRT_STD", "0") == "1":
+        use_rtx = False
+    else:
+        use_rtx = is_rtx_gpu()
+
+    # Import the appropriate package
+    try:
+        if use_rtx:
+            target = importlib.import_module("tensorrt_rtx")
+        else:
+            target = importlib.import_module("tensorrt")
+    except ImportError:
+        # Fallback to standard tensorrt if RTX version not available
+        print(f"import error when {use_rtx=}, fallback to standard tensorrt")
+        try:
+            target = importlib.import_module("tensorrt")
+            # we fell back to the standard tensorrt package, so mark RTX as not in use
+            use_rtx = False
+        except ImportError:
+            raise RuntimeError("TensorRT package not found")
+
+    proxy = TensorRTProxyModule(target)
+    proxy._package_name = "tensorrt_rtx" if use_rtx else "tensorrt"
+
+    sys.modules["tensorrt"] = proxy
+
+
+alias_tensorrt()
diff --git a/pyproject.toml b/pyproject.toml
index f32e3296d0..7c98c4f98e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,10 @@ dependencies = [
 dynamic = ["version"]
 
 [dependency-groups]
+rtx = [
+    "tensorrt_rtx",
+]
+
 dev = [
     "pre-commit>=2.20.0",
     "black>=22.6.0",
diff --git a/setup.py b/setup.py
index fb96d85453..b512410cb3 100644
--- a/setup.py
+++ b/setup.py
@@ -28,6 +28,7 @@ 
__version__: str = "0.0.0" __cuda_version__: str = "0.0" __tensorrt_version__: str = "0.0" +__tensorrt_rtx_version__: str = "0.0" LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile("a0$") @@ -63,6 +64,7 @@ def get_base_version() -> str: def load_dep_info(): global __cuda_version__ global __tensorrt_version__ + global __tensorrt_rtx_version__ with open("dev_dep_versions.yml", "r") as stream: versions = yaml.safe_load(stream) if (gpu_arch_version := os.environ.get("CU_VERSION")) is not None: @@ -72,6 +74,7 @@ def load_dep_info(): else: __cuda_version__ = versions["__cuda_version__"] __tensorrt_version__ = versions["__tensorrt_version__"] + __tensorrt_rtx_version__ = versions["__tensorrt_rtx_version__"] load_dep_info() @@ -86,6 +89,11 @@ def load_dep_info(): LEGACY = False RELEASE = False CI_BUILD = False +USE_RTX = False + +if "--use-rtx" in sys.argv: + USE_RTX = True + sys.argv.remove("--use-rtx") if "--fx-only" in sys.argv: PY_ONLY = True @@ -115,6 +123,14 @@ def load_dep_info(): if py_only_env_var == "1": PY_ONLY = True +if (use_rtx_env_var := os.environ.get("FORCE_TENSORRT_RTX")) is not None: + if use_rtx_env_var == "1": + USE_RTX = True + +if (use_rtx_env_var := os.environ.get("FORCE_TENSORRT_STD")) is not None: + if use_rtx_env_var == "1": + USE_RTX = False + if (release_env_var := os.environ.get("RELEASE")) is not None: if release_env_var == "1": RELEASE = True @@ -240,6 +256,7 @@ def gen_version_file(): f.write('__version__ = "' + __version__ + '"\n') f.write('__cuda_version__ = "' + __cuda_version__ + '"\n') f.write('__tensorrt_version__ = "' + __tensorrt_version__ + '"\n') + f.write('__tensorrt_rtx_version__ = "' + __tensorrt_rtx_version__ + '"\n') def copy_libtorchtrt(multilinux=False, rt_only=False): @@ -487,6 +504,15 @@ def run(self): .split("/BUILD.bazel")[0] ) + tensorrt_rtx_external_dir = ( + lambda: subprocess.check_output( + [BAZEL_EXE, "query", "@tensorrt_rtx//:nvinfer", "--output", "location"] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] + ) + tensorrt_sbsa_external_dir = ( lambda: subprocess.check_output( [BAZEL_EXE, "query", "@tensorrt_sbsa//:nvinfer", "--output", "location"] @@ -510,7 +536,10 @@ def run(self): elif IS_JETPACK: tensorrt_linux_external_dir = tensorrt_jetpack_external_dir else: - tensorrt_linux_external_dir = tensorrt_x86_64_external_dir + if USE_RTX: + tensorrt_linux_external_dir = tensorrt_rtx_external_dir + else: + tensorrt_linux_external_dir = tensorrt_x86_64_external_dir tensorrt_windows_external_dir = ( lambda: subprocess.check_output( diff --git a/third_party/tensorrt_rtx/archive/BUILD b/third_party/tensorrt_rtx/archive/BUILD new file mode 100644 index 0000000000..014158110a --- /dev/null +++ b/third_party/tensorrt_rtx/archive/BUILD @@ -0,0 +1,58 @@ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "windows", + constraint_values = [ + "@platforms//os:windows", + ], +) + +cc_library( + name = "nvinfer_headers", + hdrs = glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + exclude = [ + "include/NvInferPlugin.h", + "include/NvInferPluginUtils.h", + ], + ), + includes = ["include/"], + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_lib", + shared_library = select({ + ":windows": "lib/tensorrt_rtx_1_0.dll", + "//conditions:default": "lib/libtensorrt_rtx.so", + }), + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_static_lib", + static_library = select({ + ":windows": 
"lib/tensorrt_rtx_1_0.lib", + }), + visibility = ["//visibility:private"], +) + +cc_library( + name = "nvinfer", + visibility = ["//visibility:public"], + deps = [ + "nvinfer_headers", + "nvinfer_lib", + ] + select({ + ":windows": [ + "nvinfer_static_lib", + "@cuda_win//:cudart", + ], + "//conditions:default": ["@cuda//:cudart"], + }), +) diff --git a/third_party/tensorrt_rtx/local/BUILD b/third_party/tensorrt_rtx/local/BUILD new file mode 100644 index 0000000000..1bc90ab413 --- /dev/null +++ b/third_party/tensorrt_rtx/local/BUILD @@ -0,0 +1,93 @@ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "windows", + constraint_values = [ + "@platforms//os:windows", + ], +) + +config_setting( + name = "ci_rhel_x86_64_linux", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + "@//toolchains/distro:ci_rhel", + ], +) + +cc_library( + name = "nvinfer_headers", + hdrs = select({ + ":ci_rhel_x86_64_linux": glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + exclude = [ + "include/NvInferPlugin.h", + "include/NvInferPluginUtils.h", + ], + ), + ":windows": glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + exclude = [ + "include/NvInferPlugin.h", + "include/NvInferPluginUtils.h", + ], + ), + "//conditions:default": glob( + [ + "include/x86_64-linux-gnu/NvInfer*.h", + ], + exclude = [ + "include/x86_64-linux-gnu/NvInferPlugin.h", + "include/x86_64-linux-gnu/NvInferPluginUtils.h", + ], + ), + }), + includes = select({ + ":ci_rhel_x86_64_linux": ["include/"], + ":windows": ["include/"], + "//conditions:default": ["include/x86_64-linux-gnu/"], + }), + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_static_lib", + static_library = select({ + ":windows": "lib/tensorrt_rtx_1_0.lib", + }), + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_lib", + shared_library = select({ + ":ci_rhel_x86_64_linux": "lib64/libtensorrt_rtx.so", + ":windows": "lib/tensorrt_rtx_1_0.dll", + "//conditions:default": "lib/x86_64-linux-gnu/libtensorrt_rtx.so", + }), + visibility = ["//visibility:private"], +) + +cc_library( + name = "nvinfer", + visibility = ["//visibility:public"], + deps = [ + "nvinfer_headers", + "nvinfer_lib", + ] + select({ + ":windows": [ + "nvinfer_static_lib", + "@cuda_win//:cudart", + ], + "//conditions:default": ["@cuda//:cudart"], + }), +) diff --git a/toolchains/dep_collection/defs.bzl b/toolchains/dep_collection/defs.bzl index 6eaa710261..873ef7ec42 100644 --- a/toolchains/dep_collection/defs.bzl +++ b/toolchains/dep_collection/defs.bzl @@ -1,7 +1,7 @@ # buildifier: disable=module-docstring DependencyCollectionInfo = provider(doc = "", fields = ["type"]) -collection_types = ["default", "jetpack"] +collection_types = ["default", "jetpack", "rtx"] def _impl(ctx): _type = ctx.build_setting_value From 7d66ea5ad4ca4f729508104db403a9b164ba0ba5 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 11 Jul 2025 15:53:37 -0700 Subject: [PATCH 03/12] add tensorrt_rtx --- .bazelrc | 3 ++ core/BUILD | 17 +++++++ core/conversion/BUILD | 17 +++++++ core/conversion/conversion.cpp | 2 +- core/conversion/conversionctx/BUILD | 17 +++++++ .../conversionctx/ConversionCtx.cpp | 26 ++++++---- core/conversion/converters/BUILD | 42 ++++++++++++++-- .../converters/impl/quantization.cpp | 11 ++--- core/conversion/evaluators/BUILD | 17 +++++++ core/conversion/tensorcontainer/BUILD | 17 +++++++ core/conversion/var/BUILD | 17 +++++++ 
core/ir/BUILD | 15 ++++++ core/lowering/BUILD | 15 ++++++ core/partitioning/BUILD | 17 +++++++ core/partitioning/partitioningctx/BUILD | 15 ++++++ core/partitioning/partitioninginfo/BUILD | 15 ++++++ core/partitioning/segmentedblock/BUILD | 17 +++++++ core/plugins/BUILD | 48 +++++++++++++++---- core/runtime/BUILD | 17 +++++++ core/util/BUILD | 17 +++++++ core/util/logging/BUILD | 17 +++++++ cpp/BUILD | 35 ++++++++++++-- cpp/CMakeLists.txt | 2 - cpp/bin/torchtrtc/fileio.h | 2 + cpp/bin/torchtrtc/main.cpp | 18 +++---- cpp/bin/torchtrtc/parser_util.h | 2 + cpp/include/torch_tensorrt/torch_tensorrt.h | 4 +- py/torch_tensorrt/__init__.py | 5 +- .../dynamo/conversion/_TRTInterpreter.py | 31 ++++++------ py/torch_tensorrt/trt_alias.py | 2 +- setup.py | 34 +++++++++---- tests/util/BUILD | 2 + third_party/tensorrt_rtx/archive/BUILD | 28 +++++++---- toolchains/ci_workspaces/MODULE.bazel.tmpl | 18 +++++++ toolchains/dep_collection/defs.bzl | 2 +- 35 files changed, 478 insertions(+), 86 deletions(-) diff --git a/.bazelrc b/.bazelrc index 801b7193d4..019eaa930c 100644 --- a/.bazelrc +++ b/.bazelrc @@ -38,6 +38,9 @@ build:cxx11_abi --define=abi=cxx11_abi build:jetpack --//toolchains/dep_collection:compute_libs=jetpack +build:rtx_x86_64 --cxxopt="-std=c++17" --cxxopt="-fdiagnostics-color=always" +build:rtx_win --cxxopt="/GS-" --cxxopt="/std:c++17" --cxxopt="/permissive-" --cxxopt="/wd4244" --cxxopt="/wd4267" --cxxopt="/wd4819" --features=windows_export_all_symbols + build:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0" build:use_precompiled_torchtrt --define=torchtrt_src=prebuilt diff --git a/core/BUILD b/core/BUILD index 6f5cfad30f..88eafeec2a 100644 --- a/core/BUILD +++ b/core/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -59,6 +74,8 @@ cc_library( "//core/runtime", "//core/util/logging", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/conversion/BUILD b/core/conversion/BUILD index ff87c5a4b8..200543b486 100644 --- a/core/conversion/BUILD +++ b/core/conversion/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -54,6 +69,8 @@ cc_library( "//core/ir", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp index d84e690563..a3b50430cc 100644 --- a/core/conversion/conversion.cpp +++ b/core/conversion/conversion.cpp @@ -204,7 +204,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef input "Optimization profile is invalid, please check the input range provided (conversion.AddInputs)"); ctx->cfg->addOptimizationProfile(profile); -#if 
!defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1))
+#ifndef TRT_MAJOR_RTX
   if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
     ctx->cfg->setCalibrationProfile(profile);
   }
diff --git a/core/conversion/conversionctx/BUILD b/core/conversion/conversionctx/BUILD
index b6820fc757..19794520e1 100644
--- a/core/conversion/conversionctx/BUILD
+++ b/core/conversion/conversionctx/BUILD
@@ -10,6 +10,21 @@ config_setting(
     },
 )
 
+config_setting(
+    name = "rtx_x86_64",
+    constraint_values = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:linux",
+    ],
+)
+
+config_setting(
+    name = "rtx_win",
+    constraint_values = [
+        "@platforms//os:windows",
+    ],
+)
+
 config_setting(
     name = "sbsa",
     constraint_values = [
@@ -49,6 +64,8 @@ cc_library(
         "//core/ir",
         "//core/util:prelude",
     ] + select({
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
         ":windows": ["@tensorrt_win//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
         ":jetpack": ["@tensorrt_l4t//:nvinfer"],
diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp
index 7c8e087fbc..c821698fe3 100644
--- a/core/conversion/conversionctx/ConversionCtx.cpp
+++ b/core/conversion/conversionctx/ConversionCtx.cpp
@@ -59,23 +59,29 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
   for (auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) {
     switch (*p) {
       case nvinfer1::DataType::kHALF:
+        // tensorrt_rtx is strongly typed; FP16 cannot be set via the builder config, so only do this for the TensorRT build
+        #ifndef TRT_MAJOR_RTX
         TORCHTRT_CHECK(
             builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16");
         cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
         break;
+        #endif
       case nvinfer1::DataType::kINT8:
+        // tensorrt_rtx is strongly typed; INT8 cannot be set via the builder config, so only do this for the TensorRT build
+        #ifndef TRT_MAJOR_RTX
         TORCHTRT_CHECK(
             builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
         cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
-        // if (!settings.calibrator) {
-        //   LOG_INFO(
-        //       "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes
-        //       obtained from Quantization aware training. For more details, refer to
-        //       https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
-        // } else {
-        //   cfg->setInt8Calibrator(settings.calibrator);
-        // }
+        if (!settings.calibrator) {
+          LOG_INFO(
+              "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes "
+              "obtained from Quantization aware training. For more details, refer to "
+              "https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
+        } else {
+          cfg->setInt8Calibrator(settings.calibrator);
+        }
         break;
+        #endif
       case nvinfer1::DataType::kFLOAT:
         break;
@@ -91,7 +97,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
   if (settings.disable_tf32) {
     cfg->clearFlag(nvinfer1::BuilderFlag::kTF32);
   }
-#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR > 7)
+#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7)
   if (settings.sparse_weights) {
     cfg->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);
   }
@@ -165,7 +171,7 @@ void ConversionCtx::RecordNewITensor(const torch::jit::Value* value, nvinfer1::I
 }
 
 std::string ConversionCtx::SerializeEngine() {
-#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR > 7)
+#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7)
   auto serialized_network = make_trt(builder->buildSerializedNetwork(*net, *cfg));
   if (!serialized_network) {
     TORCHTRT_THROW_ERROR("Building serialized network failed in TensorRT");
diff --git a/core/conversion/converters/BUILD b/core/conversion/converters/BUILD
index 456b8ee7d4..ae417636e1 100644
--- a/core/conversion/converters/BUILD
+++ b/core/conversion/converters/BUILD
@@ -10,6 +10,21 @@ config_setting(
     },
 )
 
+config_setting(
+    name = "rtx_x86_64",
+    constraint_values = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:linux",
+    ],
+)
+
+config_setting(
+    name = "rtx_win",
+    constraint_values = [
+        "@platforms//os:windows",
+    ],
+)
+
 config_setting(
     name = "sbsa",
     constraint_values = [
@@ -49,6 +64,8 @@ cc_library(
         "//core/conversion/conversionctx",
         "//core/util:prelude",
     ] + select({
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
         ":windows": ["@tensorrt_win//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
         ":jetpack": ["@tensorrt_l4t//:nvinfer"],
@@ -75,6 +92,8 @@ cc_library(
         "//core/conversion/conversionctx",
         "//core/util:prelude",
     ] + select({
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
         ":windows": ["@tensorrt_win//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
         ":jetpack": ["@tensorrt_l4t//:nvinfer"],
@@ -88,12 +107,13 @@ cc_library(
     alwayslink = True,
 )
 
+
 cc_library(
     name = "converters",
     srcs = [
         "NodeConverterRegistry.cpp",
         "impl/activation.cpp",
-        "impl/batch_norm.cpp",
+        # "impl/batch_norm.cpp",
         "impl/bitwise.cpp",
         "impl/cast.cpp",
         "impl/chunk.cpp",
@@ -106,14 +126,14 @@ cc_library(
         "impl/element_wise.cpp",
         "impl/expand.cpp",
         "impl/internal_ops.cpp",
-        "impl/interpolate.cpp",
+        # "impl/interpolate.cpp",
         "impl/layer_norm.cpp",
         "impl/linear.cpp",
         "impl/lstm_cell.cpp",
         "impl/matrix_multiply.cpp",
         "impl/max.cpp",
-        "impl/normalize.cpp",
-        "impl/pooling.cpp",
+        # "impl/normalize.cpp",
+        # "impl/pooling.cpp",
         "impl/quantization.cpp",
         "impl/reduce.cpp",
         "impl/reflection_pad.cpp",
@@ -126,7 +146,17 @@ cc_library(
         "impl/topk.cpp",
         "impl/unary.cpp",
         "impl/unsqueeze.cpp",
-    ],
+    ] + select({
+        # exclude plugins from rtx build
+        ":rtx_x86_64": [],
+        ":rtx_win": [],
+        "//conditions:default": [
+            "impl/interpolate.cpp",
+            "impl/normalize.cpp",
+            "impl/pooling.cpp",
+            "impl/batch_norm.cpp",
+        ],
+    }),
     hdrs = [
         "converters.h",
     ],
@@ -138,6 +168,8 @@ cc_library(
         "//core/plugins:torch_tensorrt_plugins",
         "//core/util:prelude",
     ] + select({
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
         ":windows": ["@tensorrt_win//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
         ":jetpack": ["@tensorrt_l4t//:nvinfer"],
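[Review note] The converter change below passes an explicit output type to
addQuantize/addDequantize, which is required in TensorRT-RTX's strongly typed mode.
The TorchScript ops this converter lowers are the aten fake-quantize ops; for
reference, a minimal eager-mode reproduction of that op's math (standard PyTorch
API, illustrative values):

    import torch

    x = torch.randn(2, 3)
    scale, zero_point, qmin, qmax = 0.1, 0, -128, 127
    y = torch.fake_quantize_per_tensor_affine(x, scale, zero_point, qmin, qmax)
    # fake-quantize = quantize (scale, round, clamp) immediately followed by dequantize
    ref = (x / scale).round().clamp(qmin, qmax) * scale
    assert torch.allclose(y, ref)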
["@tensorrt_l4t//:nvinfer"], diff --git a/core/conversion/converters/impl/quantization.cpp b/core/conversion/converters/impl/quantization.cpp index 759b772f0f..6a8c2e2f73 100644 --- a/core/conversion/converters/impl/quantization.cpp +++ b/core/conversion/converters/impl/quantization.cpp @@ -9,15 +9,14 @@ namespace converters { namespace impl { namespace { -#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR > 7) +#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7) // clang-format off bool add_qdq(ConversionCtx *ctx, const torch::jit::Node* n, nvinfer1::ITensor* input, nvinfer1::ITensor* scale, std::string& opName) { - nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale); + nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale, nvinfer1::DataType::kINT8); TORCHTRT_CHECK(quantize_layer, "Unable to create QuantizeLayer from node: " << *n); quantize_layer->setAxis(0); - - nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale); + nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale, input->getType()); TORCHTRT_CHECK(dequantize_layer, "Unable to create DequantizeLayer from node: " << *n); dequantize_layer->setAxis(0); @@ -54,12 +53,12 @@ auto quantization_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns auto scale = args[1].ITensorOrFreeze(ctx); int64_t axis = args[3].unwrapToScalar().to(); // Add and configure a QuantizeLayer. - nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale); + nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale, nvinfer1::DataType::kINT8); // Set a channel axis which represents output channels quantize_layer->setAxis(axis); // Add and configure a DequantizeLayer. 
- nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale); + nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale, input->getType()); dequantize_layer->setAxis(axis); auto qdq_out = ctx->AssociateValueAndTensor(n->outputs()[0], dequantize_layer->getOutput(0)); diff --git a/core/conversion/evaluators/BUILD b/core/conversion/evaluators/BUILD index d3adad10cd..a6714e8a90 100644 --- a/core/conversion/evaluators/BUILD +++ b/core/conversion/evaluators/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -55,6 +70,8 @@ cc_library( "//core/conversion/var", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/conversion/tensorcontainer/BUILD b/core/conversion/tensorcontainer/BUILD index 951a0b886e..a671301533 100644 --- a/core/conversion/tensorcontainer/BUILD +++ b/core/conversion/tensorcontainer/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -48,6 +63,8 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/conversion/var/BUILD b/core/conversion/var/BUILD index 770d3c2120..caf57a2ba3 100644 --- a/core/conversion/var/BUILD +++ b/core/conversion/var/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -51,6 +66,8 @@ cc_library( "//core/conversion/tensorcontainer", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/ir/BUILD b/core/ir/BUILD index fce3fbe51f..f24468a519 100644 --- a/core/ir/BUILD +++ b/core/ir/BUILD @@ -9,7 +9,20 @@ config_setting( "//toolchains/dep_src:torch": "whl" }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) config_setting( name = "sbsa", constraint_values = [ @@ -51,6 +64,8 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": 
["@tensorrt_l4t//:nvinfer"], diff --git a/core/lowering/BUILD b/core/lowering/BUILD index 27af435927..354c1b0331 100644 --- a/core/lowering/BUILD +++ b/core/lowering/BUILD @@ -9,7 +9,20 @@ config_setting( "//toolchains/dep_src:torch": "whl" }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) config_setting( name = "sbsa", constraint_values = [ @@ -53,6 +66,8 @@ cc_library( "//core/lowering/passes", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/partitioning/BUILD b/core/partitioning/BUILD index 378752cdfd..b7603b62d0 100644 --- a/core/partitioning/BUILD +++ b/core/partitioning/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -56,6 +71,8 @@ cc_library( "//core/partitioning/segmentedblock", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/partitioning/partitioningctx/BUILD b/core/partitioning/partitioningctx/BUILD index bd21aba7ff..c0ac8c984a 100644 --- a/core/partitioning/partitioningctx/BUILD +++ b/core/partitioning/partitioningctx/BUILD @@ -9,7 +9,20 @@ config_setting( "//toolchains/dep_src:torch": "whl" }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) config_setting( name = "sbsa", constraint_values = [ @@ -52,6 +65,8 @@ cc_library( "//core/partitioning/segmentedblock", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/partitioning/partitioninginfo/BUILD b/core/partitioning/partitioninginfo/BUILD index daebcd615f..52905ab252 100644 --- a/core/partitioning/partitioninginfo/BUILD +++ b/core/partitioning/partitioninginfo/BUILD @@ -9,7 +9,20 @@ config_setting( "//toolchains/dep_src:torch": "whl" }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) config_setting( name = "sbsa", constraint_values = [ @@ -51,6 +64,8 @@ cc_library( "//core/lowering", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/partitioning/segmentedblock/BUILD b/core/partitioning/segmentedblock/BUILD index 83e45eaf14..b3dadf236d 100644 --- 
a/core/partitioning/segmentedblock/BUILD +++ b/core/partitioning/segmentedblock/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -51,6 +66,8 @@ cc_library( "//core/lowering", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/plugins/BUILD b/core/plugins/BUILD index cebce31941..59360ed0e4 100644 --- a/core/plugins/BUILD +++ b/core/plugins/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -37,18 +52,27 @@ config_setting( ], ) + cc_library( name = "torch_tensorrt_plugins", - srcs = [ - "impl/interpolate_plugin.cpp", - "impl/normalize_plugin.cpp", - "register_plugins.cpp", - ], - hdrs = [ - "impl/interpolate_plugin.h", - "impl/normalize_plugin.h", - "plugins.h", - ], + srcs = select({ + ":rtx_x86_64": [], + ":rtx_win": [], + "//conditions:default": [ + "impl/interpolate_plugin.cpp", + "impl/normalize_plugin.cpp", + "register_plugins.cpp", + ], + }), + hdrs = select({ + ":rtx_x86_64": [], + ":rtx_win": [], + "//conditions:default": [ + "impl/interpolate_plugin.h", + "impl/normalize_plugin.h", + "plugins.h", + ], + }), copts = [ "-pthread", ], @@ -58,6 +82,8 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ + ":rtx_x86_64": [], + ":rtx_win": [], ":windows": [ "@tensorrt_win//:nvinfer", "@tensorrt_win//:nvinferplugin", @@ -75,6 +101,8 @@ cc_library( "@tensorrt//:nvinferplugin", ], }) + select({ + ":rtx_x86_64": [], + ":rtx_win": [], ":windows": ["@libtorch_win//:libtorch"], ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], diff --git a/core/runtime/BUILD b/core/runtime/BUILD index 72c670bff1..79480e7177 100644 --- a/core/runtime/BUILD +++ b/core/runtime/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -63,6 +78,8 @@ cc_library( "//core/plugins:torch_tensorrt_plugins", "//core/util:prelude", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/util/BUILD b/core/util/BUILD index 4f522704ee..947917089e 100644 --- a/core/util/BUILD +++ b/core/util/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -114,6 +129,8 @@ cc_library( ":macros", "//core/util/logging", 
] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/core/util/logging/BUILD b/core/util/logging/BUILD index f0cc067af9..fc7264e920 100644 --- a/core/util/logging/BUILD +++ b/core/util/logging/BUILD @@ -10,6 +10,21 @@ config_setting( }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + config_setting( name = "sbsa", constraint_values = [ @@ -46,6 +61,8 @@ cc_library( "TorchTRTLogger.h", ], deps = select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/cpp/BUILD b/cpp/BUILD index e5cb1558e9..aebc6ea329 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -2,21 +2,48 @@ load("@rules_cc//cc:defs.bzl", "cc_library") package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], +) + + cc_library( name = "torch_tensorrt", srcs = [ "src/compile_spec.cpp", "src/logging.cpp", - "src/ptq.cpp", "src/torch_tensorrt.cpp", "src/types.cpp", - ], + ] + select({ + ":rtx_x86_64": [], + ":rtx_win": [], + "//conditions:default": [ + "src/ptq.cpp", + ], + }), hdrs = [ "include/torch_tensorrt/logging.h", "include/torch_tensorrt/macros.h", - "include/torch_tensorrt/ptq.h", "include/torch_tensorrt/torch_tensorrt.h", - ], + ] + select({ + ":rtx_x86_64": [], + ":rtx_win": [], + "//conditions:default": [ + "include/torch_tensorrt/ptq.h", + ], + }), linkstatic = True, strip_include_prefix = "include/", deps = [ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0c0e5a43f0..690dca2749 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(${lib_name} OBJECT) set(CXX_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp" ) @@ -12,7 +11,6 @@ set(CXX_SRCS set(HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h" ) diff --git a/cpp/bin/torchtrtc/fileio.h b/cpp/bin/torchtrtc/fileio.h index ed52d566a1..a27c0a69e1 100644 --- a/cpp/bin/torchtrtc/fileio.h +++ b/cpp/bin/torchtrtc/fileio.h @@ -23,7 +23,9 @@ #include "torch/torch.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include "torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" namespace torchtrtc { diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp index c36cfdd0fc..60c76da049 100644 --- a/cpp/bin/torchtrtc/main.cpp +++ b/cpp/bin/torchtrtc/main.cpp @@ -7,7 +7,9 @@ #include "torch/script.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include "torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" #include 
"accuracy.h" @@ -335,7 +337,7 @@ int main(int argc, char** argv) { calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file)); } - auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path); + // auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path); compile_settings.require_full_compilation = require_full_compilation; @@ -367,13 +369,13 @@ int main(int argc, char** argv) { compile_settings.enabled_precisions.insert(torch::kF16); } else if (dtype == torchtrt::DataType::kChar) { compile_settings.enabled_precisions.insert(torch::kI8); - if (calibration_cache_file) { - compile_settings.ptq_calibrator = calibrator; - } else { - torchtrt::logging::log( - torchtrt::logging::Level::kINFO, - "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); - } + // if (calibration_cache_file) { + // compile_settings.ptq_calibrator = calibrator; + // } else { + // torchtrt::logging::log( + // torchtrt::logging::Level::kINFO, + // "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); + // } } else { std::stringstream ss; ss << "Invalid precision given for enabled kernel precision, options are [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | char | int8 | i8 ], found: "; diff --git a/cpp/bin/torchtrtc/parser_util.h b/cpp/bin/torchtrtc/parser_util.h index 9ed5f6d06b..6605ec011a 100644 --- a/cpp/bin/torchtrtc/parser_util.h +++ b/cpp/bin/torchtrtc/parser_util.h @@ -9,7 +9,9 @@ #include "torch/torch.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include "torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" namespace torchtrtc { diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index adac75d984..4068fa6b80 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -832,12 +832,12 @@ struct CompileSpec { * host RAM used by DLA to store weights and metadata for execution */ uint64_t dla_global_dram_size = 536870912; - +#ifndef TRT_MAJOR_RTX /** * Calibration dataloaders for each input for post training quantizatiom */ nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; - +#endif /** * Require the full module be compiled to TensorRT instead of potentially running unsupported operations in PyTorch */ diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index d392a24f93..a1764c940e 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -43,10 +43,9 @@ def _find_lib(name: str, paths: List[str]) -> str: import torch try: - import tensorrt # noqa: F401 - from . 
import trt_alias # noqa: F401 - + + import tensorrt # noqa: F401 print(f"You are using {tensorrt.__name__=} {tensorrt.__version__=}") except ImportError: tensorrt_version = _parse_semver(__tensorrt_version__) diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index 8d7a914836..c92d810701 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -90,11 +90,13 @@ def __init__( self.builder = trt.Builder(self.logger) self._debugger_config = _debugger_config flag = 0 - if compilation_settings.use_explicit_typing: - STRONGLY_TYPED = 1 << (int)( - trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED - ) - flag |= STRONGLY_TYPED + # rtx build, strongly typed is enabled by default, can not set it by builder config + if trt._package_name == "tensorrt": + if compilation_settings.use_explicit_typing: + STRONGLY_TYPED = 1 << (int)( + trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED + ) + flag |= STRONGLY_TYPED self.ctx = ConversionContext( self.builder.create_network(flag), compilation_settings @@ -277,18 +279,19 @@ def _populate_trt_builder_config( trt.MemoryPoolType.DLA_GLOBAL_DRAM, self.compilation_settings.dla_global_dram_size, ) + # rtx build, strongly typed is enabled by default, cannot select precision by builder_config + if trt._package_name == "tensorrt": + if dtype.float16 in self.compilation_settings.enabled_precisions: + builder_config.set_flag(trt.BuilderFlag.FP16) - if dtype.float16 in self.compilation_settings.enabled_precisions: - builder_config.set_flag(trt.BuilderFlag.FP16) - - if dtype.int8 in self.compilation_settings.enabled_precisions: - builder_config.set_flag(trt.BuilderFlag.INT8) + if dtype.int8 in self.compilation_settings.enabled_precisions: + builder_config.set_flag(trt.BuilderFlag.INT8) - if dtype.fp8 in self.compilation_settings.enabled_precisions: - builder_config.set_flag(trt.BuilderFlag.FP8) + if dtype.fp8 in self.compilation_settings.enabled_precisions: + builder_config.set_flag(trt.BuilderFlag.FP8) - if dtype.bfloat16 in self.compilation_settings.enabled_precisions: - builder_config.set_flag(trt.BuilderFlag.BF16) + if dtype.bfloat16 in self.compilation_settings.enabled_precisions: + builder_config.set_flag(trt.BuilderFlag.BF16) if self.compilation_settings.sparse_weights: builder_config.set_flag(trt.BuilderFlag.SPARSE_WEIGHTS) diff --git a/py/torch_tensorrt/trt_alias.py b/py/torch_tensorrt/trt_alias.py index 6d0cc885d3..55bcd4a9e9 100644 --- a/py/torch_tensorrt/trt_alias.py +++ b/py/torch_tensorrt/trt_alias.py @@ -86,7 +86,7 @@ def alias_tensorrt() -> None: raise RuntimeError("TensorRT package not found") proxy = TensorRTProxyModule(target) - proxy._package_name = "tensorrt" if use_rtx else "tensorrt_rtx" + proxy._package_name = "tensorrt_rtx" if use_rtx else "tensorrt" sys.modules["tensorrt"] = proxy diff --git a/setup.py b/setup.py index b512410cb3..e42e6531bb 100644 --- a/setup.py +++ b/setup.py @@ -222,9 +222,15 @@ def build_libtorchtrt_cxx11_abi( cmd.append("--config=python") if IS_WINDOWS: - cmd.append("--config=windows") + if USE_RTX: + cmd.append("--config=rtx_win") + else: + cmd.append("--config=windows") else: - cmd.append("--config=linux") + if USE_RTX: + cmd.append("--config=rtx_x86_64") + else: + cmd.append("--config=linux") if IS_JETPACK: cmd.append("--config=jetpack") @@ -541,14 +547,24 @@ def run(self): else: tensorrt_linux_external_dir = tensorrt_x86_64_external_dir - tensorrt_windows_external_dir = ( - 
lambda: subprocess.check_output( - [BAZEL_EXE, "query", "@tensorrt_win//:nvinfer", "--output", "location"] + if USE_RTX: + tensorrt_windows_external_dir = ( + lambda: subprocess.check_output( + [BAZEL_EXE, "query", "@tensorrt_rtx_win//:nvinfer", "--output", "location"] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] + ) + else: + tensorrt_windows_external_dir = ( + lambda: subprocess.check_output( + [BAZEL_EXE, "query", "@tensorrt_win//:nvinfer", "--output", "location"] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] ) - .decode("ascii") - .strip() - .split("/BUILD.bazel")[0] - ) ext_modules += [ CUDAExtension( diff --git a/tests/util/BUILD b/tests/util/BUILD index 3b89c9073e..68a98cf76c 100644 --- a/tests/util/BUILD +++ b/tests/util/BUILD @@ -58,6 +58,8 @@ cc_library( deps = [ "@googletest//:gtest_main", ] + select({ + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], ":windows": ["@tensorrt_win//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], diff --git a/third_party/tensorrt_rtx/archive/BUILD b/third_party/tensorrt_rtx/archive/BUILD index 014158110a..88dc42c943 100644 --- a/third_party/tensorrt_rtx/archive/BUILD +++ b/third_party/tensorrt_rtx/archive/BUILD @@ -3,7 +3,15 @@ load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") package(default_visibility = ["//visibility:public"]) config_setting( - name = "windows", + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], +) + +config_setting( + name = "rtx_win", constraint_values = [ "@platforms//os:windows", ], @@ -16,10 +24,10 @@ cc_library( "include/NvInfer*.h", ], allow_empty = True, - exclude = [ - "include/NvInferPlugin.h", - "include/NvInferPluginUtils.h", - ], + # exclude = [ + # "include/NvInferPlugin.h", + # "include/NvInferPluginUtils.h", + # ], ), includes = ["include/"], visibility = ["//visibility:private"], @@ -28,8 +36,8 @@ cc_library( cc_import( name = "nvinfer_lib", shared_library = select({ - ":windows": "lib/tensorrt_rtx_1_0.dll", - "//conditions:default": "lib/libtensorrt_rtx.so", + ":rtx_win": "lib/tensorrt_rtx_1_0.dll", + ":rtx_x86_64": "lib/libtensorrt_rtx.so", }), visibility = ["//visibility:private"], ) @@ -37,7 +45,7 @@ cc_import( cc_import( name = "nvinfer_static_lib", static_library = select({ - ":windows": "lib/tensorrt_rtx_1_0.lib", + ":rtx_win": "lib/tensorrt_rtx_1_0.lib", }), visibility = ["//visibility:private"], ) @@ -49,10 +57,10 @@ cc_library( "nvinfer_headers", "nvinfer_lib", ] + select({ - ":windows": [ + ":rtx_win": [ "nvinfer_static_lib", "@cuda_win//:cudart", ], - "//conditions:default": ["@cuda//:cudart"], + ":rtx_x86_64": ["@cuda//:cudart"], }), ) diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl index 4f03473c08..039ed62cc2 100644 --- a/toolchains/ci_workspaces/MODULE.bazel.tmpl +++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl @@ -81,6 +81,16 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + sha256 = "b1222e08f9d473f0bcc06c6a76bf2b1327a106dcee671415c4c46833a105a425", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "http://cuda-repo/release-candidates/Libraries/TensorRT/v10.12/10.12.0.35-51f47a12/12.9-r575/Linux-x64-manylinux_2_28-winjit/tar/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz", + ], +) + http_archive( name = "tensorrt_sbsa", build_file = "@//third_party/tensorrt/archive:BUILD", @@ 
-108,6 +118,14 @@ http_archive( ], ) +http_archive( + name = "tensorrt_win", + build_file = "@//third_party/tensorrt/archive:BUILD", + strip_prefix = "TensorRT-10.11.0.33", + urls = [ + "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/zip/TensorRT-10.11.0.33.Windows.win10.cuda-12.9.zip", + ], +) #################################################################################### # Locally installed dependencies (use in cases of custom dependencies or aarch64) diff --git a/toolchains/dep_collection/defs.bzl b/toolchains/dep_collection/defs.bzl index 873ef7ec42..6eaa710261 100644 --- a/toolchains/dep_collection/defs.bzl +++ b/toolchains/dep_collection/defs.bzl @@ -1,7 +1,7 @@ # buildifier: disable=module-docstring DependencyCollectionInfo = provider(doc = "", fields = ["type"]) -collection_types = ["default", "jetpack", "rtx"] +collection_types = ["default", "jetpack"] def _impl(ctx): _type = ctx.build_setting_value From 5bbb60d0b0a23cb657fc2181fe508381c673c5d6 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 11 Jul 2025 17:11:23 -0700 Subject: [PATCH 04/12] test --- .../conversionctx/ConversionCtx.cpp | 16 +++---- core/conversion/converters/BUILD | 47 +++++++++---------- cpp/bin/torchtrtc/main.cpp | 22 +++++---- cpp/src/compile_spec.cpp | 22 ++++----- py/torch_tensorrt/__init__.py | 11 +++-- py/torch_tensorrt/trt_alias.py | 26 +++++----- setup.py | 8 +++- toolchains/ci_workspaces/MODULE.bazel.tmpl | 9 ++-- 8 files changed, 81 insertions(+), 80 deletions(-) diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index c821698fe3..625ef1b669 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -59,29 +59,27 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) for (auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) { switch (*p) { case nvinfer1::DataType::kHALF: - // tensorrt_rtx is strongly typed, cannot set fp16 by builder config, only do this for tensorrt build - #ifndef TRT_MAJOR_RTX +// tensorrt_rtx is strongly typed, cannot set fp16 by builder config, only do this for tensorrt build +#ifndef TRT_MAJOR_RTX TORCHTRT_CHECK( builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16"); cfg->setFlag(nvinfer1::BuilderFlag::kFP16); break; - #endif +#endif case nvinfer1::DataType::kINT8: - // tensorrt_rtx is strongly typed, cannot set int8 by builder config, only do this for tensorrt build - #ifndef TRT_MAJOR_RTX +// tensorrt_rtx is strongly typed, cannot set int8 by builder config, only do this for tensorrt build +#ifndef TRT_MAJOR_RTX TORCHTRT_CHECK( builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8"); cfg->setFlag(nvinfer1::BuilderFlag::kINT8); if (!settings.calibrator) { LOG_INFO( - "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes - obtained from Quantization aware training. For more details, refer to - https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); + "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. 
For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); } else { cfg->setInt8Calibrator(settings.calibrator); } break; - #endif +#endif case nvinfer1::DataType::kFLOAT: break; case nvinfer1::DataType::kINT32: diff --git a/core/conversion/converters/BUILD b/core/conversion/converters/BUILD index ae417636e1..608b768731 100644 --- a/core/conversion/converters/BUILD +++ b/core/conversion/converters/BUILD @@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", }, ) @@ -31,7 +31,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -41,7 +41,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -64,16 +64,16 @@ cc_library( "//core/conversion/conversionctx", "//core/util:prelude", ] + select({ - ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":jetpack": ["@tensorrt_l4t//:nvinfer"], ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], - ":windows": ["@tensorrt_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], - ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, @@ -92,28 +92,26 @@ cc_library( "//core/conversion/conversionctx", "//core/util:prelude", ] + select({ - ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":jetpack": ["@tensorrt_l4t//:nvinfer"], ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], - ":windows": ["@tensorrt_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], - ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, ) - cc_library( name = "converters", srcs = [ "NodeConverterRegistry.cpp", "impl/activation.cpp", - # "impl/batch_norm.cpp", "impl/bitwise.cpp", "impl/cast.cpp", "impl/chunk.cpp", @@ -126,14 +124,11 @@ cc_library( "impl/element_wise.cpp", "impl/expand.cpp", "impl/internal_ops.cpp", - # "impl/interpolate.cpp", "impl/layer_norm.cpp", "impl/linear.cpp", "impl/lstm_cell.cpp", "impl/matrix_multiply.cpp", "impl/max.cpp", - # "impl/normalize.cpp", - # "impl/pooling.cpp", "impl/quantization.cpp", "impl/reduce.cpp", "impl/reflection_pad.cpp", @@ -147,14 +142,14 @@ cc_library( "impl/unary.cpp", "impl/unsqueeze.cpp", ] + select({ + ":rtx_win": [], # exclude plugins from rtx build ":rtx_x86_64": [], - ":rtx_win": [], - "//conditions:default": [ + "//conditions:default": [ + "impl/batch_norm.cpp", "impl/interpolate.cpp", "impl/normalize.cpp", "impl/pooling.cpp", - 
"impl/batch_norm.cpp", ], }), hdrs = [ @@ -168,16 +163,16 @@ cc_library( "//core/plugins:torch_tensorrt_plugins", "//core/util:prelude", ] + select({ - ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":jetpack": ["@tensorrt_l4t//:nvinfer"], ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], - ":windows": ["@tensorrt_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], ":sbsa": ["@tensorrt_sbsa//:nvinfer"], - ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp index 60c76da049..9542a54739 100644 --- a/cpp/bin/torchtrtc/main.cpp +++ b/cpp/bin/torchtrtc/main.cpp @@ -336,8 +336,12 @@ int main(int argc, char** argv) { if (calibration_cache_file) { calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file)); } - - // auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path); +#ifndef TRT_MAJOR_RTX + auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path); +#else + // rtx build has no calibrator + auto calibrator = nullptr; +#endif compile_settings.require_full_compilation = require_full_compilation; @@ -369,13 +373,13 @@ int main(int argc, char** argv) { compile_settings.enabled_precisions.insert(torch::kF16); } else if (dtype == torchtrt::DataType::kChar) { compile_settings.enabled_precisions.insert(torch::kI8); - // if (calibration_cache_file) { - // compile_settings.ptq_calibrator = calibrator; - // } else { - // torchtrt::logging::log( - // torchtrt::logging::Level::kINFO, - // "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); - // } + if (calibration_cache_file) { + // compile_settings.ptq_calibrator = calibrator; + } else { + torchtrt::logging::log( + torchtrt::logging::Level::kINFO, + "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. 
For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); + } } else { std::stringstream ss; ss << "Invalid precision given for enabled kernel precision, options are [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | char | int8 | i8 ], found: "; diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 8a950f3b8a..041f52ffe3 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -153,19 +153,15 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool internal.partitioning_info.cast_int8_inputs = true; - // if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != - // internal.convert_info.engine_settings.enabled_precisions.end()) { - // internal.partitioning_info.cast_int8_inputs = false; - // if (external.ptq_calibrator) { - // internal.convert_info.engine_settings.calibrator = external.ptq_calibrator; - // } else { - // internal.lower_info.unfreeze_module = true; - // internal.lower_info.disable_cse = true; - // internal.convert_info.engine_settings.calibrator = nullptr; - // } - // } else { - // internal.convert_info.engine_settings.calibrator = nullptr; - // } + if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != + internal.convert_info.engine_settings.enabled_precisions.end()) { + internal.partitioning_info.cast_int8_inputs = false; + internal.lower_info.unfreeze_module = true; + internal.lower_info.disable_cse = true; + // internal.convert_info.engine_settings.calibrator = nullptr; + } else { + // internal.convert_info.engine_settings.calibrator = nullptr; + } return internal; } diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index a1764c940e..df62e1d4ce 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -43,10 +43,10 @@ def _find_lib(name: str, paths: List[str]) -> str: import torch try: + # note: trt_alias must be imported before any import tensorrt from . 
import trt_alias # noqa: F401 - - import tensorrt # noqa: F401 - print(f"You are using {tensorrt.__name__=} {tensorrt.__version__=}") + + print(f"You are using {trt_alias.tensorrt_package_name=}") except ImportError: tensorrt_version = _parse_semver(__tensorrt_version__) tensorrt_rtx_version = _parse_semver(__tensorrt_rtx_version__) @@ -80,8 +80,9 @@ def _find_lib(name: str, paths: List[str]) -> str: ], }, } + if sys.platform.startswith("win"): - WIN_LIBS = trt_lib[tensorrt.__name__]["win"] + WIN_LIBS = trt_lib[trt_alias.tensorrt_package_name]["win"] WIN_PATHS = os.environ["PATH"].split(os.path.pathsep) for lib in WIN_LIBS: ctypes.CDLL(_find_lib(lib, WIN_PATHS)) @@ -100,7 +101,7 @@ def _find_lib(name: str, paths: List[str]) -> str: ] elif platform.uname().processor == "aarch64": LINUX_PATHS += ["/usr/lib/aarch64-linux-gnu"] - LINUX_LIBS = trt_lib[tensorrt.__name__]["linux"] + LINUX_LIBS = trt_lib[trt_alias.tensorrt_package_name]["linux"] for lib in LINUX_LIBS: ctypes.CDLL(_find_lib(lib, LINUX_PATHS)) diff --git a/py/torch_tensorrt/trt_alias.py b/py/torch_tensorrt/trt_alias.py index 55bcd4a9e9..cbea413918 100644 --- a/py/torch_tensorrt/trt_alias.py +++ b/py/torch_tensorrt/trt_alias.py @@ -5,6 +5,9 @@ from types import ModuleType from typing import Any +tensorrt_package_name = "" +tensorrt_package_imported = False + def is_rtx_gpu() -> bool: try: @@ -58,6 +61,11 @@ def __dir__(self) -> list[str]: def alias_tensorrt() -> None: + global tensorrt_package_imported + # tensorrt package has been imported, no need to alias again + if tensorrt_package_imported: + return + # Determine package name with env override support for easy testing with tensorrt or tensorrt_rtx # eg: FORCE_TENSORRT_RTX=1 python test.py # eg: FORCE_TENSORRT_STD=1 python test.py @@ -69,25 +77,17 @@ def alias_tensorrt() -> None: else: use_rtx = is_rtx_gpu() + global tensorrt_package_name + tensorrt_package_name = "tensorrt_rtx" if use_rtx else "tensorrt" # Import the appropriate package try: - if use_rtx: - target = importlib.import_module("tensorrt_rtx") - else: - target = importlib.import_module("tensorrt") + target = importlib.import_module(tensorrt_package_name) except ImportError: - # Fallback to standard tensorrt if RTX version not available - print(f"import error when {use_rtx=}, fallback to standard tensorrt") - try: - target = importlib.import_module("tensorrt") - # since we are using the standard tensorrt, we need to set the use_rtx to True - use_rtx = True - except ImportError: - raise RuntimeError("TensorRT package not found") + # no silent fallback: fail loudly here, otherwise `target` would be unbound below + raise RuntimeError(f"TensorRT package not found, failed to import {tensorrt_package_name=}") proxy = TensorRTProxyModule(target) - proxy._package_name = "tensorrt_rtx" if use_rtx else "tensorrt" - + proxy._package_name = tensorrt_package_name sys.modules["tensorrt"] = proxy diff --git a/setup.py b/setup.py index e42e6531bb..3a9e98c0e7 100644 --- a/setup.py +++ b/setup.py @@ -550,7 +550,13 @@ def run(self): if USE_RTX: tensorrt_windows_external_dir = ( lambda: subprocess.check_output( - [BAZEL_EXE, "query", "@tensorrt_rtx_win//:nvinfer", "--output", "location"] + [ + BAZEL_EXE, + "query", + "@tensorrt_rtx_win//:nvinfer", + "--output", + "location", + ] ) .decode("ascii") .strip() diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl index 039ed62cc2..2f29be7a3b 100644 --- a/toolchains/ci_workspaces/MODULE.bazel.tmpl +++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl @@ -119,11 +119,12 @@ http_archive( ) http_archive( - name = "tensorrt_win", - build_file = 
"@//third_party/tensorrt/archive:BUILD", - strip_prefix = "TensorRT-10.11.0.33", + name = "tensorrt_rtx_win", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + sha256 = "49cf1247ada75faa8d538257b763b1c12b9bbb97fcd7765654c55b3ad16bd680", + strip_prefix = "TensorRT-RTX-1.0.0.21", urls = [ - "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/zip/TensorRT-10.11.0.33.Windows.win10.cuda-12.9.zip", + "http://cuda-repo/release-candidates/Libraries/TensorRT/v10.12/10.12.0.35-51f47a12/12.9-r575/Windows10-x64-winjit/zip/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip", ], ) From a053d697e3f5766c344ad4e65bb0d3abf8b2446e Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Sun, 13 Jul 2025 15:46:10 -0700 Subject: [PATCH 05/12] add tensorrt_rtx ci for linux --- .github/scripts/install-torch-tensorrt.sh | 18 +++++ .../build-test-linux-aarch64-jetpack.yml | 2 +- .../workflows/build-test-linux-aarch64.yml | 2 +- .github/workflows/build-test-linux-x86_64.yml | 13 +++- ...nux_aarch64.yml => build_wheels_linux.yml} | 65 +++++++++++-------- .github/workflows/linux-test.yml | 6 ++ pyproject.toml | 12 ---- 7 files changed, 76 insertions(+), 42 deletions(-) rename .github/workflows/{build_wheels_linux_aarch64.yml => build_wheels_linux.yml} (90%) diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index 94de5f022a..9847deba23 100755 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -21,6 +21,24 @@ pip uninstall -y torch torchvision pip install --force-reinstall --pre ${TORCHVISION} --index-url ${INDEX_URL} pip install --force-reinstall --pre ${TORCH} --index-url ${INDEX_URL} +# tensorrt-rtx is not publicly available, so we need to install it from the local path +if [[ ${USE_RTX} == true ]]; then + echo "It is the tensorrt-rtx build, install tensorrt-rtx" + # python version is like 3.11, we need to convert it to cp311 + CPYTHON_TAG="cp${PYTHON_VERSION//./}" + if [[ ${PLATFORM} == win32 ]]; then + curl -L http://cuda-repo/release-candidates/Libraries/TensorRT/v10.12/10.12.0.35-51f47a12/12.9-r575/Windows10-x64-winjit/zip/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip + unzip TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip + pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-win_amd64.whl + else + curl -L http://cuda-repo/release-candidates/Libraries/TensorRT/v10.12/10.12.0.35-51f47a12/12.9-r575/Linux-x64-manylinux_2_28-winjit/tar/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz -o TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz + tar -xzf TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz + pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-linux_x86_64.whl + fi +else + echo "It is the standard tensorrt build" +fi + # Install Torch-TensorRT if [[ ${PLATFORM} == win32 ]]; then diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml index 744a802bfa..ef770ca024 100644 --- a/.github/workflows/build-test-linux-aarch64-jetpack.yml +++ b/.github/workflows/build-test-linux-aarch64-jetpack.yml @@ -66,7 +66,7 @@ jobs: smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt name: Build torch-tensorrt whl package - uses: ./.github/workflows/build_wheels_linux_aarch64.yml + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository 
}} ref: "" diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml index 1f83a51287..bc06d420ec 100644 --- a/.github/workflows/build-test-linux-aarch64.yml +++ b/.github/workflows/build-test-linux-aarch64.yml @@ -63,7 +63,7 @@ jobs: smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt name: Build torch-tensorrt whl package - uses: ./.github/workflows/build_wheels_linux_aarch64.yml + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} ref: "" diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml index 51f3730d02..a03a06b8dc 100644 --- a/.github/workflows/build-test-linux-x86_64.yml +++ b/.github/workflows/build-test-linux-x86_64.yml @@ -1,6 +1,13 @@ name: Build and test Linux x86_64 wheels on: + workflow_call: + inputs: + use-rtx: + description: "Use RTX TensorRT" + default: false + type: boolean + required: false pull_request: push: branches: @@ -61,7 +68,7 @@ jobs: smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt name: Build torch-tensorrt whl package - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} ref: "" @@ -74,6 +81,8 @@ jobs: package-name: ${{ matrix.package-name }} smoke-test-script: ${{ matrix.smoke-test-script }} trigger-event: ${{ github.event_name }} + architecture: "x86_64" + use-rtx: ${{ inputs.use-rtx }} tests-py-torchscript-fe: name: Test torchscript frontend [Python] @@ -338,5 +347,5 @@ jobs: popd concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.use-rtx }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} cancel-in-progress: true diff --git a/.github/workflows/build_wheels_linux_aarch64.yml b/.github/workflows/build_wheels_linux.yml similarity index 90% rename from .github/workflows/build_wheels_linux_aarch64.yml rename to .github/workflows/build_wheels_linux.yml index e2bfeb1540..35b0c91d38 100644 --- a/.github/workflows/build_wheels_linux_aarch64.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -1,4 +1,4 @@ -name: Build Linux Wheels For aarch64 +name: Build Linux Wheels on: workflow_call: @@ -88,11 +88,6 @@ on: required: false default: "python -m build --wheel" type: string - is-jetpack: - description: Set to true if the build is for jetpack - required: false - default: false - type: boolean pip-install-torch-extra-args: # NOTE: Why does this exist? 
# Well setuptools / python packaging doesn't actually allow you to specify dependencies @@ -110,11 +105,22 @@ on: description: 'Timeout for the job (in minutes)' default: 120 type: number + use-rtx: + description: "Use RTX TensorRT" + default: false + type: boolean + required: false + is-jetpack: + description: Set to true if the build is for jetpack + required: false + default: false + type: boolean secrets: PYPI_API_TOKEN: description: An optional token to upload to pypi required: false + permissions: id-token: write contents: read @@ -154,7 +160,7 @@ jobs: fi echo "::endgroup::" - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: # Support the use case where we need to checkout someone's fork repository: ${{ inputs.test-infra-repository }} @@ -205,11 +211,12 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} cuda-version: ${{ env.CU_VERSION }} arch: ${{ env.ARCH }} + - name: Combine Env Var and Build Env Files if: ${{ inputs.env-var-script != '' }} working-directory: ${{ inputs.repository }} run: | - set -x + set -euxo pipefail cat "${{ inputs.env-var-script }}" >> "${BUILD_ENV_FILE}" - name: Add XPU Env Vars in Build Env File if: ${{ matrix.gpu_arch_type == 'xpu' }} @@ -251,7 +258,6 @@ jobs: working-directory: ${{ inputs.repository }} shell: bash -l {0} run: | - #set -euxo pipefail set -x source "${BUILD_ENV_FILE}" export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" @@ -261,28 +267,37 @@ jobs: BUILD_VERSION="${BUILD_VERSION}+${CU_VERSION}" fi echo "BUILD_VERSION=$BUILD_VERSION" - if [[ ${{ inputs.is-jetpack }} == false ]]; then - ${CONDA_RUN} python setup.py bdist_wheel + echo "USE_RTX=$USE_RTX" + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" + if [[ ${{ inputs.use-rtx }} == true ]]; then + echo "Building tensorrt-rtx wheel" + ${CONDA_RUN} python setup.py bdist_wheel --use-rtx else - ${CONDA_RUN} python setup.py bdist_wheel --jetpack --plat-name=linux_tegra_aarch64 + if [[ ${{ inputs.is-jetpack }} == true ]]; then + echo "Building tensorrt wheel for jetpack" + ${CONDA_RUN} python setup.py bdist_wheel --jetpack --plat-name=linux_tegra_aarch64 + else + echo "Building standard tensorrt wheel" + ${CONDA_RUN} python setup.py bdist_wheel + fi fi - name: Repair Manylinux_2_28 Wheel shell: bash -l {0} env: PACKAGE_NAME: ${{ inputs.package-name }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + if: ${{ inputs.architecture == 'x86_64' }} run: | set -euxo pipefail source "${BUILD_ENV_FILE}" - # for pkg in ${{ inputs.repository }}/dist/*-linux_*.whl; do - # # if the glob didn't match anything - # if [[ ! -e $pkg ]]; then - # continue - # fi - # abs_pkg=$(realpath $pkg) - # ./test-infra/.github/scripts/repair_manylinux_2_28.sh $abs_pkg - # done - echo "Repair Manylinux_2_28 Wheel is not supported for aarch64" + for pkg in ${{ inputs.repository }}/dist/*-linux_*.whl; do + # if the glob didn't match anything + if [[ ! 
-e $pkg ]]; then + continue + fi + abs_pkg=$(realpath $pkg) + ./test-infra/.github/scripts/repair_manylinux_2_28.sh $abs_pkg + done - name: Run Post-Script if: ${{ inputs.post-script != '' }} uses: ./test-infra/.github/actions/run-script-with-cache @@ -291,7 +306,6 @@ jobs: script: ${{ inputs.post-script }} - name: Smoke Test shell: bash -l {0} - if: ${{ inputs.is-jetpack == false }} env: PACKAGE_NAME: ${{ inputs.package-name }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} @@ -328,16 +342,15 @@ jobs: - name: Upload wheel to GitHub continue-on-error: true - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: ${{ env.ARTIFACT_NAME }} path: ${{ inputs.repository }}/dist/ upload: needs: build - uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main - # for jetpack builds, only upload to pytorch index for nightly builds - if: ${{ inputs.is-jetpack == false || (github.event_name == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) }} + uses: ./.github/workflows/_binary_upload.yml + if: always() with: repository: ${{ inputs.repository }} ref: ${{ inputs.ref }} diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index 15016ecd36..c48d864239 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -53,6 +53,11 @@ on: description: 'Name to give artifacts uploaded from ${RUNNER_ARTIFACT_DIR}' default: '' type: string + use-rtx: + description: "Use RTX TensorRT" + default: false + type: boolean + required: false jobs: test: @@ -135,6 +140,7 @@ jobs: working-directory: ${{ inputs.repository }} env: ALL_SECRETS: ${{ toJSON(secrets) }} + USE_RTX: ${{ inputs.use-rtx }} run: | set -euxo pipefail # shellcheck disable=SC2086 diff --git a/pyproject.toml b/pyproject.toml index 7c98c4f98e..63f9b458b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,17 +6,9 @@ requires = [ "ninja>=1.11.0", "pyyaml>=6.0", "cffi>=1.15.1", - "typing-extensions>=4.7.0", - "future>=0.18.3", - "tensorrt-cu12>=10.11.0,<10.12.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "tensorrt-cu12>=10.3.0,<10.4.0; platform_machine == 'aarch64' and 'tegra' in platform_release", "torch>=2.9.0.dev,<2.10.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", "torch>=2.7.0,<2.8.0; platform_machine == 'aarch64' and 'tegra' in platform_release", "pybind11==2.6.2", - "numpy; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "numpy<2.0.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - "sympy", - "dllist", ] build-backend = "setuptools.build_meta" @@ -84,10 +76,6 @@ dependencies = [ dynamic = ["version"] [dependency-groups] -rtx = [ - "tensorrt_rtx", -] - dev = [ "pre-commit>=2.20.0", "black>=22.6.0", From 5d6bc9cb2fe5f507fc0931b6b3d7313fa60a2473 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Sun, 13 Jul 2025 15:57:45 -0700 Subject: [PATCH 06/12] fix ci build --- .github/workflows/build_wheels_linux.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml index 35b0c91d38..2eab046af5 100644 --- a/.github/workflows/build_wheels_linux.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -160,7 +160,7 @@ jobs: fi echo "::endgroup::" - - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@v4 with: # Support the use case where we need to checkout someone's fork repository: ${{ inputs.test-infra-repository }} @@ -216,7 +216,7 @@ jobs: if: ${{ inputs.env-var-script != '' }} working-directory: ${{ inputs.repository }} run: | - set -euxo pipefail + set -x cat "${{ inputs.env-var-script }}" >> "${BUILD_ENV_FILE}" - name: Add XPU Env Vars in Build Env File if: ${{ matrix.gpu_arch_type == 'xpu' }} @@ -342,15 +342,18 @@ jobs: - name: Upload wheel to GitHub continue-on-error: true - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + uses: actions/upload-artifact@v4 with: name: ${{ env.ARTIFACT_NAME }} path: ${{ inputs.repository }}/dist/ upload: needs: build - uses: ./.github/workflows/_binary_upload.yml - if: always() + uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main + # if it is not the jetpack build or tensorrt-rtx build, upload to pytorch index, + # if it is the jetpack build only upload to pytorch_index for nightly builds + # for tensorrt-rtx build, do not upload at all + if: ${{ (inputs.is-jetpack == false && inputs.use-rtx == false) || (inputs.is-jetpack == true && github.event_name == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) }} with: repository: ${{ inputs.repository }} ref: ${{ inputs.ref }} From c41c2da2e9db1c4a654d107232b68fd9278da20c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Sun, 13 Jul 2025 16:40:41 -0700 Subject: [PATCH 07/12] fix cpp error --- core/conversion/conversionctx/ConversionCtx.h | 4 +++- cpp/bin/torchtrtc/main.cpp | 4 +++- cpp/src/compile_spec.cpp | 15 ++++++++++----- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h index e8ed1a686b..df5c2a646d 100644 --- a/core/conversion/conversionctx/ConversionCtx.h +++ b/core/conversion/conversionctx/ConversionCtx.h @@ -26,7 +26,9 @@ struct BuilderSettings { bool allow_shape_tensors = false; ir::Device device; nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD; - // nvinfer1::IInt8Calibrator* calibrator = nullptr; +#ifndef TRT_MAJOR_RTX + nvinfer1::IInt8Calibrator* calibrator = nullptr; +#endif uint64_t num_avg_timing_iters = 1; uint64_t workspace_size = 0; uint64_t dla_sram_size = DLA_SRAM_SIZE; diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp index 9542a54739..b93d977c95 100644 --- a/cpp/bin/torchtrtc/main.cpp +++ b/cpp/bin/torchtrtc/main.cpp @@ -374,7 +374,9 @@ int main(int argc, char** argv) { } else if (dtype == torchtrt::DataType::kChar) { compile_settings.enabled_precisions.insert(torch::kI8); if (calibration_cache_file) { - // compile_settings.ptq_calibrator = calibrator; +#ifndef TRT_MAJOR_RTX + compile_settings.ptq_calibrator = calibrator; +#endif } else { torchtrt::logging::log( torchtrt::logging::Level::kINFO, diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 041f52ffe3..8dba4a76b8 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -152,16 +152,21 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool internal.convert_info.engine_settings.dla_global_dram_size = external.dla_global_dram_size; internal.partitioning_info.cast_int8_inputs = true; - +#ifndef TRT_MAJOR_RTX if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != internal.convert_info.engine_settings.enabled_precisions.end()) { 
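The `#ifndef TRT_MAJOR_RTX` guard in these hunks is the general pattern for calibrator-dependent code: standard TensorRT keeps the PTQ calibrator hook, while TensorRT-RTX builds compile it out and rely on Q/DQ nodes in the graph instead. A compressed sketch of the idea, using a hypothetical `EngineSettings` rather than the real structs:

    struct EngineSettings {
    #ifndef TRT_MAJOR_RTX
      nvinfer1::IInt8Calibrator* calibrator = nullptr;  // PTQ entry point, standard TensorRT only
    #endif
    };

    void apply_int8_settings(nvinfer1::IBuilderConfig* cfg, const EngineSettings& s) {
    #ifndef TRT_MAJOR_RTX
      if (s.calibrator) {
        cfg->setInt8Calibrator(s.calibrator);
      }
    #endif
      // In a TensorRT-RTX build this function intentionally compiles to a no-op.
    }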
    internal.partitioning_info.cast_int8_inputs = false;
-    internal.lower_info.unfreeze_module = true;
-    internal.lower_info.disable_cse = true;
-    // internal.convert_info.engine_settings.calibrator = nullptr;
+    if (external.ptq_calibrator) {
+      internal.convert_info.engine_settings.calibrator = external.ptq_calibrator;
+    } else {
+      internal.lower_info.unfreeze_module = true;
+      internal.lower_info.disable_cse = true;
+      internal.convert_info.engine_settings.calibrator = nullptr;
+    }
  } else {
-    // internal.convert_info.engine_settings.calibrator = nullptr;
+    internal.convert_info.engine_settings.calibrator = nullptr;
  }
+#endif
  return internal;
}

From 0e983cbc529d3c8e655f60d2d7d27bd970b5dacb Mon Sep 17 00:00:00 2001
From: lanluo-nvidia
Date: Sun, 13 Jul 2025 17:32:28 -0700
Subject: [PATCH 08/12] fix ci

---
 .github/workflows/build_wheels_linux.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml
index 2eab046af5..d00805b23d 100644
--- a/.github/workflows/build_wheels_linux.yml
+++ b/.github/workflows/build_wheels_linux.yml
@@ -106,7 +106,7 @@ on:
        default: 120
        type: number
      use-rtx:
-        description: "Use RTX TensorRT"
+        description: "Set to true to use TensorRT-RTX"
        default: false
        type: boolean
        required: false
@@ -211,7 +211,6 @@ jobs:
          python-version: ${{ env.PYTHON_VERSION }}
          cuda-version: ${{ env.CU_VERSION }}
          arch: ${{ env.ARCH }}
-
      - name: Combine Env Var and Build Env Files
        if: ${{ inputs.env-var-script != '' }}
        working-directory: ${{ inputs.repository }}
@@ -367,5 +366,5 @@ jobs:
      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}

 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{inputs.use-rtx}}-${{inputs.is-jetpack}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

From 1eb2e5a6f822c770005acddd9588fe8fc1d2e717 Mon Sep 17 00:00:00 2001
From: lanluo-nvidia
Date: Mon, 14 Jul 2025 12:01:53 -0700
Subject: [PATCH 09/12] add build fix for windows

---
 .bazelrc | 3 +-
 .../build-test-linux-aarch64-jetpack.yml | 2 +-
 .github/workflows/build-test-linux-aarch64.yml | 2 +-
 .github/workflows/build-test-linux-x86_64.yml | 2 +-
 BUILD.bazel | 18 +++++++++
 MODULE.bazel | 1 +
 core/BUILD | 23 +++++++----
 core/conversion/BUILD | 23 +++++++----
 core/conversion/conversionctx/BUILD | 23 +++++++----
 core/conversion/converters/BUILD | 9 +++++
 core/conversion/evaluators/BUILD | 23 +++++++----
 core/conversion/tensorcontainer/BUILD | 23 +++++++----
 core/conversion/var/BUILD | 23 +++++++----
 core/ir/BUILD | 25 ++++++++----
 core/lowering/BUILD | 25 ++++++++----
 core/lowering/passes/BUILD | 13 +++++-
 core/partitioning/BUILD | 23 +++++++----
 core/partitioning/partitioningctx/BUILD | 25 ++++++++----
 core/partitioning/partitioninginfo/BUILD | 25 ++++++++----
 core/partitioning/segmentedblock/BUILD | 23 +++++++----
 core/plugins/BUILD | 40 ++++++++++---------
 core/runtime/BUILD | 23 +++++++----
 core/util/BUILD | 33 +++++++++------
 core/util/logging/BUILD | 20 ++++++----
 cpp/BUILD | 12 ++++--
 cpp/bin/torchtrtc/BUILD | 31 ++++++++++----
 setup.py | 14 +++----
 third_party/tensorrt_rtx/archive/BUILD | 2 +
 toolchains/ci_workspaces/MODULE.bazel.tmpl | 1 +
 toolchains/dep_collection/defs.bzl | 2 +-
 30 files changed, 343 insertions(+), 169 deletions(-)

diff --git a/.bazelrc b/.bazelrc
index 019eaa930c..abf4ac4dc8 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -38,8 +38,7 @@ build:cxx11_abi --define=abi=cxx11_abi

 build:jetpack --//toolchains/dep_collection:compute_libs=jetpack

-build:rtx_x86_64 --cxxopt="-std=c++17" --cxxopt="-fdiagnostics-color=always"
-build:rtx_win --cxxopt="/GS-" --cxxopt="/std:c++17" --cxxopt="/permissive-" --cxxopt="/wd4244" --cxxopt="/wd4267" --cxxopt="/wd4819" --features=windows_export_all_symbols
+build:rtx --//toolchains/dep_collection:compute_libs=rtx

 build:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
 build:use_precompiled_torchtrt --define=torchtrt_src=prebuilt
diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml
index ef770ca024..a2e8a3a736 100644
--- a/.github/workflows/build-test-linux-aarch64-jetpack.yml
+++ b/.github/workflows/build-test-linux-aarch64-jetpack.yml
@@ -65,7 +65,7 @@ jobs:
      post-script: packaging/post_build_script.sh
      smoke-test-script: packaging/smoke_test_script.sh
      package-name: torch_tensorrt
-    name: Build torch-tensorrt whl package
+    name: Build torch-tensorrt whl package for aarch64-jetpack
    uses: ./.github/workflows/build_wheels_linux.yml
    with:
      repository: ${{ matrix.repository }}
diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml
index bc06d420ec..34b3e4fa34 100644
--- a/.github/workflows/build-test-linux-aarch64.yml
+++ b/.github/workflows/build-test-linux-aarch64.yml
@@ -62,7 +62,7 @@ jobs:
      post-script: packaging/post_build_script.sh
      smoke-test-script: packaging/smoke_test_script.sh
      package-name: torch_tensorrt
-    name: Build torch-tensorrt whl package
+    name: Build torch-tensorrt whl package for aarch64
    uses: ./.github/workflows/build_wheels_linux.yml
    with:
      repository: ${{ matrix.repository }}
diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml
index a03a06b8dc..263bf5c274 100644
--- a/.github/workflows/build-test-linux-x86_64.yml
+++ b/.github/workflows/build-test-linux-x86_64.yml
@@ -67,7 +67,7 @@ jobs:
      post-script: packaging/post_build_script.sh
      smoke-test-script: packaging/smoke_test_script.sh
      package-name: torch_tensorrt
-    name: Build torch-tensorrt whl package
+    name: Build torch-tensorrt whl package for x86_64
    uses: ./.github/workflows/build_wheels_linux.yml
    with:
      repository: ${{ matrix.repository }}
diff --git a/BUILD.bazel b/BUILD.bazel
index 950839a40e..dc632ba73b 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1,5 +1,12 @@
+load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
 load("@rules_pkg//:pkg.bzl", "pkg_tar")

+bool_flag(
+    name = "use_rtx",
+    build_setting_default = False,
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "windows",
     constraint_values = [
@@ -7,6 +14,14 @@ config_setting(
     ],
 )

+config_setting(
+    name = "rtx_win",
+    constraint_values = [
+        "@platforms//os:windows",
+    ],
+    flag_values = {":use_rtx": "true"},
+)
+
 pkg_tar(
     name = "include_core",
     package_dir = "include/torch_tensorrt",
@@ -52,6 +67,7 @@ pkg_tar(
 pkg_tar(
     name = "lib",
     srcs = select({
+        ":rtx_win": ["//cpp/lib:torchtrt.dll"],
         ":windows": ["//cpp/lib:torchtrt.dll"],
         "//conditions:default": [
             "//cpp/lib:libtorchtrt.so",
@@ -66,6 +82,7 @@ pkg_tar(
 pkg_tar(
     name = "lib_rt",
     srcs = select({
+        ":rtx_win": ["//cpp/lib:torchtrt_runtime.dll"],
         ":windows": ["//cpp/lib:torchtrt_runtime.dll"],
         "//conditions:default": [
             "//cpp/lib:libtorchtrt_runtime.so",
@@ -98,6 +115,7 @@ pkg_tar(
         ":include_core",
         ":lib",
     ] + select({
+        ":rtx_win": [],
         ":windows": [],
         "//conditions:default": [":bin"],
     }),
diff --git a/MODULE.bazel b/MODULE.bazel
index 4fad87365f..80ef4b7f5e 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -8,6 +8,7 @@ bazel_dep(name = "googletest", version = "1.16.0")
 bazel_dep(name = "platforms", version = "0.0.11")
 bazel_dep(name = "rules_cc", version = "0.1.1")
 bazel_dep(name = "rules_python", version = "1.3.0")
+bazel_dep(name = "bazel_skylib", version = "1.7.1")

 python = use_extension("@rules_python//python/extensions:python.bzl", "python")
 python.toolchain(
diff --git a/core/BUILD b/core/BUILD
index 88eafeec2a..c6744c66c1 100644
--- a/core/BUILD
+++ b/core/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -74,16 +80,17 @@ cc_library(
         "//core/runtime",
         "//core/util/logging",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/conversion/BUILD b/core/conversion/BUILD
index 200543b486..480481e6bd 100644
--- a/core/conversion/BUILD
+++ b/core/conversion/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -69,16 +75,17 @@ cc_library(
         "//core/ir",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/conversion/conversionctx/BUILD b/core/conversion/conversionctx/BUILD
index 19794520e1..d0ad2e7bd1 100644
--- a/core/conversion/conversionctx/BUILD
+++ b/core/conversion/conversionctx/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -64,16 +70,17 @@ cc_library(
         "//core/ir",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/conversion/converters/BUILD b/core/conversion/converters/BUILD
index 608b768731..84864cea10 100644
--- a/core/conversion/converters/BUILD
+++ b/core/conversion/converters/BUILD
@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -72,6 +78,7 @@ cc_library(
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
         ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
@@ -100,6 +107,7 @@ cc_library(
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
         ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
@@ -171,6 +179,7 @@ cc_library(
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
         ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
diff --git a/core/conversion/evaluators/BUILD b/core/conversion/evaluators/BUILD
index a6714e8a90..e9fc358582 100644
--- a/core/conversion/evaluators/BUILD
+++ b/core/conversion/evaluators/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -70,16 +76,17 @@ cc_library(
         "//core/conversion/var",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/conversion/tensorcontainer/BUILD b/core/conversion/tensorcontainer/BUILD
index a671301533..c6f56b70c8 100644
--- a/core/conversion/tensorcontainer/BUILD
+++ b/core/conversion/tensorcontainer/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -63,16 +69,17 @@ cc_library(
     deps = [
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/conversion/var/BUILD b/core/conversion/var/BUILD
index caf57a2ba3..ce58ca70f3 100644
--- a/core/conversion/var/BUILD
+++ b/core/conversion/var/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -66,16 +72,17 @@ cc_library(
         "//core/conversion/tensorcontainer",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/ir/BUILD b/core/ir/BUILD
index f24468a519..5dfdeded90 100644
--- a/core/ir/BUILD
+++ b/core/ir/BUILD
@@ -6,15 +6,19 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )
+
 config_setting(
     name = "rtx_x86_64",
     constraint_values = [
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -22,14 +26,18 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )
+
 config_setting(
     name = "sbsa",
     constraint_values = [
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -39,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -64,16 +72,17 @@ cc_library(
     deps = [
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/lowering/BUILD b/core/lowering/BUILD
index 354c1b0331..6084198c74 100644
--- a/core/lowering/BUILD
+++ b/core/lowering/BUILD
@@ -6,15 +6,19 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )
+
 config_setting(
     name = "rtx_x86_64",
     constraint_values = [
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -22,14 +26,18 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )
+
 config_setting(
     name = "sbsa",
     constraint_values = [
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -39,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -66,16 +74,17 @@ cc_library(
         "//core/lowering/passes",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/lowering/passes/BUILD b/core/lowering/passes/BUILD
index 845abdb62a..bd8462eed1 100644
--- a/core/lowering/passes/BUILD
+++ b/core/lowering/passes/BUILD
@@ -30,6 +30,16 @@ config_setting(
     },
 )

+config_setting(
+    name = "rtx_win",
+    constraint_values = [
+        "@platforms//os:windows",
+    ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
+)
+
 config_setting(
     name = "windows",
     constraint_values = [
@@ -76,9 +86,10 @@ cc_library(
     deps = [
         "//core/util:prelude",
     ] + select({
+        ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
         ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":windows": ["@libtorch_win//:libtorch"],
-        ":jetpack": ["@torch_l4t//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/partitioning/BUILD b/core/partitioning/BUILD
index b7603b62d0..bbbb89af37 100644
--- a/core/partitioning/BUILD
+++ b/core/partitioning/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -71,16 +77,17 @@ cc_library(
         "//core/partitioning/segmentedblock",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/partitioning/partitioningctx/BUILD b/core/partitioning/partitioningctx/BUILD
index c0ac8c984a..bae63241a6 100644
--- a/core/partitioning/partitioningctx/BUILD
+++ b/core/partitioning/partitioningctx/BUILD
@@ -6,15 +6,19 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )
+
 config_setting(
     name = "rtx_x86_64",
     constraint_values = [
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -22,14 +26,18 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )
+
 config_setting(
     name = "sbsa",
     constraint_values = [
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -39,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -65,16 +73,17 @@ cc_library(
         "//core/partitioning/segmentedblock",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/partitioning/partitioninginfo/BUILD b/core/partitioning/partitioninginfo/BUILD
index 52905ab252..04515abb10 100644
--- a/core/partitioning/partitioninginfo/BUILD
+++ b/core/partitioning/partitioninginfo/BUILD
@@ -6,15 +6,19 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )
+
 config_setting(
     name = "rtx_x86_64",
     constraint_values = [
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -22,14 +26,18 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )
+
 config_setting(
     name = "sbsa",
     constraint_values = [
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -39,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -64,16 +72,17 @@ cc_library(
         "//core/lowering",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/partitioning/segmentedblock/BUILD b/core/partitioning/segmentedblock/BUILD
index b3dadf236d..73916bb6bd 100644
--- a/core/partitioning/segmentedblock/BUILD
+++ b/core/partitioning/segmentedblock/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -66,16 +72,17 @@ cc_library(
         "//core/lowering",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/plugins/BUILD b/core/plugins/BUILD
index 59360ed0e4..00503552f2 100644
--- a/core/plugins/BUILD
+++ b/core/plugins/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -52,12 +58,11 @@ config_setting(
     ],
 )

-
 cc_library(
     name = "torch_tensorrt_plugins",
     srcs = select({
-        ":rtx_x86_64": [],
         ":rtx_win": [],
+        ":rtx_x86_64": [],
         "//conditions:default": [
             "impl/interpolate_plugin.cpp",
             "impl/normalize_plugin.cpp",
         ],
     }),
     hdrs = select({
-        ":rtx_x86_64": [],
         ":rtx_win": [],
+        ":rtx_x86_64": [],
         "//conditions:default": [
             "impl/interpolate_plugin.h",
             "impl/normalize_plugin.h",
@@ -82,30 +87,29 @@ cc_library(
     deps = [
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": [],
-        ":rtx_win": [],
-        ":windows": [
-            "@tensorrt_win//:nvinfer",
-            "@tensorrt_win//:nvinferplugin",
+        ":jetpack": [
+            "@tensorrt_l4t//:nvinfer",
+            "@tensorrt_l4t//:nvinferplugin",
         ],
+        ":rtx_win": [],
+        ":rtx_x86_64": [],
         ":sbsa": [
             "@tensorrt_sbsa//:nvinfer",
             "@tensorrt_sbsa//:nvinferplugin",
         ],
-        ":jetpack": [
-            "@tensorrt_l4t//:nvinfer",
-            "@tensorrt_l4t//:nvinferplugin",
+        ":windows": [
+            "@tensorrt_win//:nvinfer",
+            "@tensorrt_win//:nvinferplugin",
         ],
         "//conditions:default": [
             "@tensorrt//:nvinfer",
             "@tensorrt//:nvinferplugin",
         ],
     }) + select({
-        ":rtx_x86_64": [],
-        ":rtx_win": [],
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/runtime/BUILD b/core/runtime/BUILD
index 79480e7177..a573cfed78 100644
--- a/core/runtime/BUILD
+++ b/core/runtime/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -78,16 +84,17 @@ cc_library(
         "//core/plugins:torch_tensorrt_plugins",
         "//core/util:prelude",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/util/BUILD b/core/util/BUILD
index 947917089e..0ed97a5eda 100644
--- a/core/util/BUILD
+++ b/core/util/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,9 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -23,6 +26,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -31,7 +37,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +47,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -75,9 +81,10 @@ cc_library(
     deps = [
         ":macros",
     ] + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
 )
@@ -110,9 +117,10 @@ cc_library(
         "build_info.h",
     ],
     deps = select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
 )
@@ -129,16 +137,17 @@ cc_library(
         ":macros",
         "//core/util/logging",
     ] + select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/core/util/logging/BUILD b/core/util/logging/BUILD
index fc7264e920..1ac834b021 100644
--- a/core/util/logging/BUILD
+++ b/core/util/logging/BUILD
@@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -16,6 +16,7 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {"//toolchains/dep_collection:compute_libs": "rtx"},
 )

 config_setting(
@@ -23,6 +24,7 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {"//toolchains/dep_collection:compute_libs": "rtx"},
 )

 config_setting(
@@ -31,7 +33,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "default"
+        "//toolchains/dep_collection:compute_libs": "default",
     },
 )

@@ -41,7 +43,7 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )

@@ -50,6 +52,7 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {"//toolchains/dep_collection:compute_libs": "default"},
 )

 cc_library(
@@ -61,16 +64,17 @@ cc_library(
         "TorchTRTLogger.h",
     ],
     deps = select({
-        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
         ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
-        ":windows": ["@tensorrt_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
         ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
-        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
         "//conditions:default": ["@tensorrt//:nvinfer"],
     }) + select({
-        ":windows": ["@libtorch_win//:libtorch"],
-        ":use_torch_whl": ["@torch_whl//:libtorch"],
         ":jetpack": ["@torch_l4t//:libtorch"],
+        ":rtx_win": ["@libtorch_win//:libtorch"],
+        ":use_torch_whl": ["@torch_whl//:libtorch"],
+        ":windows": ["@libtorch_win//:libtorch"],
         "//conditions:default": ["@libtorch"],
     }),
     alwayslink = True,
diff --git a/cpp/BUILD b/cpp/BUILD
index aebc6ea329..2b5877aa4a 100644
--- a/cpp/BUILD
+++ b/cpp/BUILD
@@ -2,13 +2,15 @@ load("@rules_cc//cc:defs.bzl", "cc_library")

 package(default_visibility = ["//visibility:public"])

-
 config_setting(
     name = "rtx_x86_64",
     constraint_values = [
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 config_setting(
@@ -16,9 +18,11 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

-
 cc_library(
     name = "torch_tensorrt",
     srcs = [
@@ -27,8 +31,8 @@ cc_library(
         "src/torch_tensorrt.cpp",
         "src/types.cpp",
     ] + select({
-        ":rtx_x86_64": [],
         ":rtx_win": [],
+        ":rtx_x86_64": [],
         "//conditions:default": [
             "src/ptq.cpp",
         ],
@@ -38,8 +42,8 @@ cc_library(
         "include/torch_tensorrt/macros.h",
         "include/torch_tensorrt/torch_tensorrt.h",
     ] + select({
-        ":rtx_x86_64": [],
         ":rtx_win": [],
+        ":rtx_x86_64": [],
         "//conditions:default": [
             "include/torch_tensorrt/ptq.h",
         ],
diff --git a/cpp/bin/torchtrtc/BUILD b/cpp/bin/torchtrtc/BUILD
index d858d4de93..2c87eddae2 100644
--- a/cpp/bin/torchtrtc/BUILD
+++ b/cpp/bin/torchtrtc/BUILD
@@ -5,7 +5,7 @@ package(default_visibility = ["//visibility:public"])
 config_setting(
     name = "use_torch_whl",
     flag_values = {
-        "//toolchains/dep_src:torch": "whl"
+        "//toolchains/dep_src:torch": "whl",
     },
 )

@@ -15,9 +15,10 @@ config_setting(
         "@platforms//cpu:aarch64",
     ],
     flag_values = {
-        "//toolchains/dep_collection:compute_libs": "jetpack"
+        "//toolchains/dep_collection:compute_libs": "jetpack",
     },
 )
+
 config_setting(
     name = "windows",
     constraint_values = [
@@ -25,6 +26,16 @@ config_setting(
     ],
 )

+config_setting(
+    name = "rtx_win",
+    constraint_values = [
+        "@platforms//os:windows",
+    ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
+)
+
 cc_binary(
     name = "torchtrtc",
     srcs = [
@@ -44,17 +55,21 @@ cc_binary(
         "//cpp:torch_tensorrt",
         "//third_party/args",
     ] + select({
-        ":windows": [
+        ":jetpack": [
+            "@torch_l4t//:caffe2",
+            "@torch_l4t//:libtorch",
+        ],
+        ":rtx_win": [
             "@libtorch_win//:caffe2",
-            "@libtorch_win//:libtorch"
+            "@libtorch_win//:libtorch",
         ],
         ":use_torch_whl": [
             "@torch_whl//:caffe2",
-            "@torch_whl//:libtorch"
+            "@torch_whl//:libtorch",
         ],
-        ":jetpack": [
-            "@torch_l4t//:caffe2",
-            "@torch_l4t//:libtorch"
+        ":windows": [
+            "@libtorch_win//:caffe2",
+            "@libtorch_win//:libtorch",
         ],
         "//conditions:default": [
             "@libtorch",
diff --git a/setup.py b/setup.py
index 3a9e98c0e7..529a6399d4 100644
--- a/setup.py
+++ b/setup.py
@@ -222,15 +222,13 @@ def build_libtorchtrt_cxx11_abi(
         cmd.append("--config=python")

     if IS_WINDOWS:
-        if USE_RTX:
-            cmd.append("--config=rtx_win")
-        else:
-            cmd.append("--config=windows")
+        cmd.append("--config=windows")
     else:
-        if USE_RTX:
-            cmd.append("--config=rtx_x86_64")
-        else:
-            cmd.append("--config=linux")
+        cmd.append("--config=linux")
+
+    if USE_RTX:
+        cmd.append("--config=rtx")
+        print("TensorRT RTX build")

     if IS_JETPACK:
         cmd.append("--config=jetpack")
diff --git a/third_party/tensorrt_rtx/archive/BUILD b/third_party/tensorrt_rtx/archive/BUILD
index 88dc42c943..ec6ebbe985 100644
--- a/third_party/tensorrt_rtx/archive/BUILD
+++ b/third_party/tensorrt_rtx/archive/BUILD
@@ -8,6 +8,7 @@ config_setting(
         "@platforms//cpu:x86_64",
         "@platforms//os:linux",
     ],
+    flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"},
 )

 config_setting(
@@ -15,6 +16,7 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
+    flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"},
 )

 cc_library(
diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl
index 2f29be7a3b..7345a056eb 100644
--- a/toolchains/ci_workspaces/MODULE.bazel.tmpl
+++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl
@@ -8,6 +8,7 @@ bazel_dep(name = "googletest", version = "1.16.0")
 bazel_dep(name = "platforms", version = "0.0.11")
 bazel_dep(name = "rules_cc", version = "0.1.1")
 bazel_dep(name = "rules_python", version = "1.3.0")
+bazel_dep(name = "bazel_skylib", version = "1.3.0")

 python = use_extension("@rules_python//python/extensions:python.bzl", "python")
 python.toolchain(
diff --git a/toolchains/dep_collection/defs.bzl b/toolchains/dep_collection/defs.bzl
index 6eaa710261..873ef7ec42 100644
--- a/toolchains/dep_collection/defs.bzl
+++ b/toolchains/dep_collection/defs.bzl
@@ -1,7 +1,7 @@
 # buildifier: disable=module-docstring
 DependencyCollectionInfo = provider(doc = "", fields = ["type"])

-collection_types = ["default", "jetpack"]
+collection_types = ["default", "jetpack", "rtx"]

 def _impl(ctx):
     _type = ctx.build_setting_value

From a871a959c36d2eba1e79e3e3ffa178f6f8629e69 Mon Sep 17 00:00:00 2001
From: lanluo-nvidia
Date: Mon, 14 Jul 2025 12:23:13 -0700
Subject: [PATCH 10/12] fix windows build in ci

---
 .github/workflows/build_wheels_linux.yml | 2 +-
 BUILD.bazel | 11 +++--------
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml
index d00805b23d..90d5d804dd 100644
--- a/.github/workflows/build_wheels_linux.yml
+++ b/.github/workflows/build_wheels_linux.yml
@@ -366,5 +366,5 @@ jobs:
      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}

 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{inputs.use-rtx}}-${{inputs.is-jetpack}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{inputs.use-rtx}}-${{inputs.architecture}}-${{inputs.is-jetpack}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
diff --git a/BUILD.bazel b/BUILD.bazel
index dc632ba73b..11a96d6ae3 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1,12 +1,5 @@
-load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
 load("@rules_pkg//:pkg.bzl", "pkg_tar")

-bool_flag(
-    name = "use_rtx",
-    build_setting_default = False,
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "windows",
     constraint_values = [
@@ -19,7 +12,9 @@ config_setting(
     constraint_values = [
         "@platforms//os:windows",
     ],
-    flag_values = {":use_rtx": "true"},
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
 )

 pkg_tar(

From cce81ae6171b6ffa62b2fd30f5f06d275a3570e8 Mon Sep 17 00:00:00 2001
From: lanluo-nvidia
Date: Mon, 14 Jul 2025 16:04:37 -0700
Subject: [PATCH 11/12] fix issue where ci build is skipped

---
 .github/scripts/install-torch-tensorrt.sh | 12 ++++++++++++
 .github/workflows/build_wheels_linux.yml | 2 +-
 MODULE.bazel | 1 -
 py/torch_tensorrt/trt_alias.py | 20 +++++++++++++-------
 toolchains/ci_workspaces/MODULE.bazel.tmpl | 1 -
 5 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh
index 9847deba23..77d433f66e 100755
--- a/.github/scripts/install-torch-tensorrt.sh
+++ b/.github/scripts/install-torch-tensorrt.sh
@@ -29,10 +29,16 @@ if [[ ${USE_RTX} == true ]]; then
    if [[ ${PLATFORM} == win32 ]]; then
        curl -L http://cuda-repo/release-candidates/Libraries/TensorRT/v10.12/10.12.0.35-51f47a12/12.9-r575/Windows10-x64-winjit/zip/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip
        unzip TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip
+        rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib
+        export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH
+        echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
        pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-win_amd64.whl
    else
        curl -L http://cuda-repo/release-candidates/Libraries/TensorRT/v10.12/10.12.0.35-51f47a12/12.9-r575/Linux-x64-manylinux_2_28-winjit/tar/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz -o TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz
        tar -xzf TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz
+        rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib
+        export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH
+        echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
        pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-linux_x86_64.whl
    fi
 else
@@ -47,4 +53,10 @@ else
    pip install /opt/torch-tensorrt-builds/torch_tensorrt*.whl
 fi

+if [[ ${USE_RTX} == true ]]; then
+    # currently tensorrt is installed automatically by installing torch-tensorrt, since it is a dependency of torch-tensorrt in pyproject.toml
+    # so we need to uninstall it to avoid conflicts
+    pip uninstall -y tensorrt tensorrt_cu12 tensorrt_cu12_bindings tensorrt_cu12_libs
+fi
+
 echo -e "Running test script";
diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml
index 90d5d804dd..e9b7bfd7f8 100644
--- a/.github/workflows/build_wheels_linux.yml
+++ b/.github/workflows/build_wheels_linux.yml
@@ -139,7 +139,7 @@ jobs:
      UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }}
      ARCH: ${{ inputs.architecture }}
      BUILD_TARGET: ${{ inputs.build-target }}
-    name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}
+    name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}-${{ inputs.architecture }}-${{ inputs.use-rtx }}-${{ inputs.is-jetpack }}
    runs-on: ${{ matrix.validation_runner }}
    environment: ${{(inputs.trigger-event == 'schedule' || (inputs.trigger-event == 'push' && (startsWith(github.event.ref, 'refs/heads/nightly') || startsWith(github.event.ref, 'refs/tags/v')))) && 'pytorchbot-env' || ''}}
    container:
diff --git a/MODULE.bazel b/MODULE.bazel
index 80ef4b7f5e..4fad87365f 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -8,7 +8,6 @@ bazel_dep(name = "googletest", version = "1.16.0")
 bazel_dep(name = "platforms", version = "0.0.11")
 bazel_dep(name = "rules_cc", version = "0.1.1")
 bazel_dep(name = "rules_python", version = "1.3.0")
-bazel_dep(name = "bazel_skylib", version = "1.7.1")

 python = use_extension("@rules_python//python/extensions:python.bzl", "python")
 python.toolchain(
diff --git a/py/torch_tensorrt/trt_alias.py b/py/torch_tensorrt/trt_alias.py
index cbea413918..963d2c06fc 100644
--- a/py/torch_tensorrt/trt_alias.py
+++ b/py/torch_tensorrt/trt_alias.py
@@ -81,14 +81,20 @@ def alias_tensorrt() -> None:
    tensorrt_package_name = "tensorrt_rtx" if use_rtx else "tensorrt"
    # Import the appropriate package
    try:
-        target = importlib.import_module(tensorrt_package_name)
-    except ImportError:
+        target_module = importlib.import_module(tensorrt_package_name)
+        proxy = TensorRTProxyModule(target_module)
+        proxy._package_name = tensorrt_package_name
+        sys.modules["tensorrt"] = proxy
+        tensorrt_package_imported = True
+    except ImportError as e:
        # Fallback to standard tensorrt if RTX version not available
-        print(f"import error when try to import {tensorrt_package_name=}")
-
-    proxy = TensorRTProxyModule(target)
-    proxy._package_name = tensorrt_package_name
-    sys.modules["tensorrt"] = proxy
+        print(f"import error when trying to import {tensorrt_package_name=} got error {e}")
+        print(
+            f"make sure tensorrt lib is in the LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}"
+        )
+        raise Exception(
+            f"import error when trying to import {tensorrt_package_name=} got error {e}"
+        )

 alias_tensorrt()
diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl
index 7345a056eb..2f29be7a3b 100644
--- a/toolchains/ci_workspaces/MODULE.bazel.tmpl
+++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl
@@ -8,7 +8,6 @@ bazel_dep(name = "googletest", version = "1.16.0")
 bazel_dep(name = "platforms", version = "0.0.11")
 bazel_dep(name = "rules_cc", version = "0.1.1")
 bazel_dep(name = "rules_python", version = "1.3.0")
-bazel_dep(name = "bazel_skylib", version = "1.3.0")

 python = use_extension("@rules_python//python/extensions:python.bzl", "python")
 python.toolchain(

From fc4dee26c2ba1b6a6c6fcc6c88bdc0d0a6ada3a5 Mon Sep 17 00:00:00 2001
From: lanluo-nvidia
Date: Mon, 14 Jul 2025 16:17:57 -0700
Subject: [PATCH 12/12] fix build skip issue in CI

---
 .github/workflows/build-test-linux-x86_64.yml | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml
index 263bf5c274..c9839d4051 100644
--- a/.github/workflows/build-test-linux-x86_64.yml
+++ b/.github/workflows/build-test-linux-x86_64.yml
@@ -1,13 +1,6 @@
 name: Build and test Linux x86_64 wheels

 on:
-  workflow_call:
-    inputs:
-      use-rtx:
-        description: "Use RTX TensorRT"
-        default: false
-        type: boolean
-        required: false
  pull_request:
  push:
    branches:
@@ -19,6 +12,12 @@ on:
      # Release candidate tags look like: v1.11.0-rc1
      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
  workflow_dispatch:
+    inputs:
+      use-rtx:
+        description: "Use RTX TensorRT"
+        default: false
+        type: boolean
+        required: true

 jobs:
  generate-matrix:
@@ -82,7 +81,7 @@ jobs:
      smoke-test-script: ${{ matrix.smoke-test-script }}
      trigger-event: ${{ github.event_name }}
      architecture: "x86_64"
-      use-rtx: ${{ inputs.use-rtx }}
+      use-rtx: ${{ github.event.inputs.use-rtx }}

  tests-py-torchscript-fe:
    name: Test torchscript frontend [Python]
@@ -347,5 +346,5 @@ jobs:
    popd

 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.use-rtx }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.event.inputs.use-rtx }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
  cancel-in-progress: true
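Taken together, patches 11 and 12 make the RTX path fail loudly: the workflow reads use-rtx only from workflow_dispatch inputs, and trt_alias.py raises instead of silently falling back when the selected TensorRT package cannot be imported. A minimal, self-contained sketch of that aliasing pattern, using a plain ModuleType as a stand-in for the repository's TensorRTProxyModule (an assumption for illustration, not the actual class):

import importlib
import os
import sys
import types


def alias_tensorrt(use_rtx):
    # Pick the package the build was configured for, as in trt_alias.py.
    package_name = "tensorrt_rtx" if use_rtx else "tensorrt"
    try:
        target = importlib.import_module(package_name)
    except ImportError as e:
        # Fail loudly, matching the patched behavior above.
        print(f"import error when trying to import {package_name=} got error {e}")
        print(f"make sure the tensorrt lib is in LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}")
        raise
    # Stand-in for TensorRTProxyModule: a module object that forwards to the target.
    proxy = types.ModuleType("tensorrt")
    proxy.__dict__.update(target.__dict__)
    proxy._package_name = package_name
    # Publish the proxy so a later `import tensorrt` resolves to the selected package.
    sys.modules["tensorrt"] = proxy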