
Commit e0c311a

Author: pytorchbot
Commit message: 2024-04-27 nightly release (1a4ffe4)
Parent: fcd893a

File tree: 17 files changed, +310 −253 lines

.github/workflows/build-test.yml

Lines changed: 9 additions & 8 deletions

@@ -15,7 +15,7 @@ on:
 
 jobs:
   generate-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@release/2.3
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
       package-type: wheel
       os: linux
@@ -40,7 +40,7 @@ jobs:
       smoke-test-script: packaging/smoke_test_script.sh
       package-name: torch_tensorrt
     name: Build torch-tensorrt whl package
-    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@release/2.3
+    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
     with:
       repository: ${{ matrix.repository }}
       ref: ""
@@ -65,7 +65,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-torchscript-fe
       repository: "pytorch/tensorrt"
@@ -103,7 +103,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
    with:
      job-name: tests-py-dynamo-converters
      repository: "pytorch/tensorrt"
@@ -132,7 +132,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-dynamo-fe
       repository: "pytorch/tensorrt"
@@ -162,7 +162,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-dynamo-serde
       repository: "pytorch/tensorrt"
@@ -191,7 +191,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-torch-compile-be
       repository: "pytorch/tensorrt"
@@ -208,6 +208,7 @@ jobs:
         ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py
+        ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py
         popd
 
   tests-py-dynamo-core:
@@ -221,7 +222,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-dynamo-core
       repository: "pytorch/tensorrt"
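
The added pytest invocation runs the dynamic-shape model suite through the torch_compile path. As a rough illustration of the kind of workload it exercises (the model, shapes, and GPU usage below are invented for this sketch, not taken from models/test_dyn_models.py):

    import torch
    import torch_tensorrt  # noqa: F401  (registers the "torch_tensorrt" dynamo backend)

    class Toy(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return torch.relu(x) + 1

    model = Toy().eval().cuda()
    # dynamic=True asks dynamo to trace with symbolic (dynamic) dimensions
    compiled = torch.compile(model, backend="torch_tensorrt", dynamic=True)
    print(compiled(torch.randn(2, 3, device="cuda")).shape)
    print(compiled(torch.randn(8, 3, device="cuda")).shape)  # different batch size hits the dynamic-shape path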

WORKSPACE

Lines changed: 3 additions & 3 deletions

@@ -81,10 +81,10 @@ http_archive(
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    sha256 = "0f8157a5fc5329943b338b893591373350afa90ca81239cdadd7580cd1eba254",
-    strip_prefix = "TensorRT-8.6.1.6",
+    sha256 = "0e35729954681411a79ccf31df089523caa11838095fbd025ddc7cd6f73f02de",
+    strip_prefix = "TensorRT-10.0.0.6",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz",
     ],
 )

cpp/include/torch_tensorrt/macros.h

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
 #define STR(x) XSTR(x)
 
 #define TORCH_TENSORRT_MAJOR_VERSION 2
-#define TORCH_TENSORRT_MINOR_VERSION 3
+#define TORCH_TENSORRT_MINOR_VERSION 4
 #define TORCH_TENSORRT_PATCH_VERSION 0
 #define TORCH_TENSORRT_VERSION \
   STR(TORCH_TENSORRT_MAJOR_VERSION) \

dev_dep_versions.yml

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-__version__: "2.3.0.dev0"
+__version__: "2.4.0.dev0"
 __cuda_version__: "12.1"
 __cudnn_version__: "8.9"
 __tensorrt_version__: "10.0.0.6"
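
This file pins the development dependency versions for the 2.4 cycle. A minimal sketch of how a build script might read it (the loading code is an assumption for illustration, not the repo's actual release tooling):

    import yaml  # pyyaml

    with open("dev_dep_versions.yml") as f:
        versions = yaml.safe_load(f)

    print(versions["__version__"])           # "2.4.0.dev0" after this commit
    print(versions["__tensorrt_version__"])  # "10.0.0.6"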

docsrc/getting_started/installation.rst

Lines changed: 15 additions & 10 deletions

@@ -87,15 +87,16 @@ Dependencies for Compilation
 * Specify your CUDA version here if not the version used in the branch being built: https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L46
 
 
-* The correct **LibTorch** version will be pulled down for you by bazel.
+* The correct **LibTorch**, **cuDNN** and **TensorRT** versions will be pulled down for you by bazel.
 
 NOTE: By default bazel will pull the latest nightly from pytorch.org. For building main, this is usually sufficient however if there is a specific PyTorch you are targeting,
 edit these locations with updated URLs/paths:
 
 * https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L53C1-L53C1
 
 
-* **cuDNN and TensorRT** are not required to be installed on the system to build Torch-TensorRT, in fact this is preferable to ensure reproducable builds. Download the tarballs for cuDNN and TensorRT from https://developer.nvidia.com and update the paths in the WORKSPACE file here https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L71
+* **cuDNN and TensorRT** are not required to be installed on the system to build Torch-TensorRT, in fact this is preferable to ensure reproducable builds. If versions other than the default are needed
+  point the WORKSPACE file to the URL of the tarball or download the tarballs for cuDNN and TensorRT from https://developer.nvidia.com and update the paths in the WORKSPACE file here https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L71
 
 For example:
 
@@ -104,25 +105,29 @@ Dependencies for Compilation
     http_archive(
         name = "cudnn",
         build_file = "@//third_party/cudnn/archive:BUILD",
-        sha256 = "79d77a769c7e7175abc7b5c2ed5c494148c0618a864138722c887f95c623777c",
-        strip_prefix = "cudnn-linux-x86_64-8.8.1.3_cuda12-archive",
+        sha256 = "<CUDNN SHA256>", # Optional but recommended
+        strip_prefix = "cudnn-linux-x86_64-<CUDNN VERSION>_<CUDA VERSION>-archive",
         urls = [
-            #"https://developer.nvidia.com/downloads/compute/cudnn/secure/8.8.1/local_installers/12.0/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz",
-            "file:///<ABSOLUTE PATH TO FILE>/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz"
+            "https://developer.nvidia.com/downloads/compute/cudnn/<CUDNN DOWNLOAD PATH>",
+            # OR
+            "file:///<ABSOLUTE PATH TO FILE>/cudnn-linux-x86_64-<CUDNN VERSION>_<CUDA VERSION>-archive.tar.xz"
         ],
     )
 
     http_archive(
         name = "tensorrt",
         build_file = "@//third_party/tensorrt/archive:BUILD",
-        sha256 = "0f8157a5fc5329943b338b893591373350afa90ca81239cdadd7580cd1eba254",
-        strip_prefix = "TensorRT-8.6.1.6",
+        sha256 = "<TENSORRT SHA256>", # Optional but recommended
+        strip_prefix = "TensorRT-<TENSORRT VERSION>",
         urls = [
-            #"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz",
-            "file:///<ABSOLUTE PATH TO FILE>/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz"
+            "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/<TENSORRT DOWNLOAD PATH>",
+            # OR
+            "file:///<ABSOLUTE PATH TO FILE>/TensorRT-<TENSORRT VERSION>.Linux.x86_64-gnu.cuda-<CUDA VERSION>.tar.gz"
        ],
    )
 
+Remember at runtime, these libraries must be added to your ``LD_LIBRARY_PATH`` explicity
+
 If you have a local version of cuDNN and TensorRT installed, this can be used as well by commenting out the above lines and uncommenting the following lines https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L114C1-L124C3
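The sha256 fields the new docs mark "Optional but recommended" can be computed locally before being pasted into WORKSPACE. A minimal sketch (the tarball path is a placeholder):

    import hashlib

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream the file so large tarballs don't need to fit in memory."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    print(sha256_of("/path/to/TensorRT-<TENSORRT VERSION>.Linux.x86_64-gnu.cuda-<CUDA VERSION>.tar.gz"))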

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 5 additions & 7 deletions

@@ -273,14 +273,12 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
                 return False
         return True
 
-    # Check if the module has metadata (shape, dtype). If not, run symbolic shape propagation.
+    # Check if the module has metadata (shape, dtype).
     if not contains_metadata(gm):
-        from torch._inductor.compile_fx import fake_tensor_prop
-
-        torch_inputs = get_torch_inputs(sample_inputs, settings.device)
-        with torch.no_grad():
-            # This fails if the module has data-dependent shape operators.
-            fake_tensor_prop(gm, torch_inputs)
+        # TODO: For future, explore when nodes don't have metadata and if fake_tensor_prop can resolve this.
+        logger.warning(
+            "Some nodes do not have metadata (shape and dtype information). This could lead to problems sometimes if the graph has PyTorch and TensorRT segments."
+        )
 
     # Partition module into components that can be TRT-accelerated
     fast_partitioner_failed = False
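
For context, dynamo-traced graphs record a fake-tensor value under node.meta["val"] carrying shape and dtype, and a check like contains_metadata looks for exactly that. A minimal sketch of such a check (assumed for illustration, not the repository's exact implementation):

    import torch

    def has_shape_metadata(gm: torch.fx.GraphModule) -> bool:
        # Placeholders and ops should carry a fake-tensor "val" recording shape/dtype
        return all(
            "val" in node.meta
            for node in gm.graph.nodes
            if node.op in ("placeholder", "call_function")
        )

    gm = torch.fx.symbolic_trace(torch.nn.Sequential(torch.nn.ReLU()))
    print(has_shape_metadata(gm))  # False: plain symbolic_trace records no "val" metadata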

py/torch_tensorrt/dynamo/backend/backends.py

Lines changed: 14 additions & 6 deletions

@@ -13,6 +13,7 @@
 from torch_tensorrt.dynamo.lowering import (
     apply_lowering_passes,
     get_decompositions,
+    remove_sym_nodes,
     repair_input_aliasing,
 )
 from torch_tensorrt.dynamo.utils import (
@@ -27,7 +28,7 @@
 @td.register_backend(name="tensorrt")  # type: ignore[misc]
 @td.register_backend(name="torch_tensorrt")  # type: ignore[misc]
 def torch_tensorrt_backend(
-    gm: torch.fx.GraphModule, sample_inputs: Sequence[torch.Tensor], **kwargs: Any
+    gm: torch.fx.GraphModule, sample_inputs: Sequence[Any], **kwargs: Any
 ) -> torch.nn.Module:
     # Set log level at the top of compilation (torch_tensorrt.dynamo)
     if (
@@ -44,15 +45,15 @@ def torch_tensorrt_backend(
 
 @td.register_backend(name="aot_torch_tensorrt_aten")  # type: ignore[misc]
 def aot_torch_tensorrt_aten_backend(
-    gm: torch.fx.GraphModule, sample_inputs: Sequence[torch.Tensor], **kwargs: Any
+    gm: torch.fx.GraphModule, sample_inputs: Sequence[Any], **kwargs: Any
 ) -> torch.nn.Module:
     settings = parse_dynamo_kwargs(kwargs)
     return _pretraced_backend(gm, sample_inputs, settings)
 
 
 def _pretraced_backend(
     gm: torch.fx.GraphModule,
-    sample_inputs: Sequence[torch.Tensor],
+    sample_inputs: Sequence[Any],
     settings: CompilationSettings = CompilationSettings(),
 ) -> torch.fx.GraphModule | Callable[..., Any]:
     """Helper function to manage translation of traced FX module to TRT engines
@@ -74,10 +75,17 @@ def _pretraced_backend(
             fake_mode, "allow_non_fake_inputs", True
         ), fake_mode:
             repair_input_aliasing(gm)
+
+            # Remove sym_int placeholders and inputs
+            remove_sym_nodes(gm)
+            torch_inputs = [
+                input for input in sample_inputs if isinstance(input, torch.Tensor)
+            ]
+
             # Invoke AOTAutograd to translate operators to aten
             gm = aot_export_joint_simple(
                 gm,
-                sample_inputs,
+                torch_inputs,
                 trace_joint=False,
                 decompositions=get_decompositions(
                     settings.enable_experimental_decompositions
@@ -86,10 +94,10 @@ def _pretraced_backend(
 
             logger.debug("Post-AOT Autograd graph:\n" + str(gm.graph))
 
-            gm = apply_lowering_passes(gm, sample_inputs)
+            gm = apply_lowering_passes(gm, torch_inputs)
 
             torchtrt_inputs = prepare_inputs(
-                sample_inputs, disable_memory_format_check=True
+                torch_inputs, disable_memory_format_check=True
             )
             trt_compiled = compile_module(
                 gm,
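
The new torch_inputs filter mirrors the remove_sym_nodes pass: with dynamic shapes enabled, dynamo passes SymInt scalars alongside tensors, and both the graph placeholders and the input list are narrowed to tensors before AOTAutograd runs. A rough sketch of what such a pass could look like (a guess at its shape under stated assumptions, not the source of _remove_sym_nodes):

    import torch

    def drop_non_tensor_placeholders(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
        # Remove unused placeholders whose traced value is not a tensor
        # (e.g. SymInt inputs dynamo adds for dynamic dimensions).
        for node in list(gm.graph.nodes):
            if (
                node.op == "placeholder"
                and not isinstance(node.meta.get("val"), torch.Tensor)
                and len(node.users) == 0
            ):
                gm.graph.erase_node(node)
        gm.graph.lint()
        gm.recompile()
        return gm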

py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py

Lines changed: 20 additions & 91 deletions

@@ -9,6 +9,7 @@
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
 from torch_tensorrt.dynamo.conversion.converter_utils import (
     cast_trt_tensor,
+    get_axes_for_reduce_op,
     get_positive_dim,
     get_trt_tensor,
     to_numpy,
@@ -105,102 +106,30 @@ def layer_norm(
     cudnn_enable: bool,
     return_mean_rstd: bool,
 ) -> Union[TRTTensor, Tuple[TRTTensor, torch.Tensor, torch.Tensor]]:
-    if weight is None:
-        weight = to_numpy(1.0)
-
-    if bias is None:
-        bias = to_numpy(0.0)
-
-    shape = weight.shape
-    gamma = to_numpy(weight).reshape(shape)
-    beta = to_numpy(bias).reshape(shape)
-
-    dims = list(range(len(input.shape) - len(shape), len(input.shape)))
-
-    # E[x]
-    mean_expected_trt = impl.reduce.mean(
-        ctx, target, source_ir, f"{name}_mean_expected", input, dims, True
-    )
-
-    # X-E[x]
-    sub_trt = impl.elementwise.sub(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_sub",
-        input,
-        mean_expected_trt,
-    )
-
-    # Variance = mean(pow(x_sub_mean, 2))
-    pow_trt = get_trt_tensor(ctx, 2, f"{name}_power", np.float32)
-    pow_var = impl.elementwise.pow(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_pow_var",
-        sub_trt,
-        pow_trt,
-    )
-    mean_trt = impl.reduce.mean(
-        ctx, target, source_ir, f"{name}_mean", pow_var, dims, True
-    )
-
-    # sqrt((var + eps))
-    eps_trt = get_trt_tensor(ctx, eps, f"{name}_eps", np.float32)
-    add_trt = impl.elementwise.add(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_add",
-        mean_trt,
-        eps_trt,
-    )
-    sqrt_trt = impl.unary.sqrt(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_sqrt",
-        add_trt,
-    )
-
-    # (X - E[X]) / sqrt((var + eps))
-    div_trt = impl.elementwise.div(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_div",
-        sub_trt,
-        sqrt_trt,
-    )
-
-    gamma_trt = get_trt_tensor(ctx, weight, f"{name}_gamma")
-    beta_trt = get_trt_tensor(ctx, bias, f"{name}_beta")
-
-    # y * gamma + beta
-    scaled_y = impl.elementwise.mul(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_mul_gamma",
-        div_trt,
-        gamma_trt,
-    )
+    dims = list(range(len(input.shape) - len(normalized_shape), len(input.shape)))
+    axes = get_axes_for_reduce_op(dims)
+
+    weight = get_trt_tensor(ctx, weight, f"{name}_weight")
+    bias = get_trt_tensor(ctx, bias, f"{name}_bias")
+    if tuple(input.shape) != tuple(weight.shape):
+        weight = impl.slice.expand(
+            ctx, target, source_ir, f"{name}_expand_weight", weight, input.shape
+        )
+    if tuple(input.shape) != tuple(bias.shape):
+        bias = impl.slice.expand(
+            ctx, target, source_ir, f"{name}_expand_bias", bias, input.shape
+        )
 
-    output = impl.elementwise.add(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_add_beta",
-        scaled_y,
-        beta_trt,
-    )
+    layer_norm = ctx.net.add_normalization(input, weight, bias, axes)
+    layer_norm.epsilon = eps
+    layer_norm.compute_precision = input.dtype
+    set_layer_name(layer_norm, target, f"{name}_layer_norm", source_ir)
 
     if return_mean_rstd:
         # return fake mean and rstd for now
-        return output, None, None
+        return layer_norm.get_output(0), None, None
 
-    return output
+    return layer_norm.get_output(0)
 
 
 def native_group_norm(
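
This rewrite replaces the hand-built mean/variance subgraph with a single fused TensorRT normalization layer (ctx.net.add_normalization). For reference, a small PyTorch sketch of the arithmetic both versions compute; the tensors here are illustrative:

    import torch

    def layer_norm_reference(x, weight, bias, normalized_shape, eps=1e-5):
        dims = list(range(x.dim() - len(normalized_shape), x.dim()))
        mean = x.mean(dim=dims, keepdim=True)                  # E[x]
        var = (x - mean).pow(2).mean(dim=dims, keepdim=True)   # biased Var[x]
        y = (x - mean) / torch.sqrt(var + eps)                 # normalize
        return y * weight + bias                               # gamma, beta

    x = torch.randn(2, 4, 8)
    w, b = torch.ones(8), torch.zeros(8)
    ref = layer_norm_reference(x, w, b, (8,))
    assert torch.allclose(ref, torch.nn.functional.layer_norm(x, (8,), w, b), atol=1e-5)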

py/torch_tensorrt/dynamo/conversion/impl/shape.py

Lines changed: 1 addition & 0 deletions

@@ -104,6 +104,7 @@ def get_shape_with_dynamic_shape(
     scale_res = scale_layer.get_output(0)
 
     length = input_shape.shape[0]
+
     zero_layer = ctx.net.add_constant(
         input_shape.shape, np.zeros((length), dtype=np.int32)
     )

py/torch_tensorrt/dynamo/lowering/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -3,6 +3,6 @@
     torch_enabled_decompositions,
 )
 from ._decompositions import get_decompositions  # noqa: F401
-from ._fusers import *  # noqa: F401
+from ._remove_sym_nodes import remove_sym_nodes
 from ._repair_input_aliasing import repair_input_aliasing
 from .passes import apply_lowering_passes
