
Commit e0c311a

Author: pytorchbot
Commit message: 2024-04-27 nightly release (1a4ffe4)
Parent: fcd893a

File tree: 17 files changed, +310 −253 lines

.github/workflows/build-test.yml

Lines changed: 9 additions & 8 deletions

@@ -15,7 +15,7 @@ on:
 
 jobs:
   generate-matrix:
-    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@release/2.3
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
       package-type: wheel
       os: linux
@@ -40,7 +40,7 @@ jobs:
       smoke-test-script: packaging/smoke_test_script.sh
       package-name: torch_tensorrt
     name: Build torch-tensorrt whl package
-    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@release/2.3
+    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
     with:
       repository: ${{ matrix.repository }}
       ref: ""
@@ -65,7 +65,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-torchscript-fe
       repository: "pytorch/tensorrt"
@@ -103,7 +103,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
    with:
      job-name: tests-py-dynamo-converters
      repository: "pytorch/tensorrt"
@@ -132,7 +132,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-dynamo-fe
       repository: "pytorch/tensorrt"
@@ -162,7 +162,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-dynamo-serde
       repository: "pytorch/tensorrt"
@@ -191,7 +191,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-torch-compile-be
       repository: "pytorch/tensorrt"
@@ -208,6 +208,7 @@ jobs:
         ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py
+        ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py
         popd
 
   tests-py-dynamo-core:
@@ -221,7 +222,7 @@ jobs:
       package-name: torch_tensorrt
       pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
-    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@release/2.3
+    uses: pytorch/tensorrt/.github/workflows/linux-test.yml@main
     with:
       job-name: tests-py-dynamo-core
       repository: "pytorch/tensorrt"
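
The added pytest invocation runs the dynamic-shape model suite through the torch_compile path. As a rough illustration of the kind of workload it exercises (the model, shapes, and GPU usage below are invented for this sketch, not taken from models/test_dyn_models.py):

    import torch
    import torch_tensorrt  # noqa: F401  (registers the "torch_tensorrt" dynamo backend)

    class Toy(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return torch.relu(x) + 1

    model = Toy().eval().cuda()
    # dynamic=True asks dynamo to trace with symbolic (dynamic) dimensions
    compiled = torch.compile(model, backend="torch_tensorrt", dynamic=True)
    print(compiled(torch.randn(2, 3, device="cuda")).shape)
    print(compiled(torch.randn(8, 3, device="cuda")).shape)  # different batch size hits the dynamic-shape path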

WORKSPACE

Lines changed: 3 additions & 3 deletions

@@ -81,10 +81,10 @@ http_archive(
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    sha256 = "0f8157a5fc5329943b338b893591373350afa90ca81239cdadd7580cd1eba254",
-    strip_prefix = "TensorRT-8.6.1.6",
+    sha256 = "0e35729954681411a79ccf31df089523caa11838095fbd025ddc7cd6f73f02de",
+    strip_prefix = "TensorRT-10.0.0.6",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz",
     ],
 )

cpp/include/torch_tensorrt/macros.h

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
 #define STR(x) XSTR(x)
 
 #define TORCH_TENSORRT_MAJOR_VERSION 2
-#define TORCH_TENSORRT_MINOR_VERSION 3
+#define TORCH_TENSORRT_MINOR_VERSION 4
 #define TORCH_TENSORRT_PATCH_VERSION 0
 #define TORCH_TENSORRT_VERSION \
   STR(TORCH_TENSORRT_MAJOR_VERSION) \

dev_dep_versions.yml

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-__version__: "2.3.0.dev0"
+__version__: "2.4.0.dev0"
 __cuda_version__: "12.1"
 __cudnn_version__: "8.9"
 __tensorrt_version__: "10.0.0.6"
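
This file pins the development dependency versions for the 2.4 cycle. A minimal sketch of how a build script might read it (the loading code is an assumption for illustration, not the repo's actual release tooling):

    import yaml  # pyyaml

    with open("dev_dep_versions.yml") as f:
        versions = yaml.safe_load(f)

    print(versions["__version__"])           # "2.4.0.dev0" after this commit
    print(versions["__tensorrt_version__"])  # "10.0.0.6"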

docsrc/getting_started/installation.rst

Lines changed: 15 additions & 10 deletions

@@ -87,15 +87,16 @@ Dependencies for Compilation
 * Specify your CUDA version here if not the version used in the branch being built: https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L46
 
 
-* The correct **LibTorch** version will be pulled down for you by bazel.
+* The correct **LibTorch**, **cuDNN** and **TensorRT** versions will be pulled down for you by bazel.
 
 NOTE: By default bazel will pull the latest nightly from pytorch.org. For building main, this is usually sufficient however if there is a specific PyTorch you are targeting,
 edit these locations with updated URLs/paths:
 
 * https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L53C1-L53C1
 
 
-* **cuDNN and TensorRT** are not required to be installed on the system to build Torch-TensorRT, in fact this is preferable to ensure reproducable builds. Download the tarballs for cuDNN and TensorRT from https://developer.nvidia.com and update the paths in the WORKSPACE file here https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L71
+* **cuDNN and TensorRT** are not required to be installed on the system to build Torch-TensorRT, in fact this is preferable to ensure reproducable builds. If versions other than the default are needed
+  point the WORKSPACE file to the URL of the tarball or download the tarballs for cuDNN and TensorRT from https://developer.nvidia.com and update the paths in the WORKSPACE file here https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L71
 
 For example:
 
@@ -104,25 +105,29 @@ Dependencies for Compilation
     http_archive(
         name = "cudnn",
         build_file = "@//third_party/cudnn/archive:BUILD",
-        sha256 = "79d77a769c7e7175abc7b5c2ed5c494148c0618a864138722c887f95c623777c",
-        strip_prefix = "cudnn-linux-x86_64-8.8.1.3_cuda12-archive",
+        sha256 = "<CUDNN SHA256>", # Optional but recommended
+        strip_prefix = "cudnn-linux-x86_64-<CUDNN VERSION>_<CUDA VERSION>-archive",
         urls = [
-            #"https://developer.nvidia.com/downloads/compute/cudnn/secure/8.8.1/local_installers/12.0/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz",
-            "file:///<ABSOLUTE PATH TO FILE>/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz"
+            "https://developer.nvidia.com/downloads/compute/cudnn/<CUDNN DOWNLOAD PATH>",
+            # OR
+            "file:///<ABSOLUTE PATH TO FILE>/cudnn-linux-x86_64-<CUDNN VERSION>_<CUDA VERSION>-archive.tar.xz"
         ],
     )
 
     http_archive(
         name = "tensorrt",
         build_file = "@//third_party/tensorrt/archive:BUILD",
-        sha256 = "0f8157a5fc5329943b338b893591373350afa90ca81239cdadd7580cd1eba254",
-        strip_prefix = "TensorRT-8.6.1.6",
+        sha256 = "<TENSORRT SHA256>", # Optional but recommended
+        strip_prefix = "TensorRT-<TENSORRT VERSION>",
         urls = [
-            #"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz",
-            "file:///<ABSOLUTE PATH TO FILE>/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz"
+            "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/<TENSORRT DOWNLOAD PATH>",
+            # OR
+            "file:///<ABSOLUTE PATH TO FILE>/TensorRT-<TENSORRT VERSION>.Linux.x86_64-gnu.cuda-<CUDA VERSION>.tar.gz"
        ],
    )
 
+Remember at runtime, these libraries must be added to your ``LD_LIBRARY_PATH`` explicity
+
 If you have a local version of cuDNN and TensorRT installed, this can be used as well by commenting out the above lines and uncommenting the following lines https://github.com/pytorch/TensorRT/blob/4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d/WORKSPACE#L114C1-L124C3
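The sha256 fields the new docs mark "Optional but recommended" can be computed locally before being pasted into WORKSPACE. A minimal sketch (the tarball path is a placeholder):

    import hashlib

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream the file so large tarballs don't need to fit in memory."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    print(sha256_of("/path/to/TensorRT-<TENSORRT VERSION>.Linux.x86_64-gnu.cuda-<CUDA VERSION>.tar.gz"))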

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 5 additions & 7 deletions

@@ -273,14 +273,12 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
                 return False
         return True
 
-    # Check if the module has metadata (shape, dtype). If not, run symbolic shape propagation.
+    # Check if the module has metadata (shape, dtype).
     if not contains_metadata(gm):
-        from torch._inductor.compile_fx import fake_tensor_prop
-
-        torch_inputs = get_torch_inputs(sample_inputs, settings.device)
-        with torch.no_grad():
-            # This fails if the module has data-dependent shape operators.
-            fake_tensor_prop(gm, torch_inputs)
+        # TODO: For future, explore when nodes don't have metadata and if fake_tensor_prop can resolve this.
+        logger.warning(
+            "Some nodes do not have metadata (shape and dtype information). This could lead to problems sometimes if the graph has PyTorch and TensorRT segments."
+        )
 
     # Partition module into components that can be TRT-accelerated
     fast_partitioner_failed = False
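
For context, dynamo-traced graphs record a fake-tensor value under node.meta["val"] carrying shape and dtype, and a check like contains_metadata looks for exactly that. A minimal sketch of such a check (assumed for illustration, not the repository's exact implementation):

    import torch

    def has_shape_metadata(gm: torch.fx.GraphModule) -> bool:
        # Placeholders and ops should carry a fake-tensor "val" recording shape/dtype
        return all(
            "val" in node.meta
            for node in gm.graph.nodes
            if node.op in ("placeholder", "call_function")
        )

    gm = torch.fx.symbolic_trace(torch.nn.Sequential(torch.nn.ReLU()))
    print(has_shape_metadata(gm))  # False: plain symbolic_trace records no "val" metadata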

py/torch_tensorrt/dynamo/backend/backends.py

Lines changed: 14 additions & 6 deletions

@@ -13,6 +13,7 @@
 from torch_tensorrt.dynamo.lowering import (
     apply_lowering_passes,
     get_decompositions,
+    remove_sym_nodes,
     repair_input_aliasing,
 )
 from torch_tensorrt.dynamo.utils import (
@@ -27,7 +28,7 @@
 @td.register_backend(name="tensorrt")  # type: ignore[misc]
 @td.register_backend(name="torch_tensorrt")  # type: ignore[misc]
 def torch_tensorrt_backend(
-    gm: torch.fx.GraphModule, sample_inputs: Sequence[torch.Tensor], **kwargs: Any
+    gm: torch.fx.GraphModule, sample_inputs: Sequence[Any], **kwargs: Any
 ) -> torch.nn.Module:
     # Set log level at the top of compilation (torch_tensorrt.dynamo)
     if (
@@ -44,15 +45,15 @@ def torch_tensorrt_backend(
 
 @td.register_backend(name="aot_torch_tensorrt_aten")  # type: ignore[misc]
 def aot_torch_tensorrt_aten_backend(
-    gm: torch.fx.GraphModule, sample_inputs: Sequence[torch.Tensor], **kwargs: Any
+    gm: torch.fx.GraphModule, sample_inputs: Sequence[Any], **kwargs: Any
 ) -> torch.nn.Module:
     settings = parse_dynamo_kwargs(kwargs)
     return _pretraced_backend(gm, sample_inputs, settings)
 
 
 def _pretraced_backend(
     gm: torch.fx.GraphModule,
-    sample_inputs: Sequence[torch.Tensor],
+    sample_inputs: Sequence[Any],
     settings: CompilationSettings = CompilationSettings(),
 ) -> torch.fx.GraphModule | Callable[..., Any]:
     """Helper function to manage translation of traced FX module to TRT engines
@@ -74,10 +75,17 @@ def _pretraced_backend(
             fake_mode, "allow_non_fake_inputs", True
         ), fake_mode:
             repair_input_aliasing(gm)
+
+            # Remove sym_int placeholders and inputs
+            remove_sym_nodes(gm)
+            torch_inputs = [
+                input for input in sample_inputs if isinstance(input, torch.Tensor)
+            ]
+
             # Invoke AOTAutograd to translate operators to aten
             gm = aot_export_joint_simple(
                 gm,
-                sample_inputs,
+                torch_inputs,
                 trace_joint=False,
                 decompositions=get_decompositions(
                     settings.enable_experimental_decompositions
@@ -86,10 +94,10 @@ def _pretraced_backend(
 
             logger.debug("Post-AOT Autograd graph:\n" + str(gm.graph))
 
-            gm = apply_lowering_passes(gm, sample_inputs)
+            gm = apply_lowering_passes(gm, torch_inputs)
 
             torchtrt_inputs = prepare_inputs(
-                sample_inputs, disable_memory_format_check=True
+                torch_inputs, disable_memory_format_check=True
             )
             trt_compiled = compile_module(
                 gm,
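
The new torch_inputs filter mirrors the remove_sym_nodes pass: with dynamic shapes enabled, dynamo passes SymInt scalars alongside tensors, and both the graph placeholders and the input list are narrowed to tensors before AOTAutograd runs. A rough sketch of what such a pass could look like (a guess at its shape under stated assumptions, not the source of _remove_sym_nodes):

    import torch

    def drop_non_tensor_placeholders(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
        # Remove unused placeholders whose traced value is not a tensor
        # (e.g. SymInt inputs dynamo adds for dynamic dimensions).
        for node in list(gm.graph.nodes):
            if (
                node.op == "placeholder"
                and not isinstance(node.meta.get("val"), torch.Tensor)
                and len(node.users) == 0
            ):
                gm.graph.erase_node(node)
        gm.graph.lint()
        gm.recompile()
        return gm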

py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py

Lines changed: 20 additions & 91 deletions

@@ -9,6 +9,7 @@
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
 from torch_tensorrt.dynamo.conversion.converter_utils import (
     cast_trt_tensor,
+    get_axes_for_reduce_op,
     get_positive_dim,
     get_trt_tensor,
     to_numpy,
@@ -105,102 +106,30 @@ def layer_norm(
     cudnn_enable: bool,
     return_mean_rstd: bool,
 ) -> Union[TRTTensor, Tuple[TRTTensor, torch.Tensor, torch.Tensor]]:
-    if weight is None:
-        weight = to_numpy(1.0)
-
-    if bias is None:
-        bias = to_numpy(0.0)
-
-    shape = weight.shape
-    gamma = to_numpy(weight).reshape(shape)
-    beta = to_numpy(bias).reshape(shape)
-
-    dims = list(range(len(input.shape) - len(shape), len(input.shape)))
-
-    # E[x]
-    mean_expected_trt = impl.reduce.mean(
-        ctx, target, source_ir, f"{name}_mean_expected", input, dims, True
-    )
-
-    # X-E[x]
-    sub_trt = impl.elementwise.sub(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_sub",
-        input,
-        mean_expected_trt,
-    )
-
-    # Variance = mean(pow(x_sub_mean, 2))
-    pow_trt = get_trt_tensor(ctx, 2, f"{name}_power", np.float32)
-    pow_var = impl.elementwise.pow(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_pow_var",
-        sub_trt,
-        pow_trt,
-    )
-    mean_trt = impl.reduce.mean(
-        ctx, target, source_ir, f"{name}_mean", pow_var, dims, True
-    )
-
-    # sqrt((var + eps))
-    eps_trt = get_trt_tensor(ctx, eps, f"{name}_eps", np.float32)
-    add_trt = impl.elementwise.add(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_add",
-        mean_trt,
-        eps_trt,
-    )
-    sqrt_trt = impl.unary.sqrt(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_sqrt",
-        add_trt,
-    )
-
-    # (X - E[X]) / sqrt((var + eps))
-    div_trt = impl.elementwise.div(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_div",
-        sub_trt,
-        sqrt_trt,
-    )
-
-    gamma_trt = get_trt_tensor(ctx, weight, f"{name}_gamma")
-    beta_trt = get_trt_tensor(ctx, bias, f"{name}_beta")
-
-    # y * gamma + beta
-    scaled_y = impl.elementwise.mul(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_mul_gamma",
-        div_trt,
-        gamma_trt,
-    )
+    dims = list(range(len(input.shape) - len(normalized_shape), len(input.shape)))
+    axes = get_axes_for_reduce_op(dims)
+
+    weight = get_trt_tensor(ctx, weight, f"{name}_weight")
+    bias = get_trt_tensor(ctx, bias, f"{name}_bias")
+    if tuple(input.shape) != tuple(weight.shape):
+        weight = impl.slice.expand(
+            ctx, target, source_ir, f"{name}_expand_weight", weight, input.shape
+        )
+    if tuple(input.shape) != tuple(bias.shape):
+        bias = impl.slice.expand(
+            ctx, target, source_ir, f"{name}_expand_bias", bias, input.shape
+        )
 
-    output = impl.elementwise.add(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_add_beta",
-        scaled_y,
-        beta_trt,
-    )
+    layer_norm = ctx.net.add_normalization(input, weight, bias, axes)
+    layer_norm.epsilon = eps
+    layer_norm.compute_precision = input.dtype
+    set_layer_name(layer_norm, target, f"{name}_layer_norm", source_ir)
 
     if return_mean_rstd:
         # return fake mean and rstd for now
-        return output, None, None
+        return layer_norm.get_output(0), None, None
 
-    return output
+    return layer_norm.get_output(0)
 
 
 def native_group_norm(
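
This rewrite replaces the hand-built mean/variance subgraph with a single fused TensorRT normalization layer (ctx.net.add_normalization). For reference, a small PyTorch sketch of the arithmetic both versions compute; the tensors here are illustrative:

    import torch

    def layer_norm_reference(x, weight, bias, normalized_shape, eps=1e-5):
        dims = list(range(x.dim() - len(normalized_shape), x.dim()))
        mean = x.mean(dim=dims, keepdim=True)                  # E[x]
        var = (x - mean).pow(2).mean(dim=dims, keepdim=True)   # biased Var[x]
        y = (x - mean) / torch.sqrt(var + eps)                 # normalize
        return y * weight + bias                               # gamma, beta

    x = torch.randn(2, 4, 8)
    w, b = torch.ones(8), torch.zeros(8)
    ref = layer_norm_reference(x, w, b, (8,))
    assert torch.allclose(ref, torch.nn.functional.layer_norm(x, (8,), w, b), atol=1e-5)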

py/torch_tensorrt/dynamo/conversion/impl/shape.py

Lines changed: 1 addition & 0 deletions

@@ -104,6 +104,7 @@ def get_shape_with_dynamic_shape(
     scale_res = scale_layer.get_output(0)
 
     length = input_shape.shape[0]
+
     zero_layer = ctx.net.add_constant(
         input_shape.shape, np.zeros((length), dtype=np.int32)
     )

py/torch_tensorrt/dynamo/lowering/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -3,6 +3,6 @@
     torch_enabled_decompositions,
 )
 from ._decompositions import get_decompositions  # noqa: F401
-from ._fusers import *  # noqa: F401
+from ._remove_sym_nodes import remove_sym_nodes
 from ._repair_input_aliasing import repair_input_aliasing
 from .passes import apply_lowering_passes
