Skip to content

Commit 6693778

Browse files
committed
chore: rebase
2 parents c748fac + dc36709 commit 6693778

File tree

240 files changed

+3114
-621
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

240 files changed

+3114
-621
lines changed

.github/scripts/filter-matrix.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,9 @@
33
import argparse
44
import json
55
import sys
6+
from typing import List
67

7-
disabled_python_versions = "3.13"
8+
disabled_python_versions: List[str] = []
89

910

1011
def main(args: list[str]) -> None:

.github/scripts/generate-tensorrt-test-matrix.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,10 @@
2828
# please update the future tensorRT version you want to test here
2929
TENSORRT_VERSIONS_DICT = {
3030
"windows": {
31+
"10.3.0": {
32+
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip",
33+
"strip_prefix": "TensorRT-10.3.0.26",
34+
},
3135
"10.7.0": {
3236
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip",
3337
"strip_prefix": "TensorRT-10.7.0.23",
@@ -42,6 +46,10 @@
4246
},
4347
},
4448
"linux": {
49+
"10.3.0": {
50+
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz",
51+
"strip_prefix": "TensorRT-10.3.0.26",
52+
},
4553
"10.7.0": {
4654
"urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/tars/TensorRT-10.7.0.23.Linux.x86_64-gnu.cuda-12.6.tar.gz",
4755
"strip_prefix": "TensorRT-10.7.0.23",

.github/scripts/generate_binary_build_matrix.py

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -18,15 +18,16 @@
1818
import sys
1919
from typing import Any, Callable, Dict, List, Optional, Tuple
2020

21+
PYTHON_VERSIONS_FOR_PR_BUILD = ["3.11"]
2122
PYTHON_ARCHES_DICT = {
22-
"nightly": ["3.9", "3.10", "3.11", "3.12"],
23-
"test": ["3.9", "3.10", "3.11", "3.12"],
24-
"release": ["3.9", "3.10", "3.11", "3.12"],
23+
"nightly": ["3.9", "3.10", "3.11", "3.12", "3.13"],
24+
"test": ["3.9", "3.10", "3.11", "3.12", "3.13"],
25+
"release": ["3.9", "3.10", "3.11", "3.12", "3.13"],
2526
}
2627
CUDA_ARCHES_DICT = {
2728
"nightly": ["11.8", "12.6", "12.8"],
2829
"test": ["11.8", "12.6", "12.8"],
29-
"release": ["11.8", "12.6", "12.8"],
30+
"release": ["11.8", "12.4", "12.6"],
3031
}
3132
ROCM_ARCHES_DICT = {
3233
"nightly": ["6.1", "6.2"],
@@ -422,11 +423,6 @@ def generate_wheels_matrix(
422423
# Define default python version
423424
python_versions = list(PYTHON_ARCHES)
424425

425-
# If the list of python versions is set explicitly by the caller, stick with it instead
426-
# of trying to add more versions behind the scene
427-
if channel == NIGHTLY and (os in (LINUX, MACOS_ARM64, LINUX_AARCH64)):
428-
python_versions += ["3.13"]
429-
430426
if os == LINUX:
431427
# NOTE: We only build manywheel packages for linux
432428
package_type = "manywheel"
@@ -456,7 +452,7 @@ def generate_wheels_matrix(
456452
arches += [XPU]
457453

458454
if limit_pr_builds:
459-
python_versions = [python_versions[0]]
455+
python_versions = PYTHON_VERSIONS_FOR_PR_BUILD
460456

461457
global WHEEL_CONTAINER_IMAGES
462458

.github/workflows/build-test-linux.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@ jobs:
2323
test-infra-ref: main
2424
with-rocm: false
2525
with-cpu: false
26-
python-versions: '["3.11", "3.12", "3.10", "3.9"]'
2726

2827
filter-matrix:
2928
needs: [generate-matrix]
@@ -143,6 +142,7 @@ jobs:
143142
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
144143
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py
145144
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin_with_attrs.py
145+
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_flashinfer_rmsnorm.py
146146
popd
147147
148148
tests-py-dynamo-fe:

.github/workflows/build-test-windows.yml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@ jobs:
2323
test-infra-ref: main
2424
with-rocm: false
2525
with-cpu: false
26-
python-versions: '["3.11", "3.12", "3.10", "3.9"]'
2726

2827
substitute-runner:
2928
needs: generate-matrix

README.md

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,12 @@ Torch-TensorRT
55
<h4> Easily achieve the best inference performance for any PyTorch model on the NVIDIA platform. </h4>
66

77
[![Documentation](https://img.shields.io/badge/docs-master-brightgreen)](https://nvidia.github.io/Torch-TensorRT/)
8-
[![pytorch](https://img.shields.io/badge/PyTorch-2.4-green)](https://www.python.org/downloads/release/python-31013/)
9-
[![cuda](https://img.shields.io/badge/CUDA-12.4-green)](https://developer.nvidia.com/cuda-downloads)
8+
[![pytorch](https://img.shields.io/badge/PyTorch-2.8-green)](https://download.pytorch.org/whl/nightly/cu128)
9+
[![cuda](https://img.shields.io/badge/CUDA-12.8-green)](https://developer.nvidia.com/cuda-downloads)
1010
[![trt](https://img.shields.io/badge/TensorRT-10.9.0-green)](https://github.com/nvidia/tensorrt-llm)
1111
[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue)](./LICENSE)
12-
[![linux_tests](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux.yml/badge.svg)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux.yml)
13-
[![windows_tests](https://github.com/pytorch/TensorRT/actions/workflows/build-test-windows.yml/badge.svg)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-windows.yml)
12+
[![linux_nightly](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux.yml/badge.svg?branch=nightly)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux.yml)
13+
[![windows_nightly](https://github.com/pytorch/TensorRT/actions/workflows/build-test-windows.yml/badge.svg?branch=nightly)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-windows.yml)
1414

1515
---
1616
<div align="left">

core/runtime/TRTEngine.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -453,6 +453,10 @@ std::vector<std::string> TRTEngine::serialize() {
453453
return serialized_info;
454454
}
455455

456+
void TRTEngine::reset_captured_graph() {
457+
cudagraph.reset();
458+
}
459+
456460
} // namespace runtime
457461
} // namespace core
458462
} // namespace torch_tensorrt

core/runtime/TRTEngine.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,6 +185,7 @@ struct TRTEngine : torch::CustomClassHolder {
185185
// c10::List<at::Tensor> Run(c10::List<at::Tensor> inputs);
186186

187187
void set_profiling_paths();
188+
void reset_captured_graph();
188189
#ifndef NDEBUG
189190
bool profile_execution = true;
190191
#else

core/runtime/register_jit_hooks.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
8888
.def("dump_engine_layer_info", &TRTEngine::dump_engine_layer_info)
8989
.def("get_engine_layer_info", &TRTEngine::get_engine_layer_info)
9090
.def("infer_outputs", &TRTEngine::infer_outputs)
91+
.def("reset_captured_graph", &TRTEngine::reset_captured_graph)
9192
.def_readwrite("use_pre_allocated_outputs", &TRTEngine::use_pre_allocated_outputs)
9293
.def_readwrite("use_output_allocator_outputs", &TRTEngine::use_output_allocator_outputs)
9394
.def_property(

docs/_cpp_api/classtorch__tensorrt_1_1DataType.html

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
<meta name="viewport" content="width=device-width, initial-scale=1.0">
1212

13-
<title>Class DataType &mdash; Torch-TensorRT v2.7.0.dev0+bffe240 documentation</title>
13+
<title>Class DataType &mdash; Torch-TensorRT v2.8.0.dev0+3b30409 documentation</title>
1414

1515

1616

@@ -293,7 +293,7 @@
293293

294294

295295
<div class="version">
296-
v2.7.0.dev0+bffe240
296+
v2.8.0.dev0+3b30409
297297
</div>
298298

299299

@@ -348,6 +348,7 @@
348348
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
349349
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
350350
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/auto_generate_converters.html">Automatically Generate a Converter for a Custom Kernel</a></li>
351+
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/auto_generate_plugins.html">Automatically Generate a Plugin for a Custom Kernel</a></li>
351352
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
352353
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
353354
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/pre_allocated_output_example.html">Pre-allocated output buffer</a></li>

0 commit comments

Comments
 (0)