Skip to content

Commit 9d3631d

Browse files
Mpasumarthi/nvtrt test suite (#1756)
Extend GenAI test suite to support NvTensorRtRtx EP by switching to a newer model and adding deterministic tests for API flow, batch size > 1, sampling, and out-of-place KV cache.
1 parent d4eabac commit 9d3631d

File tree

4 files changed

+474
-2
lines changed

4 files changed

+474
-2
lines changed

.github/workflows/linux-cpu-arm64-build.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ jobs:
2727

2828
- uses: actions/setup-dotnet@v5
2929
with:
30-
dotnet-version: '8.0.x'
30+
dotnet-version: |
31+
8.0.x
32+
9.0.x
3133
3234
- name: Install Rust with rustup
3335
run: |

test/model_tests.cpp

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <cstring> // for memcmp
55
#include <iostream>
66
#include <random>
7+
#include <filesystem>
78
#include <gtest/gtest.h>
89

910
#include "span.h"
@@ -249,6 +250,104 @@ TEST(ModelTests, BeamSearchGptCuda) {
249250
}
250251
#endif
251252

253+
// NvTensorRT test cases using Phi3 models
254+
static const std::pair<const char*, const char*> c_phi3_nvtrt_model_paths[] = {
255+
{MODEL_PATH "hf-internal-testing/phi3-fp16-nvtrt", "fp16"},
256+
};
257+
258+
void Test_GreedySearch_Phi3_NvTensorRtRtx(const char* model_path, const char* model_label) {
259+
// Skip test if NvTensorRT model is not available
260+
if (!std::filesystem::exists(model_path)) {
261+
GTEST_SKIP() << "NvTensorRT model not available at: " << model_path;
262+
}
263+
const std::vector<int64_t> input_ids_shape{1, 19};
264+
const std::vector<int32_t> input_ids{32006, 887, 526, 263, 8444, 29871, 23869, 20255, 29889, 32007, 32010, 6324, 29892, 1128, 526, 366, 29973, 32007, 32001};
265+
266+
// Complete expected sequence (input + generated) from model_qa.cpp using the working phi3-fp16-nvtrt model
267+
const std::vector<int32_t> expected_output{
268+
32006, 887, 526, 263, 8444, 29871, 23869, 20255, 29889, 32007, 32010, 6324, 29892, 1128, 526, 366, 29973, 32007, 32001, // Input tokens (19)
269+
15043, 29991, 306, 29915, 29885, 2599};
270+
auto config = OgaConfig::Create(model_path);
271+
config->ClearProviders();
272+
config->AppendProvider("NvTensorRtRtx");
273+
auto model = OgaModel::Create(*config);
274+
275+
constexpr int max_length = 25;
276+
int batch_size = static_cast<int>(input_ids_shape[0]);
277+
auto params = OgaGeneratorParams::Create(*model);
278+
params->SetSearchOption("max_length", max_length);
279+
params->SetSearchOption("batch_size", batch_size);
280+
281+
auto generator = OgaGenerator::Create(*model, *params);
282+
generator->AppendTokens(input_ids);
283+
284+
while (!generator->IsDone()) {
285+
generator->GenerateNextToken();
286+
}
287+
288+
// Verify outputs match expected outputs
289+
for (int i = 0; i < batch_size; i++) {
290+
auto sequence = generator->GetSequence(i);
291+
auto* expected_output_start = &expected_output[i * max_length];
292+
293+
EXPECT_TRUE(0 == std::memcmp(expected_output_start, sequence.data(), max_length * sizeof(int32_t)));
294+
}
295+
}
296+
297+
// Greedy-search smoke test over every registered NvTensorRtRtx Phi3 variant.
TEST(ModelTests, GreedySearchPhi3NvTensorRtRtx) {
  for (const auto& [path, label] : c_phi3_nvtrt_model_paths) {
    Test_GreedySearch_Phi3_NvTensorRtRtx(path, label);
  }
}
301+
302+
void Test_OutOfPlaceKvCache_Phi3_NvTensorRtRtx(const char* model_path, const char* model_label) {
303+
// Skip test if NvTensorRT model is not available
304+
if (!std::filesystem::exists(model_path)) {
305+
GTEST_SKIP() << "NvTensorRT model not available at: " << model_path;
306+
}
307+
308+
const std::vector<int64_t> input_ids_shape{1, 19};
309+
const std::vector<int32_t> input_ids{
310+
32006, 887, 526, 263, 8444, 29871, 23869, 20255, 29889,
311+
32007, 32010, 6324, 29892, 1128, 526, 366, 29973, 32007, 32001};
312+
313+
// Expected output sequence (input + generated tokens) for validation with greedy search
314+
const std::vector<int32_t> expected_output{
315+
32006, 887, 526, 263, 8444, 29871, 23869, 20255, 29889, 32007, 32010, 6324, 29892, 1128, 526, 366, 29973, 32007, 32001, // Input tokens (19)
316+
15043, 1554, 13, 16271, 29892, 8733};
317+
318+
auto config = OgaConfig::Create(model_path);
319+
config->ClearProviders();
320+
config->AppendProvider("NvTensorRtRtx");
321+
auto model = OgaModel::Create(*config);
322+
323+
constexpr int max_length = 25;
324+
int batch_size = static_cast<int>(input_ids_shape[0]);
325+
auto params = OgaGeneratorParams::Create(*model);
326+
params->SetSearchOption("max_length", max_length);
327+
params->SetSearchOption("batch_size", batch_size);
328+
params->SetSearchOptionBool("past_present_share_buffer", false);
329+
params->SetSearchOptionBool("do_sample", false);
330+
331+
auto generator = OgaGenerator::Create(*model, *params);
332+
generator->AppendTokens(input_ids);
333+
334+
while (!generator->IsDone()) {
335+
generator->GenerateNextToken();
336+
}
337+
338+
auto sequence = generator->GetSequence(0);
339+
340+
// Verify output matches expected output
341+
EXPECT_EQ(sequence.size(), expected_output.size());
342+
EXPECT_TRUE(0 == std::memcmp(expected_output.data(), sequence.data(),
343+
expected_output.size() * sizeof(int32_t)));
344+
}
345+
346+
// Out-of-place KV cache test over every registered NvTensorRtRtx Phi3 variant.
TEST(ModelTests, OutOfPlaceKvCachePhi3NvTensorRtRtx) {
  for (const auto& [path, label] : c_phi3_nvtrt_model_paths) {
    Test_OutOfPlaceKvCache_Phi3_NvTensorRtRtx(path, label);
  }
}
350+
252351
#if TEST_PHI2 && (USE_CUDA || USE_DML)
253352
TEST(ModelTests, TestApiDevice) {
254353
auto prompt = R"(

test/sampling_benchmark.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <array>
88
#include <chrono>
99
#include <cmath>
10+
#include <filesystem>
1011
#include <iomanip>
1112
#include <iostream>
1213
#include <numeric>
@@ -108,7 +109,11 @@ void PrintSummary(const std::vector<BenchmarkResult>& results) {
108109
}
109110

110111
BenchmarkResult RunBenchmark(const BenchmarkParams& params) {
111-
auto config = OgaConfig::Create(MODEL_PATH "hf-internal-testing/tiny-random-gpt2-fp32");
112+
const char* model_path = MODEL_PATH "hf-internal-testing/tiny-random-gpt2-fp32";
113+
if (strcmp(params.device_type, "NvTensorRtRtx") == 0) {
114+
model_path = MODEL_PATH "hf-internal-testing/phi3-fp16-nvtrt";
115+
}
116+
auto config = OgaConfig::Create(model_path);
112117
std::string overlay = R"({ "model": { "vocab_size" : )" + std::to_string(params.vocab_size) + R"( } })";
113118
config->Overlay(overlay.c_str());
114119
config->ClearProviders();
@@ -178,6 +183,10 @@ TEST(SamplingBenchmarks, PerformanceTests) {
178183
#if USE_CUDA
179184
device_types.push_back("cuda");
180185
#endif
186+
// Add NvTensorRtRtx if model is available
187+
if (std::filesystem::exists(MODEL_PATH "hf-internal-testing/phi3-fp16-nvtrt")) {
188+
device_types.push_back("NvTensorRtRtx");
189+
}
181190

182191
std::vector<int> batch_sizes = {1};
183192
std::vector<int> vocab_sizes = {201088};

0 commit comments

Comments
 (0)