|
4 | 4 | #include <cstring> // for memcmp |
5 | 5 | #include <iostream> |
6 | 6 | #include <random> |
| 7 | +#include <filesystem> |
7 | 8 | #include <gtest/gtest.h> |
8 | 9 |
|
9 | 10 | #include "span.h" |
@@ -249,6 +250,104 @@ TEST(ModelTests, BeamSearchGptCuda) { |
249 | 250 | } |
250 | 251 | #endif |
251 | 252 |
|
| 253 | +// NvTensorRT test cases using Phi3 models |
| 254 | +static const std::pair<const char*, const char*> c_phi3_nvtrt_model_paths[] = { |
| 255 | + {MODEL_PATH "hf-internal-testing/phi3-fp16-nvtrt", "fp16"}, |
| 256 | +}; |
| 257 | + |
| 258 | +void Test_GreedySearch_Phi3_NvTensorRtRtx(const char* model_path, const char* model_label) { |
| 259 | + // Skip test if NvTensorRT model is not available |
| 260 | + if (!std::filesystem::exists(model_path)) { |
| 261 | + GTEST_SKIP() << "NvTensorRT model not available at: " << model_path; |
| 262 | + } |
| 263 | + const std::vector<int64_t> input_ids_shape{1, 19}; |
| 264 | + const std::vector<int32_t> input_ids{32006, 887, 526, 263, 8444, 29871, 23869, 20255, 29889, 32007, 32010, 6324, 29892, 1128, 526, 366, 29973, 32007, 32001}; |
| 265 | + |
| 266 | + // Complete expected sequence (input + generated) from model_qa.cpp using the working phi3-fp16-nvtrt model |
| 267 | + const std::vector<int32_t> expected_output{ |
| 268 | + 32006, 887, 526, 263, 8444, 29871, 23869, 20255, 29889, 32007, 32010, 6324, 29892, 1128, 526, 366, 29973, 32007, 32001, // Input tokens (19) |
| 269 | + 15043, 29991, 306, 29915, 29885, 2599}; |
| 270 | + auto config = OgaConfig::Create(model_path); |
| 271 | + config->ClearProviders(); |
| 272 | + config->AppendProvider("NvTensorRtRtx"); |
| 273 | + auto model = OgaModel::Create(*config); |
| 274 | + |
| 275 | + constexpr int max_length = 25; |
| 276 | + int batch_size = static_cast<int>(input_ids_shape[0]); |
| 277 | + auto params = OgaGeneratorParams::Create(*model); |
| 278 | + params->SetSearchOption("max_length", max_length); |
| 279 | + params->SetSearchOption("batch_size", batch_size); |
| 280 | + |
| 281 | + auto generator = OgaGenerator::Create(*model, *params); |
| 282 | + generator->AppendTokens(input_ids); |
| 283 | + |
| 284 | + while (!generator->IsDone()) { |
| 285 | + generator->GenerateNextToken(); |
| 286 | + } |
| 287 | + |
| 288 | + // Verify outputs match expected outputs |
| 289 | + for (int i = 0; i < batch_size; i++) { |
| 290 | + auto sequence = generator->GetSequence(i); |
| 291 | + auto* expected_output_start = &expected_output[i * max_length]; |
| 292 | + |
| 293 | + EXPECT_TRUE(0 == std::memcmp(expected_output_start, sequence.data(), max_length * sizeof(int32_t))); |
| 294 | + } |
| 295 | +} |
| 296 | + |
| 297 | +TEST(ModelTests, GreedySearchPhi3NvTensorRtRtx) { |
| 298 | + for (auto model_path : c_phi3_nvtrt_model_paths) |
| 299 | + Test_GreedySearch_Phi3_NvTensorRtRtx(model_path.first, model_path.second); |
| 300 | +} |
| 301 | + |
| 302 | +void Test_OutOfPlaceKvCache_Phi3_NvTensorRtRtx(const char* model_path, const char* model_label) { |
| 303 | + // Skip test if NvTensorRT model is not available |
| 304 | + if (!std::filesystem::exists(model_path)) { |
| 305 | + GTEST_SKIP() << "NvTensorRT model not available at: " << model_path; |
| 306 | + } |
| 307 | + |
| 308 | + const std::vector<int64_t> input_ids_shape{1, 19}; |
| 309 | + const std::vector<int32_t> input_ids{ |
| 310 | + 32006, 887, 526, 263, 8444, 29871, 23869, 20255, 29889, |
| 311 | + 32007, 32010, 6324, 29892, 1128, 526, 366, 29973, 32007, 32001}; |
| 312 | + |
| 313 | + // Expected output sequence (input + generated tokens) for validation with greedy search |
| 314 | + const std::vector<int32_t> expected_output{ |
| 315 | + 32006, 887, 526, 263, 8444, 29871, 23869, 20255, 29889, 32007, 32010, 6324, 29892, 1128, 526, 366, 29973, 32007, 32001, // Input tokens (19) |
| 316 | + 15043, 1554, 13, 16271, 29892, 8733}; |
| 317 | + |
| 318 | + auto config = OgaConfig::Create(model_path); |
| 319 | + config->ClearProviders(); |
| 320 | + config->AppendProvider("NvTensorRtRtx"); |
| 321 | + auto model = OgaModel::Create(*config); |
| 322 | + |
| 323 | + constexpr int max_length = 25; |
| 324 | + int batch_size = static_cast<int>(input_ids_shape[0]); |
| 325 | + auto params = OgaGeneratorParams::Create(*model); |
| 326 | + params->SetSearchOption("max_length", max_length); |
| 327 | + params->SetSearchOption("batch_size", batch_size); |
| 328 | + params->SetSearchOptionBool("past_present_share_buffer", false); |
| 329 | + params->SetSearchOptionBool("do_sample", false); |
| 330 | + |
| 331 | + auto generator = OgaGenerator::Create(*model, *params); |
| 332 | + generator->AppendTokens(input_ids); |
| 333 | + |
| 334 | + while (!generator->IsDone()) { |
| 335 | + generator->GenerateNextToken(); |
| 336 | + } |
| 337 | + |
| 338 | + auto sequence = generator->GetSequence(0); |
| 339 | + |
| 340 | + // Verify output matches expected output |
| 341 | + EXPECT_EQ(sequence.size(), expected_output.size()); |
| 342 | + EXPECT_TRUE(0 == std::memcmp(expected_output.data(), sequence.data(), |
| 343 | + expected_output.size() * sizeof(int32_t))); |
| 344 | +} |
| 345 | + |
| 346 | +TEST(ModelTests, OutOfPlaceKvCachePhi3NvTensorRtRtx) { |
| 347 | + for (auto model_path : c_phi3_nvtrt_model_paths) |
| 348 | + Test_OutOfPlaceKvCache_Phi3_NvTensorRtRtx(model_path.first, model_path.second); |
| 349 | +} |
| 350 | + |
252 | 351 | #if TEST_PHI2 && (USE_CUDA || USE_DML) |
253 | 352 | TEST(ModelTests, TestApiDevice) { |
254 | 353 | auto prompt = R"( |
|
0 commit comments