[ROCm][TunableOp] Speed up matmul_small_brute_force_tunableop unit test (pytorch#147659)

naromero77amd · pytorchmergebot · commit 9605c5063b28 · 2025-02-24T19:44:38.000Z
This PR speeds up a unit test (UT) and refactors some tests. A previous PR, pytorch#142422, fixed the matmul_small_brute_force_tunableop test for the FP16 data type by enabling TunableOp numerical checks. That had the unfortunate side effect of significantly increasing the execution time for the FP32 and FP64 data types. This PR enables the numerical check only for FP16, which *reduces* the execution time by 20+ minutes. We also move the hipBLASLt version check to a different TunableOp UT (test_validator_tunableop_rocm) for simplicity. Pull Request resolved: pytorch#147659 Approved by: https://github.com/jeffdaily
diff --git a/test/test_linalg.py b/test/test_linalg.py
@@ -4589,7 +4589,8 @@ def test_matmul_small_brute_force_tunableop(self, device, dtype):
         try:
             set_tunableop_defaults()
             torch.cuda.tunable.set_rotating_buffer_size(0)
-            os.environ["PYTORCH_TUNABLEOP_NUMERICAL_CHECK"] = "1"
+            if dtype is torch.half:
+                os.environ["PYTORCH_TUNABLEOP_NUMERICAL_CHECK"] = "1"
             ordinal = torch.cuda.current_device()
             torch.cuda.tunable.set_filename(f"tunableop_results{ordinal}.csv")
 
@@ -4610,10 +4611,6 @@ def test_matmul_small_brute_force_tunableop(self, device, dtype):
             filename3 = "tunableop_results_tmp2.csv"
             ordinal = torch.cuda.current_device()
             assert filename1 == f"tunableop_results{ordinal}.csv"
-            validators = get_tunableop_validators()
-            if torch.version.hip:
-                assert "HIPBLASLT_VERSION" in validators
-                assert re.match(r'^\d+-[a-z0-9]+$', validators["HIPBLASLT_VERSION"])
             assert len(torch.cuda.tunable.get_results()) > 0
 
             assert torch.cuda.tunable.write_file()  # use default filename
@@ -4953,9 +4950,12 @@ def test_validator_tunableop_rocm(self, device, dtype):
         self.assertEqual(len(torch.cuda.tunable.get_validators()), validator_num_lines)
 
         validators = get_tunableop_validators()
+        # Check for rocBLAS and hipBLASLt
         self.assertTrue("ROCBLAS_VERSION" in validators)
         # format: [major].[minor].[patch].[tweak].[commit id]
         self.assertTrue(re.match(r'^\d+.\d+.\d+.\d+.[a-z0-9]+$', validators["ROCBLAS_VERSION"]))
+        self.assertTrue("HIPBLASLT_VERSION" in validators)
+        self.assertTrue(re.match(r'^\d+-[a-z0-9]+$', validators["HIPBLASLT_VERSION"]))
 
         # disable TunableOp
         torch.cuda.tunable.enable(False)