Skip to content

Commit c921a48

Browse files
[Bench] Add new pytorch scenarios for Compute Benchmarks (#20934)
1 parent 421b5f9 commit c921a48

File tree

2 files changed

+235
-10
lines changed

2 files changed

+235
-10
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 179 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ def git_url(self) -> str:
6161
return "https://github.com/intel/compute-benchmarks.git"
6262

6363
def git_hash(self) -> str:
64-
# Dec 17, 2025
65-
return "420549188cd8900c27cf9b04fd859ebe81876a99"
64+
# Dec 23, 2025
65+
return "a9546fe49b6291dbd5238dc966a2909d8ad72992"
6666

6767
def setup(self) -> None:
6868
if options.sycl is None:
@@ -322,6 +322,63 @@ def createRrBench(variant_name: str, **kwargs):
322322
),
323323
]
324324

325+
# Add TorchSingleQueue benchmarks
326+
for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES):
327+
328+
def createTorchSingleQueueBench(variant_name: str, **kwargs):
329+
return TorchSingleQueue(
330+
self,
331+
runtime,
332+
variant_name,
333+
PROFILERS.TIMER,
334+
**{
335+
**kwargs,
336+
"KernelBatchSize": 512,
337+
"KernelName": "Add",
338+
"KernelParamsNum": 5,
339+
"KernelSubmitPattern": "Single",
340+
},
341+
)
342+
343+
benches += [
344+
createTorchSingleQueueBench(
345+
"Int32Large",
346+
KernelDataType="Int32",
347+
KernelWGCount=4096,
348+
KernelWGSize=512,
349+
),
350+
createTorchSingleQueueBench(
351+
"Int32Medium",
352+
KernelDataType="Int32",
353+
KernelWGCount=512,
354+
KernelWGSize=256,
355+
),
356+
createTorchSingleQueueBench(
357+
"Int32Small",
358+
KernelDataType="Int32",
359+
KernelWGCount=256,
360+
KernelWGSize=128,
361+
),
362+
createTorchSingleQueueBench(
363+
"MixedLarge",
364+
KernelDataType="Mixed",
365+
KernelWGCount=4096,
366+
KernelWGSize=512,
367+
),
368+
createTorchSingleQueueBench(
369+
"MixedMedium",
370+
KernelDataType="Mixed",
371+
KernelWGCount=512,
372+
KernelWGSize=256,
373+
),
374+
createTorchSingleQueueBench(
375+
"MixedSmall",
376+
KernelDataType="Mixed",
377+
KernelWGCount=256,
378+
KernelWGSize=128,
379+
),
380+
]
381+
325382
# Add TorchMultiQueue benchmarks
326383
for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES):
327384

@@ -350,7 +407,7 @@ def createTorchMultiQueueBench(variant_name: str, **kwargs):
350407
createTorchMultiQueueBench(
351408
"small",
352409
workgroupCount=256,
353-
workgroupSize=124,
410+
workgroupSize=128,
354411
kernelsPerQueue=4,
355412
),
356413
]
@@ -379,9 +436,84 @@ def createTorchSlmSizeBench(variant_name: str, **kwargs):
379436
slmNum=1024,
380437
),
381438
createTorchSlmSizeBench(
382-
"max",
439+
"large",
383440
batchSize=512,
384-
slmNum=-1,
441+
slmNum=16384,
442+
),
443+
]
444+
445+
# Add TorchMemoryReuse benchmarks
446+
for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES):
447+
448+
def createTorchMemoryReuseBench(variant_name: str, **kwargs):
449+
return TorchMemoryReuse(
450+
self,
451+
runtime,
452+
variant_name,
453+
PROFILERS.TIMER,
454+
**kwargs,
455+
)
456+
457+
benches += [
458+
createTorchMemoryReuseBench(
459+
"Int32Large",
460+
kernelBatchSize=4096,
461+
dataType="Int32",
462+
),
463+
createTorchMemoryReuseBench(
464+
"Int32Medium",
465+
kernelBatchSize=512,
466+
dataType="Int32",
467+
),
468+
createTorchMemoryReuseBench(
469+
"FloatLarge",
470+
kernelBatchSize=4096,
471+
dataType="Float",
472+
),
473+
createTorchMemoryReuseBench(
474+
"FloatMedium",
475+
kernelBatchSize=512,
476+
dataType="Float",
477+
),
478+
]
479+
480+
# Add TorchLinearKernelSize benchmarks
481+
for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES):
482+
483+
def createTorchLinearKernelSizeBench(variant_name: str, **kwargs):
484+
return TorchLinearKernelSize(
485+
self,
486+
runtime,
487+
variant_name,
488+
PROFILERS.TIMER,
489+
**kwargs,
490+
)
491+
492+
benches += [
493+
createTorchLinearKernelSizeBench(
494+
"array32",
495+
kernelBatchSize=512,
496+
kernelSize=32,
497+
),
498+
createTorchLinearKernelSizeBench(
499+
"array128",
500+
kernelBatchSize=512,
501+
kernelSize=128,
502+
),
503+
createTorchLinearKernelSizeBench(
504+
"array512",
505+
kernelBatchSize=512,
506+
kernelSize=512,
507+
),
508+
createTorchLinearKernelSizeBench(
509+
"array1024",
510+
kernelBatchSize=512,
511+
kernelSize=1024,
512+
),
513+
createTorchLinearKernelSizeBench(
514+
"array5120",
515+
kernelBatchSize=512,
516+
kernelSize=5120,
385517
),
386518
]
387519

@@ -888,6 +1020,20 @@ def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
8881020
]
8891021

8901022

1023+
class TorchSingleQueue(TorchBenchmark):
    """Single-queue kernel-submission PyTorch benchmark.

    Thin wrapper around TorchBenchmark that fixes the compute-benchmarks
    test name to "KernelSubmitSingleQueue"; every other configuration
    value (suite, runtime, variant name, profiler, extra benchmark
    parameters) is forwarded to the base class unchanged.
    """

    # Fixed compute-benchmarks test name for this scenario.
    _TEST_NAME = "KernelSubmitSingleQueue"

    def __init__(
        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
    ):
        super().__init__(
            suite,
            runtime,
            self._TEST_NAME,
            variant_name,
            profiler_type,
            **kwargs,
        )
1036+
8911037
class TorchMultiQueue(TorchBenchmark):
8921038
def __init__(
8931039
self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
@@ -916,6 +1062,34 @@ def __init__(
9161062
)
9171063

9181064

1065+
class TorchLinearKernelSize(TorchBenchmark):
    """Linear-kernel-size PyTorch benchmark.

    Thin wrapper around TorchBenchmark that fixes the compute-benchmarks
    test name to "KernelSubmitLinearKernelSize"; all remaining
    configuration is forwarded to the base class unchanged.
    """

    # Fixed compute-benchmarks test name for this scenario.
    _TEST_NAME = "KernelSubmitLinearKernelSize"

    def __init__(
        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
    ):
        super().__init__(
            suite,
            runtime,
            self._TEST_NAME,
            variant_name,
            profiler_type,
            **kwargs,
        )
1077+
1078+
1079+
class TorchMemoryReuse(TorchBenchmark):
    """Memory-reuse PyTorch benchmark.

    Thin wrapper around TorchBenchmark that fixes the compute-benchmarks
    test name to "KernelSubmitMemoryReuse"; all remaining configuration
    is forwarded to the base class unchanged.
    """

    # Fixed compute-benchmarks test name for this scenario.
    _TEST_NAME = "KernelSubmitMemoryReuse"

    def __init__(
        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
    ):
        super().__init__(
            suite,
            runtime,
            self._TEST_NAME,
            variant_name,
            profiler_type,
            **kwargs,
        )
1091+
1092+
9191093
class QueueInOrderMemcpy(ComputeBenchmark):
9201094
def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
9211095
self._is_copy_only = isCopyOnly

devops/scripts/benchmarks/tests/test_integration.py

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,14 @@ def run_main(self, *args):
9393
],
9494
capture_output=True,
9595
)
96-
print("MAIN_PY_STDOUT:\n" + proc.stdout.decode() if proc.stdout else "<empty>")
97-
print("MAIN_PY_STDERR:\n" + proc.stderr.decode() if proc.stderr else "<empty>")
96+
print(
97+
"MAIN_PY_STDOUT:",
98+
"\n" + proc.stdout.decode() if proc.stdout else " <empty>",
99+
)
100+
print(
101+
"MAIN_PY_STDERR:",
102+
"\n" + proc.stderr.decode() if proc.stderr else " <empty>",
103+
)
98104
return proc.returncode
99105

100106
def get_output(self):
@@ -189,6 +195,11 @@ def test_submit_kernel(self):
189195
)
190196

191197
def test_torch_l0(self):
198+
self._checkCase(
199+
"torch_benchmark_l0 KernelBatchSize 512, KernelDataType Int32, KernelName Add, KernelParamsNum 5, KernelSubmitPattern Single, KernelWGCount 4096, KernelWGSize 512",
200+
"KernelSubmitSingleQueue Int32Large",
201+
{"pytorch", "L0"},
202+
)
192203
self._checkCase(
193204
"torch_benchmark_l0 kernelsPerQueue 20, workgroupCount 4096, workgroupSize 512",
194205
"KernelSubmitMultiQueue large",
@@ -199,22 +210,52 @@ def test_torch_l0(self):
199210
"KernelSubmitSlmSize small",
200211
{"pytorch", "L0"},
201212
)
213+
self._checkCase(
214+
"torch_benchmark_l0 kernelBatchSize 512, kernelSize 32",
215+
"KernelSubmitLinearKernelSize array32",
216+
{"pytorch", "L0"},
217+
)
218+
self._checkCase(
219+
"torch_benchmark_l0 dataType Int32, kernelBatchSize 4096",
220+
"KernelSubmitMemoryReuse Int32Large",
221+
{"pytorch", "L0"},
222+
)
202223

203224
def test_torch_sycl(self):
225+
self._checkCase(
226+
"torch_benchmark_sycl KernelBatchSize 512, KernelDataType Mixed, KernelName Add, KernelParamsNum 5, KernelSubmitPattern Single, KernelWGCount 512, KernelWGSize 256",
227+
"KernelSubmitSingleQueue MixedMedium",
228+
{"pytorch", "SYCL"},
229+
)
204230
self._checkCase(
205231
"torch_benchmark_sycl kernelsPerQueue 10, workgroupCount 512, workgroupSize 256",
206232
"KernelSubmitMultiQueue medium",
207233
{"pytorch", "SYCL"},
208234
)
209235
self._checkCase(
210-
"torch_benchmark_sycl batchSize 512, slmNum -1, warmupIterations 1",
211-
"KernelSubmitSlmSize max",
236+
"torch_benchmark_sycl batchSize 512, slmNum 16384, warmupIterations 1",
237+
"KernelSubmitSlmSize large",
238+
{"pytorch", "SYCL"},
239+
)
240+
self._checkCase(
241+
"torch_benchmark_sycl kernelBatchSize 512, kernelSize 5120",
242+
"KernelSubmitLinearKernelSize array5120",
243+
{"pytorch", "SYCL"},
244+
)
245+
self._checkCase(
246+
"torch_benchmark_sycl dataType Float, kernelBatchSize 4096",
247+
"KernelSubmitMemoryReuse FloatLarge",
212248
{"pytorch", "SYCL"},
213249
)
214250

215251
def test_torch_syclpreview(self):
216252
self._checkCase(
217-
"torch_benchmark_syclpreview kernelsPerQueue 4, workgroupCount 256, workgroupSize 124",
253+
"torch_benchmark_syclpreview KernelBatchSize 512, KernelDataType Mixed, KernelName Add, KernelParamsNum 5, KernelSubmitPattern Single, KernelWGCount 256, KernelWGSize 128",
254+
"KernelSubmitSingleQueue MixedSmall",
255+
{"pytorch", "SYCL"},
256+
)
257+
self._checkCase(
258+
"torch_benchmark_syclpreview kernelsPerQueue 4, workgroupCount 256, workgroupSize 128",
218259
"KernelSubmitMultiQueue small",
219260
{"pytorch", "SYCL"},
220261
)
@@ -223,6 +264,16 @@ def test_torch_syclpreview(self):
223264
"KernelSubmitSlmSize medium",
224265
{"pytorch", "SYCL"},
225266
)
267+
self._checkCase(
268+
"torch_benchmark_syclpreview kernelBatchSize 512, kernelSize 512",
269+
"KernelSubmitLinearKernelSize array512",
270+
{"pytorch", "SYCL"},
271+
)
272+
self._checkCase(
273+
"torch_benchmark_syclpreview dataType Float, kernelBatchSize 512",
274+
"KernelSubmitMemoryReuse FloatMedium",
275+
{"pytorch", "SYCL"},
276+
)
226277

227278

228279
if __name__ == "__main__":

0 commit comments

Comments (0)