@@ -61,8 +61,8 @@ def git_url(self) -> str:
6161 return "https://github.com/intel/compute-benchmarks.git"
6262
6363 def git_hash (self ) -> str :
64- # Dec 17 , 2025
65- return "420549188cd8900c27cf9b04fd859ebe81876a99 "
64+ # Dec 23 , 2025
65+ return "a9546fe49b6291dbd5238dc966a2909d8ad72992 "
6666
6767 def setup (self ) -> None :
6868 if options .sycl is None :
@@ -322,6 +322,63 @@ def createRrBench(variant_name: str, **kwargs):
322322 ),
323323 ]
324324
325+ # Add TorchSingleQueue benchmarks
326+ for runtime in filter (lambda x : x != RUNTIMES .UR , RUNTIMES ):
327+
328+ def createTorchSingleQueueBench (variant_name : str , ** kwargs ):
329+ return TorchSingleQueue (
330+ self ,
331+ runtime ,
332+ variant_name ,
333+ PROFILERS .TIMER ,
334+ ** {
335+ ** kwargs ,
336+ "KernelBatchSize" : 512 ,
337+ "KernelName" : "Add" ,
338+ "KernelParamsNum" : 5 ,
339+ "KernelSubmitPattern" : "Single" ,
340+ },
341+ )
342+
343+ benches += [
344+ createTorchSingleQueueBench (
345+ "Int32Large" ,
346+ KernelDataType = "Int32" ,
347+ KernelWGCount = 4096 ,
348+ KernelWGSize = 512 ,
349+ ),
350+ createTorchSingleQueueBench (
351+ "Int32Medium" ,
352+ KernelDataType = "Int32" ,
353+ KernelWGCount = 512 ,
354+ KernelWGSize = 256 ,
355+ ),
356+ createTorchSingleQueueBench (
357+ "Int32Small" ,
358+ KernelDataType = "Int32" ,
359+ KernelWGCount = 256 ,
360+ KernelWGSize = 128 ,
361+ ),
362+ createTorchSingleQueueBench (
363+ "MixedLarge" ,
364+ KernelDataType = "Mixed" ,
365+ KernelWGCount = 4096 ,
366+ KernelWGSize = 512 ,
367+ ),
368+ createTorchSingleQueueBench (
369+ "MixedMedium" ,
370+ KernelDataType = "Mixed" ,
371+ KernelWGCount = 512 ,
372+ KernelWGSize = 256 ,
373+ ),
374+ createTorchSingleQueueBench (
375+ "MixedSmall" ,
376+ KernelDataType = "Mixed" ,
377+ KernelWGCount = 256 ,
378+ KernelWGSize = 128 ,
379+ ),
380+ ]
381+
325382 # Add TorchMultiQueue benchmarks
326383 for runtime in filter (lambda x : x != RUNTIMES .UR , RUNTIMES ):
327384
@@ -350,7 +407,7 @@ def createTorchMultiQueueBench(variant_name: str, **kwargs):
350407 createTorchMultiQueueBench (
351408 "small" ,
352409 workgroupCount = 256 ,
353- workgroupSize = 124 ,
410+ workgroupSize = 128 ,
354411 kernelsPerQueue = 4 ,
355412 ),
356413 ]
@@ -379,9 +436,84 @@ def createTorchSlmSizeBench(variant_name: str, **kwargs):
379436 slmNum = 1024 ,
380437 ),
381438 createTorchSlmSizeBench (
382- "max " ,
439+ "large " ,
383440 batchSize = 512 ,
384- slmNum = - 1 ,
441+ slmNum = 16384 ,
442+ ),
443+ ]
444+
445+ # Add TorchMemoryReuse benchmarks
446+ for runtime in filter (lambda x : x != RUNTIMES .UR , RUNTIMES ):
447+
448+ def createTorchMemoryReuseBench (variant_name : str , ** kwargs ):
449+ return TorchMemoryReuse (
450+ self ,
451+ runtime ,
452+ variant_name ,
453+ PROFILERS .TIMER ,
454+ ** kwargs ,
455+ )
456+
457+ benches += [
458+ createTorchMemoryReuseBench (
459+ "Int32Large" ,
460+ kernelBatchSize = 4096 ,
461+ dataType = "Int32" ,
462+ ),
463+ createTorchMemoryReuseBench (
464+ "Int32Medium" ,
465+ kernelBatchSize = 512 ,
466+ dataType = "Int32" ,
467+ ),
468+ createTorchMemoryReuseBench (
469+ "FloatLarge" ,
470+ kernelBatchSize = 4096 ,
471+ dataType = "Float" ,
472+ ),
473+ createTorchMemoryReuseBench (
474+ "FloatMedium" ,
475+ kernelBatchSize = 512 ,
476+ dataType = "Float" ,
477+ ),
478+ ]
479+
480+ # Add TorchLinearKernelSize benchmarks
481+ for runtime in filter (lambda x : x != RUNTIMES .UR , RUNTIMES ):
482+
483+ def createTorchLinearKernelSizeBench (variant_name : str , ** kwargs ):
484+ return TorchLinearKernelSize (
485+ self ,
486+ runtime ,
487+ variant_name ,
488+ PROFILERS .TIMER ,
489+ ** kwargs ,
490+ )
491+
492+ benches += [
493+ createTorchLinearKernelSizeBench (
494+ "array32" ,
495+ kernelBatchSize = 512 ,
496+ kernelSize = 32 ,
497+ ),
498+ createTorchLinearKernelSizeBench (
499+ "array128" ,
500+ kernelBatchSize = 512 ,
501+ kernelSize = 128 ,
502+ ),
503+ createTorchLinearKernelSizeBench (
504+ "array512" ,
505+ kernelBatchSize = 512 ,
506+ kernelSize = 512 ,
507+ ),
508+ createTorchLinearKernelSizeBench (
509+ "array1024" ,
510+ kernelBatchSize = 512 ,
511+ kernelSize = 1024 ,
512+ ),
513+ createTorchLinearKernelSizeBench (
514+ "array5120" ,
515+ kernelBatchSize = 512 ,
516+ kernelSize = 5120 ,
385517 ),
386518 ]
387519
@@ -888,6 +1020,20 @@ def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
8881020 ]
8891021
8901022
class TorchSingleQueue(TorchBenchmark):
    """Torch benchmark wrapper for the "KernelSubmitSingleQueue" test."""

    def __init__(
        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
    ):
        # Only the test name is fixed here; every other argument is
        # forwarded unchanged to the TorchBenchmark base.
        super().__init__(
            suite,
            runtime,
            "KernelSubmitSingleQueue",
            variant_name,
            profiler_type,
            **kwargs,
        )
1035+
1036+
8911037class TorchMultiQueue (TorchBenchmark ):
8921038 def __init__ (
8931039 self , suite , runtime : RUNTIMES , variant_name : str , profiler_type , ** kwargs
@@ -916,6 +1062,34 @@ def __init__(
9161062 )
9171063
9181064
class TorchLinearKernelSize(TorchBenchmark):
    """Torch benchmark wrapper for the "KernelSubmitLinearKernelSize" test."""

    def __init__(
        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
    ):
        # Only the test name is fixed here; every other argument is
        # forwarded unchanged to the TorchBenchmark base.
        super().__init__(
            suite,
            runtime,
            "KernelSubmitLinearKernelSize",
            variant_name,
            profiler_type,
            **kwargs,
        )
1077+
1078+
class TorchMemoryReuse(TorchBenchmark):
    """Torch benchmark wrapper for the "KernelSubmitMemoryReuse" test."""

    def __init__(
        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
    ):
        # Only the test name is fixed here; every other argument is
        # forwarded unchanged to the TorchBenchmark base.
        super().__init__(
            suite,
            runtime,
            "KernelSubmitMemoryReuse",
            variant_name,
            profiler_type,
            **kwargs,
        )
1091+
1092+
9191093class QueueInOrderMemcpy (ComputeBenchmark ):
9201094 def __init__ (self , bench , isCopyOnly , source , destination , size , profiler_type ):
9211095 self ._is_copy_only = isCopyOnly
0 commit comments