@@ -49,7 +49,7 @@ def git_url(self) -> str:
49
49
return "https://github.com/intel/compute-benchmarks.git"
50
50
51
51
def git_hash (self ) -> str :
52
- return "49a8c6314875c57fee9b59aea16e721572e3021d "
52
+ return "3283b5edb8bf771c519625af741b5db7a37b0111 "
53
53
54
54
def setup (self ):
55
55
if options .sycl is None :
@@ -200,11 +200,12 @@ def benchmarks(self) -> list[Benchmark]:
200
200
# Add UR-specific benchmarks
201
201
if options .ur is not None :
202
202
benches += [
203
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 , 1 ),
204
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 , 1 ),
205
- MemcpyExecute (self , 100 , 4 , 102400 , 10 , 1 , 1 , 0 , 1 ),
206
- MemcpyExecute (self , 100 , 4 , 102400 , 10 , 1 , 1 , 0 , 0 ),
207
- MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 , 1 ),
203
+ MemcpyExecute (self , RUNTIMES .UR , 400 , 1 , 102400 , 10 , 1 , 1 , 1 , 1 , 0 ),
204
+ MemcpyExecute (self , RUNTIMES .UR , 400 , 1 , 102400 , 10 , 0 , 1 , 1 , 1 , 0 ),
205
+ MemcpyExecute (self , RUNTIMES .UR , 100 , 4 , 102400 , 10 , 1 , 1 , 0 , 1 , 0 ),
206
+ MemcpyExecute (self , RUNTIMES .UR , 100 , 4 , 102400 , 10 , 1 , 1 , 0 , 0 , 0 ),
207
+ MemcpyExecute (self , RUNTIMES .UR , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 , 1 , 0 ),
208
+ MemcpyExecute (self , RUNTIMES .UR , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 , 1 , 1 ),
208
209
UsmMemoryAllocation (self , RUNTIMES .UR , "Device" , 256 , "Both" ),
209
210
UsmMemoryAllocation (self , RUNTIMES .UR , "Device" , 256 * 1024 , "Both" ),
210
211
UsmBatchMemoryAllocation (self , RUNTIMES .UR , "Device" , 128 , 256 , "Both" ),
@@ -215,6 +216,20 @@ def benchmarks(self) -> list[Benchmark]:
215
216
self , RUNTIMES .UR , "Device" , 128 , 128 * 1024 , "Both"
216
217
),
217
218
]
219
+ benches += [
220
+ MemcpyExecute (
221
+ self , RUNTIMES .SYCL_PREVIEW , 4096 , 1 , 1024 , 40 , 1 , 1 , 0 , 1 , 0
222
+ ),
223
+ MemcpyExecute (
224
+ self , RUNTIMES .SYCL_PREVIEW , 4096 , 1 , 1024 , 40 , 1 , 1 , 0 , 1 , 1
225
+ ),
226
+ MemcpyExecute (
227
+ self , RUNTIMES .SYCL_PREVIEW , 4096 , 4 , 1024 , 10 , 1 , 1 , 0 , 1 , 0
228
+ ),
229
+ MemcpyExecute (
230
+ self , RUNTIMES .SYCL_PREVIEW , 4096 , 4 , 1024 , 10 , 1 , 1 , 0 , 1 , 1
231
+ ),
232
+ ]
218
233
219
234
return benches
220
235
@@ -537,6 +552,7 @@ class MemcpyExecute(ComputeBenchmark):
537
552
def __init__ (
538
553
self ,
539
554
bench ,
555
+ runtime : RUNTIMES ,
540
556
numOpsPerThread ,
541
557
numThreads ,
542
558
allocSize ,
@@ -545,7 +561,9 @@ def __init__(
545
561
dstUSM ,
546
562
useEvent ,
547
563
useCopyOffload ,
564
+ useBarrier ,
548
565
):
566
+ self .runtime = runtime
549
567
self .numOpsPerThread = numOpsPerThread
550
568
self .numThreads = numThreads
551
569
self .allocSize = allocSize
@@ -554,7 +572,10 @@ def __init__(
554
572
self .dstUSM = dstUSM
555
573
self .useEvents = useEvent
556
574
self .useCopyOffload = useCopyOffload
557
- super ().__init__ (bench , "multithread_benchmark_ur" , "MemcpyExecute" )
575
+ self .useBarrier = useBarrier
576
+ super ().__init__ (
577
+ bench , f"multithread_benchmark_{ self .runtime .value } " , "MemcpyExecute"
578
+ )
558
579
559
580
def extra_env_vars (self ) -> dict :
560
581
if not self .useCopyOffload :
@@ -564,9 +585,10 @@ def extra_env_vars(self) -> dict:
564
585
565
586
def name (self ):
566
587
return (
567
- f"multithread_benchmark_ur MemcpyExecute opsPerThread:{ self .numOpsPerThread } , numThreads:{ self .numThreads } , allocSize:{ self .allocSize } srcUSM:{ self .srcUSM } dstUSM:{ self .dstUSM } "
588
+ f"multithread_benchmark_ { self . runtime . value } MemcpyExecute opsPerThread:{ self .numOpsPerThread } , numThreads:{ self .numThreads } , allocSize:{ self .allocSize } srcUSM:{ self .srcUSM } dstUSM:{ self .dstUSM } "
568
589
+ (" without events" if not self .useEvents else "" )
569
590
+ (" without copy offload" if not self .useCopyOffload else "" )
591
+ + (" with barrier" if self .useBarrier else "" )
570
592
)
571
593
572
594
def explicit_group (self ):
@@ -575,21 +597,25 @@ def explicit_group(self):
575
597
+ str (self .numOpsPerThread )
576
598
+ " numThreads: "
577
599
+ str (self .numThreads )
600
+ + " allocSize: "
601
+ + str (self .allocSize )
578
602
)
579
603
580
604
def description (self ) -> str :
581
605
src_type = "device" if self .srcUSM == 1 else "host"
582
606
dst_type = "device" if self .dstUSM == 1 else "host"
583
607
events = "with" if self .useEvents else "without"
584
608
copy_offload = "with" if self .useCopyOffload else "without"
609
+ with_barrier = "with" if self .useBarrier else "without"
585
610
return (
586
611
f"Measures multithreaded memory copy performance with { self .numThreads } threads "
587
612
f"each performing { self .numOpsPerThread } operations on { self .allocSize } bytes "
588
- f"from { src_type } to { dst_type } memory { events } events { copy_offload } driver copy offload."
613
+ f"from { src_type } to { dst_type } memory { events } events { copy_offload } driver copy offload "
614
+ f"{ with_barrier } barrier. "
589
615
)
590
616
591
617
def get_tags (self ):
592
- return ["memory" , "latency" , "UR" , "micro" ]
618
+ return ["memory" , "latency" , runtime_to_tag_name ( self . runtime ) , "micro" ]
593
619
594
620
def bin_args (self ) -> list [str ]:
595
621
return [
@@ -603,6 +629,7 @@ def bin_args(self) -> list[str]:
603
629
f"--iterations={ self .iterations } " ,
604
630
f"--SrcUSM={ self .srcUSM } " ,
605
631
f"--DstUSM={ self .dstUSM } " ,
632
+ f"--UseBarrier={ self .useBarrier } " ,
606
633
]
607
634
608
635
0 commit comments