Skip to content

Commit 65dfdac

Browse files
authored
[Bench] Add SYCL MemcpyExecute MT benchmark (#18604)
to show the impact of submitting a barrier
1 parent d8a66b8 commit 65dfdac

File tree

1 file changed

+37
-10
lines changed

1 file changed

+37
-10
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 37 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def git_url(self) -> str:
4949
return "https://github.com/intel/compute-benchmarks.git"
5050

5151
def git_hash(self) -> str:
52-
return "49a8c6314875c57fee9b59aea16e721572e3021d"
52+
return "3283b5edb8bf771c519625af741b5db7a37b0111"
5353

5454
def setup(self):
5555
if options.sycl is None:
@@ -200,11 +200,12 @@ def benchmarks(self) -> list[Benchmark]:
200200
# Add UR-specific benchmarks
201201
if options.ur is not None:
202202
benches += [
203-
MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1, 1),
204-
MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1, 1),
205-
MemcpyExecute(self, 100, 4, 102400, 10, 1, 1, 0, 1),
206-
MemcpyExecute(self, 100, 4, 102400, 10, 1, 1, 0, 0),
207-
MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0, 1),
203+
MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 1, 1, 1, 1, 0),
204+
MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 0, 1, 1, 1, 0),
205+
MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 1, 0),
206+
MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 0, 0),
207+
MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 0),
208+
MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 1),
208209
UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256, "Both"),
209210
UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256 * 1024, "Both"),
210211
UsmBatchMemoryAllocation(self, RUNTIMES.UR, "Device", 128, 256, "Both"),
@@ -215,6 +216,20 @@ def benchmarks(self) -> list[Benchmark]:
215216
self, RUNTIMES.UR, "Device", 128, 128 * 1024, "Both"
216217
),
217218
]
219+
benches += [
220+
MemcpyExecute(
221+
self, RUNTIMES.SYCL_PREVIEW, 4096, 1, 1024, 40, 1, 1, 0, 1, 0
222+
),
223+
MemcpyExecute(
224+
self, RUNTIMES.SYCL_PREVIEW, 4096, 1, 1024, 40, 1, 1, 0, 1, 1
225+
),
226+
MemcpyExecute(
227+
self, RUNTIMES.SYCL_PREVIEW, 4096, 4, 1024, 10, 1, 1, 0, 1, 0
228+
),
229+
MemcpyExecute(
230+
self, RUNTIMES.SYCL_PREVIEW, 4096, 4, 1024, 10, 1, 1, 0, 1, 1
231+
),
232+
]
218233

219234
return benches
220235

@@ -537,6 +552,7 @@ class MemcpyExecute(ComputeBenchmark):
537552
def __init__(
538553
self,
539554
bench,
555+
runtime: RUNTIMES,
540556
numOpsPerThread,
541557
numThreads,
542558
allocSize,
@@ -545,7 +561,9 @@ def __init__(
545561
dstUSM,
546562
useEvent,
547563
useCopyOffload,
564+
useBarrier,
548565
):
566+
self.runtime = runtime
549567
self.numOpsPerThread = numOpsPerThread
550568
self.numThreads = numThreads
551569
self.allocSize = allocSize
@@ -554,7 +572,10 @@ def __init__(
554572
self.dstUSM = dstUSM
555573
self.useEvents = useEvent
556574
self.useCopyOffload = useCopyOffload
557-
super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute")
575+
self.useBarrier = useBarrier
576+
super().__init__(
577+
bench, f"multithread_benchmark_{self.runtime.value}", "MemcpyExecute"
578+
)
558579

559580
def extra_env_vars(self) -> dict:
560581
if not self.useCopyOffload:
@@ -564,9 +585,10 @@ def extra_env_vars(self) -> dict:
564585

565586
def name(self):
566587
return (
567-
f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}"
588+
f"multithread_benchmark_{self.runtime.value} MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}"
568589
+ (" without events" if not self.useEvents else "")
569590
+ (" without copy offload" if not self.useCopyOffload else "")
591+
+ (" with barrier" if self.useBarrier else "")
570592
)
571593

572594
def explicit_group(self):
@@ -575,21 +597,25 @@ def explicit_group(self):
575597
+ str(self.numOpsPerThread)
576598
+ " numThreads: "
577599
+ str(self.numThreads)
600+
+ " allocSize: "
601+
+ str(self.allocSize)
578602
)
579603

580604
def description(self) -> str:
581605
src_type = "device" if self.srcUSM == 1 else "host"
582606
dst_type = "device" if self.dstUSM == 1 else "host"
583607
events = "with" if self.useEvents else "without"
584608
copy_offload = "with" if self.useCopyOffload else "without"
609+
with_barrier = "with" if self.useBarrier else "without"
585610
return (
586611
f"Measures multithreaded memory copy performance with {self.numThreads} threads "
587612
f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
588-
f"from {src_type} to {dst_type} memory {events} events {copy_offload} driver copy offload."
613+
f"from {src_type} to {dst_type} memory {events} events {copy_offload} driver copy offload "
614+
f"{with_barrier} barrier. "
589615
)
590616

591617
def get_tags(self):
592-
return ["memory", "latency", "UR", "micro"]
618+
return ["memory", "latency", runtime_to_tag_name(self.runtime), "micro"]
593619

594620
def bin_args(self) -> list[str]:
595621
return [
@@ -603,6 +629,7 @@ def bin_args(self) -> list[str]:
603629
f"--iterations={self.iterations}",
604630
f"--SrcUSM={self.srcUSM}",
605631
f"--DstUSM={self.dstUSM}",
632+
f"--UseBarrier={self.useBarrier}",
606633
]
607634

608635

0 commit comments

Comments
 (0)