Skip to content

Commit 81a48a6

Browse files
authored
[Bench] add SubmitKernel benchmark variant with longer kernel (#18632)
To test out-of-order (OOO) queues.
1 parent f27a817 commit 81a48a6

File tree

1 file changed

+34
-12
lines changed

1 file changed

+34
-12
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -149,15 +149,17 @@ def benchmarks(self) -> list[Benchmark]:
149149
for in_order_queue in [0, 1]:
150150
for measure_completion in [0, 1]:
151151
for use_events in [0, 1]:
152-
benches.append(
153-
SubmitKernel(
154-
self,
155-
runtime,
156-
in_order_queue,
157-
measure_completion,
158-
use_events,
152+
for kernel_exec_time in [1, 20]:
153+
benches.append(
154+
SubmitKernel(
155+
self,
156+
runtime,
157+
in_order_queue,
158+
measure_completion,
159+
use_events,
160+
kernel_exec_time,
161+
)
159162
)
160-
)
161163

162164
# Add SinKernelGraph benchmarks
163165
for runtime in self.enabled_runtimes():
@@ -332,11 +334,20 @@ def teardown(self):
332334

333335

334336
class SubmitKernel(ComputeBenchmark):
335-
def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0):
337+
def __init__(
338+
self,
339+
bench,
340+
runtime: RUNTIMES,
341+
ioq,
342+
MeasureCompletion=0,
343+
UseEvents=0,
344+
KernelExecTime=1,
345+
):
336346
self.ioq = ioq
337347
self.runtime = runtime
338348
self.MeasureCompletion = MeasureCompletion
339349
self.UseEvents = UseEvents
350+
self.KernelExecTime = KernelExecTime
340351
self.NumKernels = 10
341352
super().__init__(
342353
bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
@@ -353,7 +364,11 @@ def name(self):
353364
# to match the existing already stored results
354365
events_str = " not using events" if not self.UseEvents else ""
355366

356-
return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}"
367+
kernel_exec_time_str = (
368+
f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
369+
)
370+
371+
return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
357372

358373
def display_name(self) -> str:
359374
order = "in order" if self.ioq else "out of order"
@@ -362,6 +377,8 @@ def display_name(self) -> str:
362377
info.append("with measure completion")
363378
if self.UseEvents:
364379
info.append("using events")
380+
if self.KernelExecTime != 1:
381+
info.append(f"KernelExecTime={self.KernelExecTime}")
365382
additional_info = f" {' '.join(info)}" if info else ""
366383
return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}"
367384

@@ -373,7 +390,11 @@ def explicit_group(self):
373390
# to match the existing already stored results
374391
events_str = " not using events" if not self.UseEvents else ""
375392

376-
return f"SubmitKernel {order}{completion_str}{events_str}"
393+
kernel_exec_time_str = (
394+
f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
395+
)
396+
397+
return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
377398

378399
def description(self) -> str:
379400
order = "in-order" if self.ioq else "out-of-order"
@@ -386,6 +407,7 @@ def description(self) -> str:
386407
return (
387408
f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
388409
f"Runs {self.NumKernels} simple kernels with minimal execution time to isolate API overhead from kernel execution time."
410+
f"Each kernel executes for approximately {self.KernelExecTime} micro seconds."
389411
)
390412

391413
def range(self) -> tuple[float, float]:
@@ -398,7 +420,7 @@ def bin_args(self) -> list[str]:
398420
"--iterations=100000",
399421
"--Profiling=0",
400422
f"--NumKernels={self.NumKernels}",
401-
"--KernelExecTime=1",
423+
f"--KernelExecTime={self.KernelExecTime}",
402424
f"--UseEvents={self.UseEvents}",
403425
]
404426

0 commit comments

Comments
 (0)