@@ -149,15 +149,17 @@ def benchmarks(self) -> list[Benchmark]:
             for in_order_queue in [0, 1]:
                 for measure_completion in [0, 1]:
                     for use_events in [0, 1]:
-                        benches.append(
-                            SubmitKernel(
-                                self,
-                                runtime,
-                                in_order_queue,
-                                measure_completion,
-                                use_events,
+                        for kernel_exec_time in [1, 20]:
+                            benches.append(
+                                SubmitKernel(
+                                    self,
+                                    runtime,
+                                    in_order_queue,
+                                    measure_completion,
+                                    use_events,
+                                    kernel_exec_time,
+                                )
                             )
-                        )

         # Add SinKernelGraph benchmarks
         for runtime in self.enabled_runtimes():
@@ -332,11 +334,20 @@ def teardown(self):


 class SubmitKernel(ComputeBenchmark):
-    def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0):
+    def __init__(
+        self,
+        bench,
+        runtime: RUNTIMES,
+        ioq,
+        MeasureCompletion=0,
+        UseEvents=0,
+        KernelExecTime=1,
+    ):
         self.ioq = ioq
         self.runtime = runtime
         self.MeasureCompletion = MeasureCompletion
         self.UseEvents = UseEvents
+        self.KernelExecTime = KernelExecTime
         self.NumKernels = 10
         super().__init__(
             bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
@@ -353,7 +364,11 @@ def name(self):
         # to match the existing already stored results
         events_str = " not using events" if not self.UseEvents else ""

-        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}"
+        kernel_exec_time_str = (
+            f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
+        )
+
+        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"

     def display_name(self) -> str:
         order = "in order" if self.ioq else "out of order"
@@ -362,6 +377,8 @@ def display_name(self) -> str:
             info.append("with measure completion")
         if self.UseEvents:
             info.append("using events")
+        if self.KernelExecTime != 1:
+            info.append(f"KernelExecTime={self.KernelExecTime}")
         additional_info = f" {' '.join(info)}" if info else ""
         return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}"

@@ -373,7 +390,11 @@ def explicit_group(self):
         # to match the existing already stored results
         events_str = " not using events" if not self.UseEvents else ""

-        return f"SubmitKernel {order}{completion_str}{events_str}"
+        kernel_exec_time_str = (
+            f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
+        )
+
+        return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"

     def description(self) -> str:
         order = "in-order" if self.ioq else "out-of-order"
@@ -386,6 +407,7 @@ def description(self) -> str:
         return (
             f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
             f"Runs {self.NumKernels} simple kernels with minimal execution time to isolate API overhead from kernel execution time."
+            f" Each kernel executes for approximately {self.KernelExecTime} microseconds."
         )

     def range(self) -> tuple[float, float]:
@@ -398,7 +420,7 @@ def bin_args(self) -> list[str]:
             "--iterations=100000",
             "--Profiling=0",
             f"--NumKernels={self.NumKernels}",
-            "--KernelExecTime=1",
+            f"--KernelExecTime={self.KernelExecTime}",
             f"--UseEvents={self.UseEvents}",
         ]

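For reference, here is a minimal standalone sketch (not part of the patch) of how the name suffix introduced by this commit behaves. It reproduces only the string logic visible in the diff; submit_kernel_name is a hypothetical helper, and the order and completion_str expressions are assumed to follow the same pattern as the events_str shown above, since their definitions fall outside the changed hunks.

# Hypothetical helper mirroring the string logic of SubmitKernel.name() in this diff.
def submit_kernel_name(runtime: str, ioq: int, measure_completion: int,
                       use_events: int, kernel_exec_time: int) -> str:
    order = "in order" if ioq else "out of order"
    # Assumed to match the unchanged parts of name(); not shown in the diff.
    completion_str = " with measure completion" if measure_completion else ""
    events_str = " not using events" if not use_events else ""
    # New in this commit: KernelExecTime=1 yields an empty suffix, keeping the legacy name.
    kernel_exec_time_str = (
        f" KernelExecTime={kernel_exec_time}" if kernel_exec_time != 1 else ""
    )
    return (
        f"api_overhead_benchmark_{runtime} SubmitKernel {order}"
        f"{completion_str}{events_str}{kernel_exec_time_str}"
    )

print(submit_kernel_name("sycl", 1, 0, 0, 1))
# api_overhead_benchmark_sycl SubmitKernel in order not using events
print(submit_kernel_name("sycl", 1, 0, 0, 20))
# api_overhead_benchmark_sycl SubmitKernel in order not using events KernelExecTime=20

Because KernelExecTime=1 produces an empty suffix, the default configuration keeps its previous name and continues to match already stored results, while the new KernelExecTime=20 variants get distinct names and explicit groups.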