@@ -149,15 +149,17 @@ def benchmarks(self) -> list[Benchmark]:
149149 for in_order_queue in [0 , 1 ]:
150150 for measure_completion in [0 , 1 ]:
151151 for use_events in [0 , 1 ]:
152- benches .append (
153- SubmitKernel (
154- self ,
155- runtime ,
156- in_order_queue ,
157- measure_completion ,
158- use_events ,
152+ for kernel_exec_time in [1 , 20 ]:
153+ benches .append (
154+ SubmitKernel (
155+ self ,
156+ runtime ,
157+ in_order_queue ,
158+ measure_completion ,
159+ use_events ,
160+ kernel_exec_time ,
161+ )
159162 )
160- )
161163
162164 # Add SinKernelGraph benchmarks
163165 for runtime in self .enabled_runtimes ():
@@ -332,11 +334,20 @@ def teardown(self):
332334
333335
334336class SubmitKernel (ComputeBenchmark ):
335- def __init__ (self , bench , runtime : RUNTIMES , ioq , MeasureCompletion = 0 , UseEvents = 0 ):
337+ def __init__ (
338+ self ,
339+ bench ,
340+ runtime : RUNTIMES ,
341+ ioq ,
342+ MeasureCompletion = 0 ,
343+ UseEvents = 0 ,
344+ KernelExecTime = 1 ,
345+ ):
336346 self .ioq = ioq
337347 self .runtime = runtime
338348 self .MeasureCompletion = MeasureCompletion
339349 self .UseEvents = UseEvents
350+ self .KernelExecTime = KernelExecTime
340351 self .NumKernels = 10
341352 super ().__init__ (
342353 bench , f"api_overhead_benchmark_{ runtime .value } " , "SubmitKernel"
@@ -353,7 +364,11 @@ def name(self):
353364 # to match the existing already stored results
354365 events_str = " not using events" if not self .UseEvents else ""
355366
356- return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } "
367+ kernel_exec_time_str = (
368+ f" KernelExecTime={ self .KernelExecTime } " if self .KernelExecTime != 1 else ""
369+ )
370+
371+ return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } "
357372
358373 def display_name (self ) -> str :
359374 order = "in order" if self .ioq else "out of order"
@@ -362,6 +377,8 @@ def display_name(self) -> str:
362377 info .append ("with measure completion" )
363378 if self .UseEvents :
364379 info .append ("using events" )
380+ if self .KernelExecTime != 1 :
381+ info .append (f"KernelExecTime={ self .KernelExecTime } " )
365382 additional_info = f" { ' ' .join (info )} " if info else ""
366383 return f"{ self .runtime .value .upper ()} SubmitKernel { order } { additional_info } , NumKernels { self .NumKernels } "
367384
@@ -373,7 +390,11 @@ def explicit_group(self):
373390 # to match the existing already stored results
374391 events_str = " not using events" if not self .UseEvents else ""
375392
376- return f"SubmitKernel { order } { completion_str } { events_str } "
393+ kernel_exec_time_str = (
394+ f" KernelExecTime={ self .KernelExecTime } " if self .KernelExecTime != 1 else ""
395+ )
396+
397+ return f"SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } "
377398
378399 def description (self ) -> str :
379400 order = "in-order" if self .ioq else "out-of-order"
@@ -386,6 +407,7 @@ def description(self) -> str:
386407 return (
387408 f"Measures CPU time overhead of submitting { order } kernels through { runtime_name } API{ completion_desc } . "
388409 f"Runs { self .NumKernels } simple kernels with minimal execution time to isolate API overhead from kernel execution time."
410+ f" Each kernel executes for approximately { self .KernelExecTime } microseconds."
389411 )
390412
391413 def range (self ) -> tuple [float , float ]:
@@ -398,7 +420,7 @@ def bin_args(self) -> list[str]:
398420 "--iterations=100000" ,
399421 "--Profiling=0" ,
400422 f"--NumKernels={ self .NumKernels } " ,
401- "--KernelExecTime=1 " ,
423+ f"--KernelExecTime={ self .KernelExecTime } " ,
402424 f"--UseEvents={ self .UseEvents } " ,
403425 ]
404426
0 commit comments