[ET-VK] 6/n Split dispatches between multiple command buffers. Repurpose submit_current_cmd_and_wait to wait based on input flag.

trivedivivek · trivedivivek · commit adf5a5ba3208 · 2025-07-15T16:24:20.000-07:00
Pull Request resolved: #12529. This diff repurposes the `submit_current_cmd_and_wait` function in the `ComputeGraph` class so that it waits for command buffer completion based on an input flag. The function is merged into the existing `submit_current_cmd`, which now takes an additional `wait` parameter (defaulting to `true`). If `wait` is `true`, the function submits the command buffer to the GPU and waits for its completion. Otherwise, it only submits the command buffer without waiting. ghstack-source-id: 296448615 @exported-using-ghexport Differential Revision: [D78360042](https://our.internmc.facebook.com/intern/diff/D78360042/)
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -756,15 +756,16 @@ void ComputeGraph::prepare_pipelines() {
       vkapi::ComputePipelineCache::Hasher>();
 }
 
-void ComputeGraph::submit_current_cmd(const bool final_use) {
-  context_->submit_cmd_to_gpu(VK_NULL_HANDLE, final_use);
-}
-
-void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
-  vkapi::VulkanFence fence = context_->fences().get_fence();
-  context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use);
-  fence.wait();
-  context_->fences().return_fence(fence);
+void ComputeGraph::submit_current_cmd(const bool final_use, bool wait) {
+  if (wait) {
+    // Submit and wait for command buffer
+    vkapi::VulkanFence fence = context_->fences().get_fence();
+    context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use);
+    fence.wait();
+    context_->fences().return_fence(fence);
+  } else {
+    context_->submit_cmd_to_gpu(VK_NULL_HANDLE, final_use);
+  }
 }
 
 void ComputeGraph::prepack() {
@@ -786,10 +787,10 @@ void ComputeGraph::prepack() {
       // proceed. This results in lower load latency at the cost of higher peak
       // memory usage.
       if (reduce_peak_memory) {
-        submit_current_cmd_and_wait();
+        submit_current_cmd(/*final_use=*/true, /*wait=*/true);
         context_->flush();
       } else {
-        submit_current_cmd();
+        submit_current_cmd(/*final_use=*/true, /*wait=*/false);
       }
       staging_nbytes_in_cmd_ = 0;
       context_->set_cmd();
@@ -799,7 +800,7 @@ void ComputeGraph::prepack() {
     node->encode(this);
     i++;
   }
-  submit_current_cmd_and_wait(/*final_use=*/true);
+  submit_current_cmd(/*final_use=*/true, /*wait=*/true);
   context_->flush();
   staging_nbytes_in_cmd_ = 0;
 }
@@ -821,7 +822,7 @@ void ComputeGraph::encode_execute() {
 
 void ComputeGraph::execute() {
   if (execute_pending_first_submission) {
-    submit_current_cmd_and_wait(/*final_use=*/false);
+    submit_current_cmd(/*final_use=*/false, /*wait=*/true);
     execute_pending_first_submission = false;
   } else {
     vkapi::VulkanFence fence = context_->fences().get_fence();
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -836,9 +836,9 @@ class ComputeGraph final {
 
   /*
    * Submits the current command buffer in the Context to the GPU for execution,
-   * and wait for it to complete before returning.
+   * and, if `wait` is true, waits for it to complete before returning.
    */
-  void submit_current_cmd_and_wait(const bool final_use = false);
+  void submit_current_cmd(const bool final_use = false, bool wait = true);
 
  public:
   //