diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index c5af504c81c..fbcb69a926f 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -756,15 +756,16 @@ void ComputeGraph::prepare_pipelines() {
       vkapi::ComputePipelineCache::Hasher>();
 }
 
-void ComputeGraph::submit_current_cmd(const bool final_use) {
-  context_->submit_cmd_to_gpu(VK_NULL_HANDLE, final_use);
-}
-
-void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
-  vkapi::VulkanFence fence = context_->fences().get_fence();
-  context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use);
-  fence.wait();
-  context_->fences().return_fence(fence);
+void ComputeGraph::submit_current_cmd(const bool final_use, bool wait) {
+  if (wait) {
+    // Submit and wait for the command buffer to complete
+    vkapi::VulkanFence fence = context_->fences().get_fence();
+    context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use);
+    fence.wait();
+    context_->fences().return_fence(fence);
+  } else {
+    context_->submit_cmd_to_gpu(VK_NULL_HANDLE, final_use);
+  }
 }
 
 void ComputeGraph::prepack() {
@@ -786,10 +787,10 @@ void ComputeGraph::prepack() {
       // proceed. This results in lower load latency at the cost of higher peak
       // memory usage.
       if (reduce_peak_memory) {
-        submit_current_cmd_and_wait();
+        submit_current_cmd(/*final_use=*/true, /*wait=*/true);
         context_->flush();
       } else {
-        submit_current_cmd();
+        submit_current_cmd(/*final_use=*/true, /*wait=*/false);
       }
       staging_nbytes_in_cmd_ = 0;
       context_->set_cmd();
@@ -799,7 +800,7 @@ void ComputeGraph::prepack() {
     node->encode(this);
     i++;
   }
-  submit_current_cmd_and_wait(/*final_use=*/true);
+  submit_current_cmd(/*final_use=*/true, /*wait=*/true);
   context_->flush();
   staging_nbytes_in_cmd_ = 0;
 }
@@ -821,7 +822,7 @@ void ComputeGraph::encode_execute() {
 
 void ComputeGraph::execute() {
   if (execute_pending_first_submission) {
-    submit_current_cmd_and_wait(/*final_use=*/false);
+    submit_current_cmd(/*final_use=*/false, /*wait=*/true);
     execute_pending_first_submission = false;
   } else {
     vkapi::VulkanFence fence = context_->fences().get_fence();
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 6a38bee5414..5187cbcba4f 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -844,16 +844,11 @@ class ComputeGraph final {
  protected:
   // Command Buffer Management
 
-  /*
-   * Submits the current command buffer in the Context to the GPU for execution.
-   */
-  void submit_current_cmd(const bool final_use = false);
-
   /*
    * Submits the current command buffer in the Context to the GPU for execution,
-   * and wait for it to complete before returning.
+   * and waits for it to complete before returning, if wait is true.
    */
-  void submit_current_cmd_and_wait(const bool final_use = false);
+  void submit_current_cmd(const bool final_use = false, bool wait = true);
 
  public:
   //