From 3604b86a65a3c0096d795baf82595ff36fc108ae Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Tue, 15 Jul 2025 16:24:20 -0700 Subject: [PATCH] [ET-VK] 7/n Split dispatches between multiple command buffers. Split the execute dispatches across multiple command buffers based on dispatch count: encode_execute() now submits the current command buffer after every 64 encoded nodes and tracks the final submission with a fence that is waited on before the next encode or execute. Differential Revision: [D78360039](https://our.internmc.facebook.com/intern/diff/D78360039/) [ghstack-poisoned] --- .../vulkan/runtime/graph/ComputeGraph.cpp | 36 ++++++++++++------- backends/vulkan/runtime/graph/ComputeGraph.h | 6 ++-- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index fbcb69a926f..b0baf3dc817 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -768,6 +768,13 @@ void ComputeGraph::submit_current_cmd(const bool final_use, bool wait) { } } +void ComputeGraph::wait_on_encode_execute() { + if (encode_execute_fence_) { + encode_execute_fence_.wait(); + context_->fences().return_fence(encode_execute_fence_); + } +} + void ComputeGraph::prepack() { int i = 0; bool submitted = false; @@ -793,7 +800,7 @@ void ComputeGraph::prepack() { submit_current_cmd(/*final_use=*/true, /*wait=*/false); } staging_nbytes_in_cmd_ = 0; - context_->set_cmd(); + context_->set_cmd(/*reusable = */ true); submitted = true; } @@ -806,30 +813,33 @@ void ComputeGraph::prepack() { } void ComputeGraph::encode_execute() { + wait_on_encode_execute(); context_->flush(); context_->set_cmd(/*reusable = */ true); context_->cmd_reset_querypool(); + uint32_t encoded_node_count = 0; for (std::unique_ptr& node : execute_nodes_) { node->encode(this); + encoded_node_count++; + if ((encoded_node_count % 64) == 0) { + submit_current_cmd(/*final_use=*/false, /*wait=*/false); + context_->set_cmd(true); + } } - // Indicate execute nodes have been freshly encoded and needs to be submitted - // first - 
execute_pending_first_submission = true; + encode_execute_fence_ = context_->fences().get_fence(); + context_->submit_cmd_to_gpu( + encode_execute_fence_.get_submit_handle(), /*final_use=*/false); } void ComputeGraph::execute() { - if (execute_pending_first_submission) { - submit_current_cmd(/*final_use=*/false, /*wait=*/true); - execute_pending_first_submission = false; - } else { - vkapi::VulkanFence fence = context_->fences().get_fence(); - context_->submit_all_non_final_cmds(fence.get_submit_handle()); - fence.wait(); - context_->fences().return_fence(fence); - } + wait_on_encode_execute(); + vkapi::VulkanFence fence = context_->fences().get_fence(); + context_->submit_all_non_final_cmds(fence.get_submit_handle()); + fence.wait(); + context_->fences().return_fence(fence); execute_count_++; } diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 0c9774c12ed..d35e7b61174 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -204,9 +204,7 @@ class ComputeGraph final { // current Context's command buffer is submitted now. size_t staging_nbytes_in_cmd_ = 0; - // Flag to indicate if execute nodes have been freshly encoded and have not - // been submitted yet. - bool execute_pending_first_submission = true; + vkapi::VulkanFence encode_execute_fence_; public: // @@ -840,6 +838,8 @@ class ComputeGraph final { */ void submit_current_cmd(const bool final_use = false, bool wait = true); + void wait_on_encode_execute(); + public: // // Graph Prepacking