@@ -768,6 +768,13 @@ void ComputeGraph::submit_current_cmd(const bool final_use, bool wait) {
768
768
}
769
769
}
770
770
771
+ void ComputeGraph::wait_on_encode_execute () {
772
+ if (encode_execute_fence_) {
773
+ encode_execute_fence_.wait ();
774
+ context_->fences ().return_fence (encode_execute_fence_);
775
+ }
776
+ }
777
+
771
778
void ComputeGraph::prepack () {
772
779
int i = 0 ;
773
780
bool submitted = false ;
@@ -793,7 +800,7 @@ void ComputeGraph::prepack() {
793
800
submit_current_cmd (/* final_use=*/ true , /* wait=*/ false );
794
801
}
795
802
staging_nbytes_in_cmd_ = 0 ;
796
- context_->set_cmd ();
803
+ context_->set_cmd (/* reusable = */ true );
797
804
submitted = true ;
798
805
}
799
806
@@ -806,30 +813,33 @@ void ComputeGraph::prepack() {
806
813
}
807
814
808
815
void ComputeGraph::encode_execute () {
816
+ wait_on_encode_execute ();
809
817
context_->flush ();
810
818
context_->set_cmd (/* reusable = */ true );
811
819
812
820
context_->cmd_reset_querypool ();
821
+ uint32_t encoded_node_count = 0 ;
813
822
814
823
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
815
824
node->encode (this );
825
+ encoded_node_count++;
826
+ if ((encoded_node_count % 64 ) == 0 ) {
827
+ submit_current_cmd (/* final_use=*/ false , /* wait=*/ false );
828
+ context_->set_cmd (true );
829
+ }
816
830
}
817
831
818
- // Indicate execute nodes have been freshly encoded and needs to be submitted
819
- // first
820
- execute_pending_first_submission = true ;
832
+ encode_execute_fence_ = context_-> fences (). get_fence ();
833
+ context_-> submit_cmd_to_gpu (
834
+ encode_execute_fence_. get_submit_handle (), /* final_use= */ false ) ;
821
835
}
822
836
823
837
void ComputeGraph::execute () {
824
- if (execute_pending_first_submission) {
825
- submit_current_cmd (/* final_use=*/ false , /* wait=*/ true );
826
- execute_pending_first_submission = false ;
827
- } else {
828
- vkapi::VulkanFence fence = context_->fences ().get_fence ();
829
- context_->submit_all_non_final_cmds (fence.get_submit_handle ());
830
- fence.wait ();
831
- context_->fences ().return_fence (fence);
832
- }
838
+ wait_on_encode_execute ();
839
+ vkapi::VulkanFence fence = context_->fences ().get_fence ();
840
+ context_->submit_all_non_final_cmds (fence.get_submit_handle ());
841
+ fence.wait ();
842
+ context_->fences ().return_fence (fence);
833
843
execute_count_++;
834
844
}
835
845
0 commit comments