Update on "[ET-VK] Using push constants for buffer to image prepack nodes."

trivedivivek · trivedivivek · commit 81732ce5cbec · 2025-05-30T10:26:17.000-07:00
This diff enables the use of push constants for buffer-to-image prepack nodes in the Vulkan runtime graph. Push constants are a more efficient way than uniform buffers to pass small amounts of data to shaders.

* The `nchw_to_*.yaml` files have been updated to include the `USE_PUSH_CONST` flag, which is `True` by default and enables the use of push constants for all `nchw_to_*` operations.
* New variants of the `nchw_to_*` operations have been added with the suffix `_no_pc`; these variants do not use push constants and are used for compatibility with testing and utility functions.
* The `Convolution.cpp` and `Staging.cpp` files have been updated to pass empty parameter buffers and use push constants instead.

Differential Revision: [D70102398](https://our.internmc.facebook.com/intern/diff/D70102398/)

[ghstack-poisoned]
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl
@@ -46,7 +46,9 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
  * size is only 1x1, making it easier to re-use loaded texels from t_kernel.
  */
 void main() {
-  const int out_limits_scaled[2] = {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X, (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y};
+  const int out_limits_scaled[2] =
+    {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X,
+     (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y};
 
   const int div_by_x = int(gl_GlobalInvocationID.x / out_limits_scaled[0]);
   const int out_pos[3] = {int(gl_GlobalInvocationID.x % out_limits_scaled[0]), div_by_x, int(gl_GlobalInvocationID.y)};
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl
@@ -48,7 +48,9 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
  * size is only 1x1, making it easier to re-use loaded texels from t_kernel.
  */
 void main() {
-  const int out_limits_scaled[2] = {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X, (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y};
+  const int out_limits_scaled[2] =
+    {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X,
+     (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y};
 
   const uint16_t div_by_x = uint16_t(gl_GlobalInvocationID.x / out_limits_scaled[0]);
   const uint16_t out_pos_xy[2] = {uint16_t(gl_GlobalInvocationID.x % out_limits_scaled[0]), div_by_x};
diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp
@@ -74,7 +74,9 @@ void record_nchw_to_image_op(
 
   context->submit_compute_job(
       get_nchw_to_tensor_shader(
-          v_dst, context->adapter_ptr()->has_full_int8_buffers_support(), false),
+          v_dst,
+          context->adapter_ptr()->has_full_int8_buffers_support(),
+          false),
       pipeline_barrier,
       v_dst.logical_limits(),
       adaptive_work_group_size(v_dst.logical_limits()),