Skip to content

Commit d696913

Browse files
committed
[ET-VK] Applying bias after sum calculation in conv2d pw shader to improve performance.
This diff improves the performance of the conv2d pw (pointwise) shader by changing the order of operations: the accumulators are now zero-initialized, and the bias is fetched after the sum calculation and added to each output texel just before it is stored. Differential Revision: [D75450662](https://our.internmc.facebook.com/intern/diff/D75450662/). ghstack-source-id: 286407336. Pull Request resolved: #11150.
1 parent 35b5064 commit d696913

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,9 @@ void main() {
7979
// Tuple of consecutive 4 elements represents a single output texel.
8080
float sum[TILE_SIZE_X * TILE_SIZE_Y * 4];
8181

82-
const vec4 bias = texelFetch(t_bias, ivec2(out_pos_z, 0), 0);
83-
8482
// Initialize the output array to zero; the bias is added when the output texel is stored
85-
for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y * 4; i += 4) {
86-
sum[i] = bias.x;
87-
sum[i + 1] = bias.y;
88-
sum[i + 2] = bias.z;
89-
sum[i + 3] = bias.w;
83+
for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y * 4; i++) {
84+
sum[i] = 0;
9085
}
9186

9287
int z4 = 0;
@@ -157,10 +152,13 @@ void main() {
157152
}
158153
}
159154

155+
const vec4 bias = texelFetch(t_bias, ivec2(out_pos_z, 0), 0);
156+
160157
for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y; ++i) {
161158
const ivec3 pos_l = ivec3(pos[i * 2], pos[i * 2 + 1], out_pos_z);
162159
if (all(lessThan(pos_l.xy, out_limits.xy))) {
163-
imageStore(t_out, pos_l, op(vec4(sum[i * 4], sum[i * 4 + 1], sum[i * 4 + 2], sum[i * 4 + 3]), out_min, out_max));
160+
const vec4 out_sum = vec4(sum[i * 4], sum[i * 4 + 1], sum[i * 4 + 2], sum[i * 4 + 3]);
161+
imageStore(t_out, pos_l, op(out_sum + bias, out_min, out_max));
164162
}
165163
}
166164
}

0 commit comments

Comments
 (0)