Skip to content

Commit 6309119

Browse files
committed
[ET-VK] Optimize the buffer-to-int8 quantized packing op to improve width-packed performance.
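Pull Request resolved: #12388. This diff simplifies the loop in the int8 quantized packing operation for width-packed tensors to improve performance. When `packed_dim == 0`, the loop now starts from `buf_indices[0]` and increments a single buffer index on each iteration instead of reading `buf_indices[i]`; other packed dimensions keep the original per-element lookup. Differential Revision: [D78143041](https://our.internmc.facebook.com/intern/diff/D78143041/) ghstack-source-id: 295655468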
1 parent 217f686 commit 6309119

File tree

1 file changed

+18
-6
lines changed

1 file changed

+18
-6
lines changed

backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,24 @@ ivec4 read_texel(ivec4 tidx) {
5757

5858
ivec4 out_tex = ivec4(0);
5959

60-
[[unroll]] for (int i = 0; i < 4; ++i) {
61-
if (tidx[packed_dim] + i < sizes[packed_dim]) {
62-
const int in_texel = nchw_in[buf_indices[i] >> 2];
63-
int extracted_val = (in_texel >> (8 * (buf_indices[i] & 3))) & mask;
64-
extracted_val = extend_sign(extracted_val);
65-
out_tex[i] = extracted_val;
60+
if (packed_dim == 0) {
61+
int buf_index = buf_indices[0];
62+
[[unroll]] for (int i = 0; i < 4; ++i, ++buf_index) {
63+
if (tidx[packed_dim] + i < sizes[packed_dim]) {
64+
const int in_texel = nchw_in[buf_index >> 2];
65+
int extracted_val = (in_texel >> (8 * (buf_index & 3))) & mask;
66+
extracted_val = extend_sign(extracted_val);
67+
out_tex[i] = extracted_val;
68+
}
69+
}
70+
} else {
71+
[[unroll]] for (int i = 0; i < 4; ++i) {
72+
if (tidx[packed_dim] + i < sizes[packed_dim]) {
73+
const int in_texel = nchw_in[buf_indices[i] >> 2];
74+
int extracted_val = (in_texel >> (8 * (buf_indices[i] & 3))) & mask;
75+
extracted_val = extend_sign(extracted_val);
76+
out_tex[i] = extracted_val;
77+
}
6678
}
6779
}
6880

0 commit comments

Comments
 (0)