Skip to content
This repository was archived by the owner on Jan 13, 2025. It is now read-only.

Commit 8547059

Browse files
authored
Refactor SPR/SPR2 index group_broadcast (#533)
This patch replaces the two separate scalar `group_broadcast` calls (one for the row index and one for the column index) with a single `group_broadcast` of a two-element vector that holds both indices.
1 parent fbce030 commit 8547059

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

src/operations/blas2/spr.hpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -101,20 +101,21 @@ typename rhs_1_t::value_t Spr<Single, isUpper, lhs_t, rhs_1_t, rhs_2_t>::eval(
101101

102102
index_t row = 0, col = 0;
103103

104-
#ifndef __ADAPTIVECPP__
104+
#if (defined(INTEL_GPU) || defined(NVIDIA_GPU)) && not defined(__ADAPTIVECPP__)
105105
if (!id) {
106106
#endif
107107
Spr<Single, isUpper, lhs_t, rhs_1_t, rhs_2_t>::compute_row_col(
108108
global_idx, N_, row, col);
109-
#ifndef __ADAPTIVECPP__
109+
#if (defined(INTEL_GPU) || defined(NVIDIA_GPU)) && not defined(__ADAPTIVECPP__)
110110
}
111-
112-
row = sycl::group_broadcast(ndItem.get_group(), row);
113-
col = sycl::group_broadcast(ndItem.get_group(), col);
111+
sycl::vec<index_t, 2> bcast_idxs{row, col};
112+
bcast_idxs = sycl::group_broadcast(ndItem.get_group(), bcast_idxs);
113+
row = bcast_idxs[0];
114+
col = bcast_idxs[1];
114115
#endif
115116

116117
if (global_idx < lhs_size) {
117-
#ifndef __ADAPTIVECPP__
118+
#if (defined(INTEL_GPU) || defined(NVIDIA_GPU)) && not defined(__ADAPTIVECPP__)
118119
if constexpr (isUpper) {
119120
if (id) {
120121
row += id;

0 commit comments

Comments
 (0)