Skip to content

Commit 7ca2133

Browse files
committed
Fix kernel argument indices bug
1 parent 39df031 commit 7ca2133

File tree

1 file changed

+7
-8
lines changed

1 file changed

+7
-8
lines changed

source/adapters/cuda/kernel.hpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ struct ur_kernel_handle_t_ {
6868
args_size_t ParamSizes;
6969
/// Pointer into the \p Storage allocation for each parameter's value.
7070
args_index_t Indices;
71+
/// Byte offset into \p Storage at which the next added argument is written; advanced by the argument's size after each insertion.
72+
size_t InsertPos = 0;
7173
/// Aligned size in bytes for each local memory parameter after padding has
7274
/// been added. Zero if the argument at the index isn't a local memory
7375
/// argument.
@@ -110,13 +112,13 @@ struct ur_kernel_handle_t_ {
110112
OriginalLocalMemSize.resize(Index + 1);
111113
}
112114
ParamSizes[Index] = Size;
113-
// calculate the insertion point on the array
114-
size_t InsertPos = std::accumulate(std::begin(ParamSizes),
115-
std::begin(ParamSizes) + Index, 0);
116-
// Update the stored value for the argument
115+
117116
std::memcpy(&Storage[InsertPos], Arg, Size);
117+
118118
Indices[Index] = &Storage[InsertPos];
119119
AlignedLocalMemSize[Index] = LocalSize;
120+
121+
InsertPos += Size;
120122
}
121123

122124
/// Returns the padded size and offset of a local memory argument.
@@ -177,10 +179,7 @@ struct ur_kernel_handle_t_ {
177179
AlignedLocalMemSize[SuccIndex] = SuccAlignedLocalSize;
178180

179181
// Store new offset into local data
180-
const size_t InsertPos =
181-
std::accumulate(std::begin(ParamSizes),
182-
std::begin(ParamSizes) + SuccIndex, size_t{0});
183-
std::memcpy(&Storage[InsertPos], &SuccAlignedLocalOffset,
182+
std::memcpy(Indices[SuccIndex], &SuccAlignedLocalOffset,
184183
sizeof(size_t));
185184
}
186185
}

0 commit comments

Comments
 (0)