Skip to content

Commit 27a3561

Browse files
committed
Rename Indices member variable to ArgPointers
1 parent bf6b6f9 commit 27a3561

File tree

7 files changed

+76
-71
lines changed

7 files changed

+76
-71
lines changed

source/adapters/cuda/command_buffer.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ ur_result_t commandHandleReleaseInternal(
6969

7070
ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
7171
ur_context_handle_t Context, ur_device_handle_t Device, bool IsUpdatable)
72-
: Context(Context), Device(Device),
73-
IsUpdatable(IsUpdatable), CudaGraph{nullptr}, CudaGraphExec{nullptr},
74-
RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} {
72+
: Context(Context), Device(Device), IsUpdatable(IsUpdatable),
73+
CudaGraph{nullptr}, CudaGraphExec{nullptr}, RefCountInternal{1},
74+
RefCountExternal{1}, NextSyncPoint{0} {
7575
urContextRetain(Context);
7676
urDeviceRetain(Device);
7777
}
@@ -523,7 +523,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
523523
ThreadsPerBlock, BlocksPerGrid));
524524

525525
// Set node param structure with the kernel related data
526-
auto &ArgIndices = hKernel->getArgIndices();
526+
auto &ArgPointers = hKernel->getArgPointers();
527527
CUDA_KERNEL_NODE_PARAMS NodeParams = {};
528528
NodeParams.func = CuFunc;
529529
NodeParams.gridDimX = BlocksPerGrid[0];
@@ -533,7 +533,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
533533
NodeParams.blockDimY = ThreadsPerBlock[1];
534534
NodeParams.blockDimZ = ThreadsPerBlock[2];
535535
NodeParams.sharedMemBytes = LocalSize;
536-
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
536+
NodeParams.kernelParams = const_cast<void **>(ArgPointers.data());
537537

538538
// Create and add an new kernel node to the Cuda graph
539539
UR_CHECK_ERROR(cuGraphAddKernelNode(&GraphNode, hCommandBuffer->CudaGraph,
@@ -1398,7 +1398,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
13981398
Params.blockDimZ = ThreadsPerBlock[2];
13991399
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
14001400
Params.kernelParams =
1401-
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());
1401+
const_cast<void **>(KernelCommandHandle->Kernel->getArgPointers().data());
14021402

14031403
CUgraphNode Node = KernelCommandHandle->Node;
14041404
CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec;

source/adapters/cuda/enqueue.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ enqueueKernelLaunch(ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel,
492492
UR_CHECK_ERROR(RetImplEvent->start());
493493
}
494494

495-
auto &ArgIndices = hKernel->getArgIndices();
495+
auto &ArgIndices = hKernel->getArgPointers();
496496
UR_CHECK_ERROR(cuLaunchKernel(
497497
CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
498498
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize,
@@ -680,7 +680,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
680680
UR_CHECK_ERROR(RetImplEvent->start());
681681
}
682682

683-
auto &ArgIndices = hKernel->getArgIndices();
683+
auto &ArgPointers = hKernel->getArgPointers();
684684

685685
CUlaunchConfig launch_config;
686686
launch_config.gridDimX = BlocksPerGrid[0];
@@ -696,7 +696,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
696696
launch_config.numAttrs = launch_attribute.size();
697697

698698
UR_CHECK_ERROR(cuLaunchKernelEx(&launch_config, CuFunc,
699-
const_cast<void **>(ArgIndices.data()),
699+
const_cast<void **>(ArgPointers.data()),
700700
nullptr));
701701

702702
if (phEvent) {

source/adapters/cuda/kernel.hpp

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ struct ur_kernel_handle_t_ {
6666
args_t Storage;
6767
/// Aligned size of each parameter, including padding.
6868
args_size_t ParamSizes;
69-
/// Byte offset into /p Storage allocation for each parameter.
70-
args_index_t Indices;
69+
/// Byte offset into /p Storage allocation for each argument.
70+
args_index_t ArgPointers;
7171
/// Position in the Storage array where the next argument should added.
7272
size_t InsertPos = 0;
7373
/// Aligned size in bytes for each local memory parameter after padding has
@@ -92,21 +92,23 @@ struct ur_kernel_handle_t_ {
9292
std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0};
9393

9494
arguments() {
95-
// Place the implicit offset index at the end of the indicies collection
96-
Indices.emplace_back(&ImplicitOffsetArgs);
95+
// Place the implicit offset index at the end of the ArgPointers
96+
// collection.
97+
ArgPointers.emplace_back(&ImplicitOffsetArgs);
9798
}
9899

99100
/// Add an argument to the kernel.
100101
/// If the argument existed before, it is replaced.
101102
/// Otherwise, it is added.
102103
/// Gaps are filled with empty arguments.
103-
/// Implicit offset argument is kept at the back of the indices collection.
104+
/// Implicit offset argument is kept at the back of the ArgPointers
105+
/// collection.
104106
void addArg(size_t Index, size_t Size, const void *Arg,
105107
size_t LocalSize = 0) {
106108
// Expand storage to accommodate this Index if needed.
107-
if (Index + 2 > Indices.size()) {
109+
if (Index + 2 > ArgPointers.size()) {
108110
// Move implicit offset argument index with the end
109-
Indices.resize(Index + 2, Indices.back());
111+
ArgPointers.resize(Index + 2, ArgPointers.back());
110112
// Ensure enough space for the new argument
111113
ParamSizes.resize(Index + 1);
112114
AlignedLocalMemSize.resize(Index + 1);
@@ -117,13 +119,13 @@ struct ur_kernel_handle_t_ {
117119
if (ParamSizes[Index] == 0) {
118120
ParamSizes[Index] = Size;
119121
std::memcpy(&Storage[InsertPos], Arg, Size);
120-
Indices[Index] = &Storage[InsertPos];
122+
ArgPointers[Index] = &Storage[InsertPos];
121123
AlignedLocalMemSize[Index] = LocalSize;
122124
InsertPos += Size;
123125
}
124126
// Otherwise, update the existing argument.
125127
else {
126-
std::memcpy(Indices[Index], Arg, Size);
128+
std::memcpy(ArgPointers[Index], Arg, Size);
127129
AlignedLocalMemSize[Index] = LocalSize;
128130
assert(Size == ParamSizes[Index]);
129131
}
@@ -138,7 +140,7 @@ struct ur_kernel_handle_t_ {
138140
std::pair<size_t, size_t> calcAlignedLocalArgument(size_t Index,
139141
size_t Size) {
140142
// Store the unpadded size of the local argument
141-
if (Index + 2 > Indices.size()) {
143+
if (Index + 2 > ArgPointers.size()) {
142144
AlignedLocalMemSize.resize(Index + 1);
143145
OriginalLocalMemSize.resize(Index + 1);
144146
}
@@ -168,10 +170,11 @@ struct ur_kernel_handle_t_ {
168170
return std::make_pair(AlignedLocalSize, AlignedLocalOffset);
169171
}
170172

171-
// Iterate over all existing local argument which follows StartIndex
173+
// Iterate over each existing local argument which follows StartIndex
172174
// index, update the offset and pointer into the kernel local memory.
173175
void updateLocalArgOffset(size_t StartIndex) {
174-
const size_t NumArgs = Indices.size() - 1; // Accounts for implicit arg
176+
const size_t NumArgs =
177+
ArgPointers.size() - 1; // Accounts for implicit arg
175178
for (auto SuccIndex = StartIndex; SuccIndex < NumArgs; SuccIndex++) {
176179
const size_t OriginalLocalSize = OriginalLocalMemSize[SuccIndex];
177180
if (OriginalLocalSize == 0) {
@@ -187,7 +190,7 @@ struct ur_kernel_handle_t_ {
187190
AlignedLocalMemSize[SuccIndex] = SuccAlignedLocalSize;
188191

189192
// Store new offset into local data
190-
std::memcpy(Indices[SuccIndex], &SuccAlignedLocalOffset,
193+
std::memcpy(ArgPointers[SuccIndex], &SuccAlignedLocalOffset,
191194
sizeof(size_t));
192195
}
193196
}
@@ -235,7 +238,7 @@ struct ur_kernel_handle_t_ {
235238
std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size);
236239
}
237240

238-
const args_index_t &getIndices() const noexcept { return Indices; }
241+
const args_index_t &getArgPointers() const noexcept { return ArgPointers; }
239242

240243
uint32_t getLocalSize() const {
241244
return std::accumulate(std::begin(AlignedLocalMemSize),
@@ -306,7 +309,7 @@ struct ur_kernel_handle_t_ {
306309
/// real one required by the kernel, since this cannot be queried from
307310
/// the CUDA Driver API
308311
uint32_t getNumArgs() const noexcept {
309-
return static_cast<uint32_t>(Args.Indices.size() - 1);
312+
return static_cast<uint32_t>(Args.ArgPointers.size() - 1);
310313
}
311314

312315
void setKernelArg(int Index, size_t Size, const void *Arg) {
@@ -321,8 +324,8 @@ struct ur_kernel_handle_t_ {
321324
return Args.setImplicitOffset(Size, ImplicitOffset);
322325
}
323326

324-
const arguments::args_index_t &getArgIndices() const {
325-
return Args.getIndices();
327+
const arguments::args_index_t &getArgPointers() const {
328+
return Args.getArgPointers();
326329
}
327330

328331
void setWorkGroupMemory(size_t MemSize) { Args.setWorkGroupMemory(MemSize); }

source/adapters/hip/command_buffer.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) {
4848

4949
ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
5050
ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable)
51-
: Context(hContext), Device(hDevice),
52-
IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr},
53-
RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} {
51+
: Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable),
52+
HIPGraph{nullptr}, HIPGraphExec{nullptr}, RefCountInternal{1},
53+
RefCountExternal{1}, NextSyncPoint{0} {
5454
urContextRetain(hContext);
5555
urDeviceRetain(hDevice);
5656
}
@@ -378,7 +378,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
378378
pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid));
379379

380380
// Set node param structure with the kernel related data
381-
auto &ArgIndices = hKernel->getArgIndices();
381+
auto &ArgPointers = hKernel->getArgPointers();
382382
hipKernelNodeParams NodeParams;
383383
NodeParams.func = HIPFunc;
384384
NodeParams.gridDim.x = BlocksPerGrid[0];
@@ -388,7 +388,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
388388
NodeParams.blockDim.y = ThreadsPerBlock[1];
389389
NodeParams.blockDim.z = ThreadsPerBlock[2];
390390
NodeParams.sharedMemBytes = LocalSize;
391-
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
391+
NodeParams.kernelParams = const_cast<void **>(ArgPointers.data());
392392
NodeParams.extra = nullptr;
393393

394394
// Create and add an new kernel node to the HIP graph
@@ -1098,7 +1098,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
10981098
Params.blockDim.z = ThreadsPerBlock[2];
10991099
Params.sharedMemBytes = hCommand->Kernel->getLocalSize();
11001100
Params.kernelParams =
1101-
const_cast<void **>(hCommand->Kernel->getArgIndices().data());
1101+
const_cast<void **>(hCommand->Kernel->getArgPointers().data());
11021102

11031103
hipGraphNode_t Node = hCommand->Node;
11041104
hipGraphExec_t HipGraphExec = CommandBuffer->HIPGraphExec;

source/adapters/hip/enqueue.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -98,26 +98,26 @@ ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size,
9898
constexpr size_t DeviceFlagCount = 6;
9999
#endif
100100
static constexpr std::array<ur_to_hip_advice_t, DeviceFlagCount>
101-
URToHIPMemAdviseDeviceFlags {
102-
std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY,
103-
hipMemAdviseSetReadMostly),
104-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY,
105-
hipMemAdviseUnsetReadMostly),
106-
std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION,
107-
hipMemAdviseSetPreferredLocation),
108-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION,
109-
hipMemAdviseUnsetPreferredLocation),
110-
std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE,
111-
hipMemAdviseSetAccessedBy),
112-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE,
113-
hipMemAdviseUnsetAccessedBy),
101+
URToHIPMemAdviseDeviceFlags{
102+
std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY,
103+
hipMemAdviseSetReadMostly),
104+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY,
105+
hipMemAdviseUnsetReadMostly),
106+
std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION,
107+
hipMemAdviseSetPreferredLocation),
108+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION,
109+
hipMemAdviseUnsetPreferredLocation),
110+
std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE,
111+
hipMemAdviseSetAccessedBy),
112+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE,
113+
hipMemAdviseUnsetAccessedBy),
114114
#if defined(__HIP_PLATFORM_AMD__)
115-
std::make_pair(UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY,
116-
hipMemAdviseSetCoarseGrain),
117-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY,
118-
hipMemAdviseUnsetCoarseGrain),
115+
std::make_pair(UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY,
116+
hipMemAdviseSetCoarseGrain),
117+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY,
118+
hipMemAdviseUnsetCoarseGrain),
119119
#endif
120-
};
120+
};
121121
for (const auto &[URAdvice, HIPAdvice] : URToHIPMemAdviseDeviceFlags) {
122122
if (URAdviceFlags & URAdvice) {
123123
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HIPAdvice, Device));
@@ -308,7 +308,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
308308
}
309309
}
310310

311-
auto ArgIndices = hKernel->getArgIndices();
311+
auto ArgPointers = hKernel->getArgPointers();
312312

313313
// If migration of mem across buffer is needed, an event must be associated
314314
// with this command, implicitly if phEvent is nullptr
@@ -322,7 +322,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
322322
UR_CHECK_ERROR(hipModuleLaunchKernel(
323323
HIPFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
324324
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2],
325-
hKernel->getLocalSize(), HIPStream, ArgIndices.data(), nullptr));
325+
hKernel->getLocalSize(), HIPStream, ArgPointers.data(), nullptr));
326326

327327
if (phEvent) {
328328
UR_CHECK_ERROR(RetImplEvent->record());

0 commit comments

Comments
 (0)