Skip to content

Commit 3ac73a5

Browse files
authored
[UR] [L0v2] Add support of out-of-order command buffers to L0 adapter v2 (#18570)
1 parent c8986cd commit 3ac73a5

File tree

7 files changed

+312
-90
lines changed

7 files changed

+312
-90
lines changed

unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp

Lines changed: 245 additions & 68 deletions
Large diffs are not rendered by default.

unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,17 @@ struct ur_exp_command_buffer_handle_t_ : public ur_object {
3232
ur_result_t
3333
registerExecutionEventUnlocked(ur_event_handle_t nextExecutionEvent);
3434

35-
lockable<ur_command_list_manager> commandListManager;
36-
37-
ur_result_t finalizeCommandBuffer();
3835
// Indicates if command-buffer commands can be updated after it is closed.
3936
const bool isUpdatable = false;
37+
const bool isInOrder = true;
38+
4039
// Indicates if command-buffer profiling is enabled.
4140
const bool isProfilingEnabled = false;
4241

42+
lockable<ur_command_list_manager> commandListManager;
43+
44+
ur_result_t finalizeCommandBuffer();
45+
4346
ur_result_t
4447
createCommandHandle(locked<ur_command_list_manager> &commandListLocked,
4548
ur_kernel_handle_t hKernel, uint32_t workDim,
@@ -51,11 +54,25 @@ struct ur_exp_command_buffer_handle_t_ : public ur_object {
5154
uint32_t numUpdateCommands,
5255
const ur_exp_command_buffer_update_kernel_launch_desc_t *updateCommands);
5356

57+
ur_exp_command_buffer_sync_point_t getSyncPoint(ur_event_handle_t event);
58+
ur_event_handle_t *getWaitListFromSyncPoints(
59+
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
60+
uint32_t numSyncPointsInWaitList);
61+
5462
private:
63+
// Stores all sync points that are created by the command buffer.
64+
std::vector<ur_event_handle_t> syncPoints;
65+
66+
// Temporary storage for sync points that are passed to functions that require
67+
// an array of events. This is used to avoid allocating new memory every time.
68+
std::vector<ur_event_handle_t> syncPointWaitList;
69+
5570
const ur_context_handle_t context;
5671
const ur_device_handle_t device;
72+
5773
std::vector<std::unique_ptr<ur_exp_command_buffer_command_handle_t_>>
5874
commandHandles;
75+
5976
// Indicates if command-buffer was finalized.
6077
bool isFinalized = false;
6178

unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@
1818
ur_command_list_manager::ur_command_list_manager(
1919
ur_context_handle_t context, ur_device_handle_t device,
2020
v2::raii::command_list_unique_handle &&commandList, v2::event_flags_t flags,
21-
ur_queue_t_ *queue)
22-
: context(context), device(device),
23-
eventPool(context->getEventPoolCache().borrow(device->Id.value(), flags)),
24-
zeCommandList(std::move(commandList)), queue(queue) {
21+
ur_queue_t_ *queue, PoolCacheType listType)
22+
: context(context), device(device), zeCommandList(std::move(commandList)),
23+
queue(queue) {
24+
auto &eventPoolTmp = context->getEventPoolCache(listType);
25+
eventPool = eventPoolTmp.borrow(device->Id.value(), flags);
2526
UR_CALL_THROWS(ur::level_zero::urContextRetain(context));
2627
UR_CALL_THROWS(ur::level_zero::urDeviceRetain(device));
2728
}
@@ -320,17 +321,18 @@ ur_result_t ur_command_list_manager::appendUSMPrefetch(
320321
return UR_RESULT_SUCCESS;
321322
}
322323

323-
ur_result_t
324-
ur_command_list_manager::appendUSMAdvise(const void *pMem, size_t size,
325-
ur_usm_advice_flags_t advice,
326-
ur_event_handle_t *phEvent) {
324+
ur_result_t ur_command_list_manager::appendUSMAdvise(
325+
const void *pMem, size_t size, ur_usm_advice_flags_t advice,
326+
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
327+
ur_event_handle_t *phEvent) {
327328
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMAdvise");
328329

329330
auto zeAdvice = ur_cast<ze_memory_advice_t>(advice);
330331

331332
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE);
332333

333-
auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0);
334+
auto [pWaitEvents, numWaitEvents] =
335+
getWaitListView(phEventWaitList, numEventsInWaitList);
334336

335337
if (pWaitEvents) {
336338
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,

unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "command_list_cache.hpp"
1313
#include "common.hpp"
14+
#include "context.hpp"
1415
#include "event_pool_cache.hpp"
1516
#include "memory.hpp"
1617
#include "queue_api.hpp"
@@ -39,7 +40,8 @@ struct ur_command_list_manager {
3940
ur_command_list_manager(ur_context_handle_t context,
4041
ur_device_handle_t device,
4142
v2::raii::command_list_unique_handle &&commandList,
42-
v2::event_flags_t flags, ur_queue_t_ *queue);
43+
v2::event_flags_t flags, ur_queue_t_ *queue,
44+
PoolCacheType listType);
4345
ur_command_list_manager(const ur_command_list_manager &src) = delete;
4446
ur_command_list_manager(ur_command_list_manager &&src) = default;
4547

@@ -128,6 +130,8 @@ struct ur_command_list_manager {
128130

129131
ur_result_t appendUSMAdvise(const void *pMem, size_t size,
130132
ur_usm_advice_flags_t advice,
133+
uint32_t numEventsInWaitList,
134+
const ur_event_handle_t *phEventWaitList,
131135
ur_event_handle_t *phEvent);
132136

133137
ur_result_t appendBarrier(uint32_t numEventsInWaitList,

unified-runtime/source/adapters/level_zero/v2/context.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,25 @@ ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext,
5353
commandListCache(hContext,
5454
{phDevices[0]->Platform->ZeCopyOffloadExtensionSupported,
5555
phDevices[0]->Platform->ZeMutableCmdListExt.Supported}),
56-
eventPoolCache(
56+
eventPoolCacheImmediate(
5757
this, phDevices[0]->Platform->getNumDevices(),
5858
[context = this](DeviceId /* deviceId*/, v2::event_flags_t flags)
5959
-> std::unique_ptr<v2::event_provider> {
60-
assert((flags & v2::EVENT_FLAGS_COUNTER) != 0);
61-
6260
// TODO: just use per-context id?
6361
return std::make_unique<v2::provider_normal>(
6462
context, v2::QUEUE_IMMEDIATE, flags);
6563
}),
64+
eventPoolCacheRegular(this, phDevices[0]->Platform->getNumDevices(),
65+
[context = this, platform = phDevices[0]->Platform](
66+
DeviceId deviceId, v2::event_flags_t flags)
67+
-> std::unique_ptr<v2::event_provider> {
68+
std::ignore = deviceId;
69+
std::ignore = platform;
70+
71+
// TODO: just use per-context id?
72+
return std::make_unique<v2::provider_normal>(
73+
context, v2::QUEUE_REGULAR, flags);
74+
}),
6675
nativeEventsPool(this, std::make_unique<v2::provider_normal>(
6776
this, v2::QUEUE_IMMEDIATE,
6877
v2::EVENT_FLAGS_PROFILING_ENABLED)),

unified-runtime/source/adapters/level_zero/v2/context.hpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include "event_pool_cache.hpp"
1818
#include "usm.hpp"
1919

20+
// Identifies which of a context's event pool caches is requested via
// ur_context_handle_t_::getEventPoolCache(): `Immediate` selects
// eventPoolCacheImmediate and `Regular` selects eventPoolCacheRegular.
enum class PoolCacheType { Immediate, Regular };
21+
2022
struct ur_context_handle_t_ : ur_object {
2123
ur_context_handle_t_(ze_context_handle_t hContext, uint32_t numDevices,
2224
const ur_device_handle_t *phDevices, bool ownZeContext);
@@ -34,9 +36,18 @@ struct ur_context_handle_t_ : ur_object {
3436
getP2PDevices(ur_device_handle_t hDevice) const;
3537

3638
v2::event_pool &getNativeEventsPool() { return nativeEventsPool; }
37-
v2::event_pool_cache &getEventPoolCache() { return eventPoolCache; }
3839
v2::command_list_cache_t &getCommandListCache() { return commandListCache; }
39-
40+
v2::event_pool_cache &getEventPoolCache(PoolCacheType type) {
41+
switch (type) {
42+
case PoolCacheType::Immediate:
43+
return eventPoolCacheImmediate;
44+
case PoolCacheType::Regular:
45+
return eventPoolCacheRegular;
46+
default:
47+
assert(false && "Requested invalid event pool cache type");
48+
throw UR_RESULT_ERROR_INVALID_VALUE;
49+
}
50+
}
4051
// Checks if Device is covered by this context.
4152
// For that the Device or its root devices need to be in the context.
4253
bool isValidDevice(ur_device_handle_t Device) const;
@@ -45,7 +56,8 @@ struct ur_context_handle_t_ : ur_object {
4556
const v2::raii::ze_context_handle_t hContext;
4657
const std::vector<ur_device_handle_t> hDevices;
4758
v2::command_list_cache_t commandListCache;
48-
v2::event_pool_cache eventPoolCache;
59+
v2::event_pool_cache eventPoolCacheImmediate;
60+
v2::event_pool_cache eventPoolCacheRegular;
4961

5062
// pool used for urEventCreateWithNativeHandle when native handle is NULL
5163
// (uses non-counter based events to allow for signaling from host)

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
7676
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
7777
getZePriority(pProps ? pProps->flags : ur_queue_flags_t{}),
7878
getZeIndex(pProps)),
79-
eventFlagsFromQueueFlags(flags), this) {}
79+
eventFlagsFromQueueFlags(flags), this, PoolCacheType::Immediate) {}
8080

8181
ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
8282
ur_context_handle_t hContext, ur_device_handle_t hDevice,
@@ -93,7 +93,7 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
9393
}
9494
}
9595
}),
96-
eventFlagsFromQueueFlags(flags), this) {}
96+
eventFlagsFromQueueFlags(flags), this, PoolCacheType::Immediate) {}
9797

9898
ze_event_handle_t ur_queue_immediate_in_order_t::getSignalEvent(
9999
locked<ur_command_list_manager> &commandList, ur_event_handle_t *hUserEvent,
@@ -605,7 +605,8 @@ ur_queue_immediate_in_order_t::enqueueUSMAdvise(const void *pMem, size_t size,
605605
TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMAdvise");
606606

607607
auto commandListLocked = commandListManager.lock();
608-
UR_CALL(commandListLocked->appendUSMAdvise(pMem, size, advice, phEvent));
608+
UR_CALL(commandListLocked->appendUSMAdvise(pMem, size, advice, 0, nullptr,
609+
phEvent));
609610
return UR_RESULT_SUCCESS;
610611
}
611612

0 commit comments

Comments
 (0)