
Commit b5f067f

Delay queue destruction until all events are released

1 parent e142cbd commit b5f067f

13 files changed: +112 -19 lines
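
The lifetime rule this commit introduces: when a queue's (or command buffer's) reference count drops to zero while some of its events are still outstanding, destruction is deferred; the event pool runs a cleanup callback on the last eventPool::free, and that callback performs the delete. Below is a minimal, self-contained C++ sketch of the same scheme. Pool and Owner are hypothetical stand-ins for illustration only, not the adapter's actual types, and the sketch is single-threaded (the real pool guards its state with a mutex).

#include <atomic>
#include <cstdio>
#include <functional>

// Hypothetical stand-ins for the event pool and a refcounted handle.
struct Pool {
  int total = 2;
  int freed = 0;
  std::function<void()> cleanupCallback; // runs when the last event returns

  bool isFull() const { return freed == total; }

  void freeEvent() {
    ++freed;
    if (isFull() && cleanupCallback) {
      auto cb = std::move(cleanupCallback); // keep the callable alive even if
      cb();                                 // it ends up deleting our owner
    }
  }
};

struct Owner {
  std::atomic<int> refCount{1};
  Pool pool;

  Owner() {
    // Deferred-destruction path: fired by the pool on the last free.
    pool.cleanupCallback = [this] {
      if (refCount.load() == 0)
        delete this;
    };
  }

  void release() {
    if (refCount.fetch_sub(1) != 1)
      return; // other references still exist
    if (pool.isFull())
      delete this; // no outstanding events: destroy immediately
    // otherwise destruction happens in cleanupCallback on the last freeEvent()
  }

  ~Owner() { std::puts("owner destroyed"); }
};

int main() {
  auto *owner = new Owner();
  owner->pool.freeEvent(); // one event back, one still outstanding
  owner->release();        // refcount hits zero, destruction is deferred
  owner->pool.freeEvent(); // last event back -> callback deletes the owner
}

The invariant to preserve is that exactly one path deletes the owner: release() when the pool is already full, or the cleanup callback when the last outstanding event comes back.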

unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp

Lines changed: 19 additions & 2 deletions
@@ -68,7 +68,12 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
     const ur_exp_command_buffer_desc_t *desc)
     : eventPool(context->getEventPoolCache(PoolCacheType::Regular)
                     .borrow(device->Id.value(),
-                            isInOrder ? v2::EVENT_FLAGS_COUNTER : 0)),
+                            isInOrder ? v2::EVENT_FLAGS_COUNTER : 0,
+                            [this] {
+                              if (this->RefCount.load() == 0) {
+                                delete this;
+                              }
+                            })),
       context(context), device(device),
       isUpdatable(desc ? desc->isUpdatable : false),
       isInOrder(desc ? desc->isInOrder : false),
@@ -226,6 +231,18 @@ ur_event_handle_t ur_exp_command_buffer_handle_t_::createEventIfRequested(
   return event;
 }
 
+ur_result_t ur_exp_command_buffer_handle_t_::release() {
+  // The command buffer can only be released if all events were returned to
+  // the event pool. If the event pool is not full, we delay its destruction
+  // until all events are released. Destruction will then happen on
+  // the last eventPool::free.
+
+  if (eventPool->isFull()) {
+    delete this;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
 namespace ur::level_zero {
 
 ur_result_t
@@ -276,7 +293,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) try {
     ZE2UR_CALL(zeEventHostSynchronize,
                (executionEvent->getZeEvent(), UINT64_MAX));
   }
-  delete hCommandBuffer;
+  return hCommandBuffer->release();
   return UR_RESULT_SUCCESS;
 } catch (...) {
   return exceptionToResult(std::current_exception());

unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp

Lines changed: 2 additions & 0 deletions
@@ -53,6 +53,8 @@ struct ur_exp_command_buffer_handle_t_ : public ur_object {
   ur_event_handle_t
   createEventIfRequested(ur_exp_command_buffer_sync_point_t *retSyncPoint);
 
+  ur_result_t release();
+
 private:
   v2::raii::cache_borrowed_event_pool eventPool;
 
unified-runtime/source/adapters/level_zero/v2/event.cpp

Lines changed: 2 additions & 3 deletions
@@ -261,9 +261,8 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName,
     return returnValue(hEvent->RefCount.load());
   }
   case UR_EVENT_INFO_COMMAND_QUEUE: {
-    auto urQueueHandle = reinterpret_cast<uintptr_t>(hEvent->getQueue()) -
-                         ur_queue_handle_t_::queue_offset;
-    return returnValue(urQueueHandle);
+    return returnValue(
+        ur_queue_handle_t_::queuePtrToHandle(hEvent->getQueue()));
   }
   case UR_EVENT_INFO_CONTEXT: {
     return returnValue(hEvent->getContext());

unified-runtime/source/adapters/level_zero/v2/event_pool.cpp

Lines changed: 11 additions & 0 deletions
@@ -54,6 +54,12 @@ void event_pool::free(ur_event_handle_t event) {
   // The event is still in the pool, so we need to increment the refcount
   assert(event->RefCount.load() == 0);
   event->RefCount.increment();
+
+  // All events are returned to the cache
+  if (events.size() == freelist.size()) {
+    lock.unlock();
+    cleanupCallback();
+  }
 }
 
 event_provider *event_pool::getProvider() const { return provider.get(); }
@@ -62,4 +68,9 @@ event_flags_t event_pool::getFlags() const {
   return getProvider()->eventFlags();
 }
 
+bool event_pool::isFull() const {
+  std::unique_lock<std::mutex> lock(*mutex);
+  return events.size() == freelist.size();
+}
+
 } // namespace v2
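
Note that event_pool::free drops its lock before invoking the cleanup callback. A plausible reason (my reading, not stated in the commit): the callback may destroy the pool's owner and re-enter pool code, and calling out to arbitrary code while holding the internal, non-recursive mutex would risk self-deadlock. A toy sketch of that rule, using hypothetical names:

#include <cstddef>
#include <functional>
#include <mutex>
#include <vector>

// Hypothetical pool illustrating "drop the lock before the callback".
class toy_pool {
  std::mutex mtx;
  std::vector<int> events{1, 2, 3};
  std::vector<int> freelist;
  std::function<void()> cleanup;

public:
  void set_cleanup(std::function<void()> cb) { cleanup = std::move(cb); }

  void free_event(int e) {
    std::unique_lock<std::mutex> lock(mtx);
    freelist.push_back(e);

    if (freelist.size() == events.size()) {
      // The callback may destroy our owner or call back into this pool;
      // holding mtx across that call could deadlock.
      lock.unlock();
      if (cleanup)
        cleanup();
    }
  }

  std::size_t outstanding() {
    std::lock_guard<std::mutex> lock(mtx);
    return events.size() - freelist.size();
  }
};

int main() {
  toy_pool pool;
  pool.set_cleanup([&pool] {
    // Re-entering the pool here is safe only because free_event released mtx.
    (void)pool.outstanding();
  });
  pool.free_event(1);
  pool.free_event(2);
  pool.free_event(3); // last free fires the cleanup callback
}

Had free_event kept mtx locked across the callback, the nested outstanding() call would try to relock the same non-recursive mutex, which is undefined behavior and typically deadlocks.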

unified-runtime/source/adapters/level_zero/v2/event_pool.hpp

Lines changed: 8 additions & 0 deletions
@@ -42,6 +42,10 @@ class event_pool {
 
   ~event_pool() = default;
 
+  void setCleanupCallbackUnlocked(std::function<void(void)> cleanupCallback) {
+    this->cleanupCallback = cleanupCallback;
+  }
+
   // Allocate an event from the pool. Thread safe.
   ur_event_handle_t allocate();
 
@@ -51,6 +55,8 @@ class event_pool {
   event_provider *getProvider() const;
   event_flags_t getFlags() const;
 
+  bool isFull() const;
+
 private:
   ur_context_handle_t hContext;
   std::unique_ptr<event_provider> provider;
@@ -59,6 +65,8 @@ class event_pool {
   std::vector<ur_event_handle_t> freelist;
 
   std::unique_ptr<std::mutex> mutex;
+
+  std::function<void(void)> cleanupCallback;
 };
 
 // Only create an event when requested by the user.

unified-runtime/source/adapters/level_zero/v2/event_pool_cache.cpp

Lines changed: 5 additions & 2 deletions
@@ -20,8 +20,9 @@ event_pool_cache::event_pool_cache(ur_context_handle_t hContext,
   pools.resize(max_devices * (1ULL << EVENT_FLAGS_USED_BITS));
 }
 
-raii::cache_borrowed_event_pool event_pool_cache::borrow(DeviceId id,
-                                                         event_flags_t flags) {
+raii::cache_borrowed_event_pool
+event_pool_cache::borrow(DeviceId id, event_flags_t flags,
+                         std::function<void(void)> cleanupCb) {
   std::unique_lock<ur_mutex> Lock(mutex);
 
   event_descriptor event_desc{id, flags};
@@ -39,6 +40,8 @@ raii::cache_borrowed_event_pool event_pool_cache::borrow(DeviceId id,
   auto pool = vec.back().release();
   vec.pop_back();
 
+  pool->setCleanupCallbackUnlocked(cleanupCb);
+
   return raii::cache_borrowed_event_pool(
       pool, [this, id, flags](event_pool *pool) {
         std::unique_lock<ur_mutex> Lock(mutex);

unified-runtime/source/adapters/level_zero/v2/event_pool_cache.hpp

Lines changed: 2 additions & 1 deletion
@@ -38,7 +38,8 @@ class event_pool_cache {
   event_pool_cache(ur_context_handle_t hContext, size_t max_devices,
                    ProviderCreateFunc);
 
-  raii::cache_borrowed_event_pool borrow(DeviceId, event_flags_t flags);
+  raii::cache_borrowed_event_pool borrow(DeviceId, event_flags_t flags,
+                                         std::function<void(void)> cleanupCb);
 
 private:
   ur_context_handle_t hContext;

unified-runtime/source/adapters/level_zero/v2/queue_handle.hpp

Lines changed: 8 additions & 2 deletions
@@ -27,6 +27,13 @@ struct ur_queue_handle_t_ : ur::handle_base<ur::level_zero::ddi_getter> {
   static constexpr uintptr_t queue_offset =
       sizeof(ur::handle_base<ur::level_zero::ddi_getter>);
 
+  template <typename Q> static ur_queue_handle_t queuePtrToHandle(Q *queue) {
+    if (!queue)
+      return nullptr;
+    return reinterpret_cast<ur_queue_handle_t>(
+        reinterpret_cast<uintptr_t>(queue) - queue_offset);
+  }
+
   template <typename T, class... Args>
   ur_queue_handle_t_(std::in_place_type_t<T>, Args &&...args)
       : ur::handle_base<ur::level_zero::ddi_getter>(),
@@ -61,8 +68,7 @@ struct ur_queue_handle_t_ : ur::handle_base<ur::level_zero::ddi_getter> {
         [queueHandle = this](auto &q) {
           if (!q.RefCount.decrementAndTest())
             return UR_RESULT_SUCCESS;
-          delete queueHandle;
-          return UR_RESULT_SUCCESS;
+          return q.release();
         },
         queue_data);
   }
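
queuePtrToHandle recovers the public ur_queue_handle_t from a pointer to the implementation object stored inside it, by subtracting the fixed queue_offset (the size of the handle_base prefix). The sketch below illustrates the arithmetic with simplified toy types; the real handle keeps its implementation in a variant after the handle_base, so treat this only as an approximation of the layout.

#include <cassert>
#include <cstdint>

// Toy layout: a public handle that begins with a small base, followed by the
// implementation object. The helper undoes the offset to get the handle back.
struct base {
  int ddi_table_slot = 0;
};

struct impl {
  int data = 42;
};

struct handle : base {
  impl payload;
  static constexpr std::uintptr_t impl_offset = sizeof(base);

  static handle *from_impl(impl *p) {
    if (!p)
      return nullptr;
    return reinterpret_cast<handle *>(reinterpret_cast<std::uintptr_t>(p) -
                                      impl_offset);
  }
};

int main() {
  handle h;
  impl *inner = &h.payload;
  // Subtracting the fixed offset recovers the outer handle.
  assert(handle::from_impl(inner) == &h);
  assert(handle::from_impl(nullptr) == nullptr);
}

The null check is what lets urEventGetInfo earlier in this commit return a null queue handle for events whose queue was set to nullptr (as the tests do with setQueue(nullptr)); subtracting the offset from a null pointer would otherwise yield a bogus non-null handle.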

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 27 additions & 2 deletions
@@ -12,6 +12,7 @@
 #include "command_buffer.hpp"
 #include "kernel.hpp"
 #include "memory.hpp"
+#include "queue_handle.hpp"
 #include "ur.hpp"
 
 #include "../common/latency_tracker.hpp"
@@ -29,7 +30,13 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
     event_flags_t eventFlags, ur_queue_flags_t flags)
     : hContext(hContext), hDevice(hDevice),
       eventPool(hContext->getEventPoolCache(PoolCacheType::Immediate)
-                    .borrow(hDevice->Id.value(), eventFlags)),
+                    .borrow(hDevice->Id.value(), eventFlags,
+                            [this] {
+                              if (this->RefCount.load() == 0) {
+                                delete ur_queue_handle_t_::queuePtrToHandle(
+                                    this);
+                              }
+                            })),
       commandListManager(
           hContext, hDevice,
           hContext->getCommandListCache().getImmediateCommandList(
@@ -46,12 +53,30 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
     event_flags_t eventFlags, ur_queue_flags_t flags)
     : hContext(hContext), hDevice(hDevice),
       eventPool(hContext->getEventPoolCache(PoolCacheType::Immediate)
-                    .borrow(hDevice->Id.value(), eventFlags)),
+                    .borrow(hDevice->Id.value(), eventFlags,
+                            [this] {
+                              if (this->RefCount.load() == 0) {
+                                delete ur_queue_handle_t_::queuePtrToHandle(
+                                    this);
+                              }
+                            })),
       commandListManager(hContext, hDevice, std::move(commandListHandle)),
       flags(flags) {
   ur::level_zero::urContextRetain(hContext);
 }
 
+ur_result_t ur_queue_immediate_in_order_t::release() {
+  // The queue can only be released if all events were returned to the
+  // event pool. If the event pool is not full, we delay queue destruction
+  // until all events are released. Queue destruction will happen on
+  // the last eventPool::free.
+
+  if (eventPool->isFull())
+    delete ur_queue_handle_t_::queuePtrToHandle(this);
+
+  return UR_RESULT_SUCCESS;
+}
+
 ur_result_t
 ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName,
                                             size_t propSize, void *pPropValue,

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ struct ur_queue_immediate_in_order_t : ur_object, ur_queue_t_ {
 
   ~ur_queue_immediate_in_order_t();
 
+  ur_result_t release();
   ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize,
                            void *pPropValue, size_t *pPropSizeRet) override;
   ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc,

unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.cpp

Lines changed: 20 additions & 1 deletion
@@ -10,6 +10,7 @@
 
 #include "queue_immediate_out_of_order.hpp"
 #include "../common/latency_tracker.hpp"
+#include "queue_handle.hpp"
 #include "ur.hpp"
 
 namespace v2 {
@@ -34,7 +35,13 @@ ur_queue_immediate_out_of_order_t::ur_queue_immediate_out_of_order_t(
     event_flags_t eventFlags, ur_queue_flags_t flags)
     : hContext(hContext), hDevice(hDevice),
       eventPool(hContext->getEventPoolCache(PoolCacheType::Immediate)
-                    .borrow(hDevice->Id.value(), eventFlags)),
+                    .borrow(hDevice->Id.value(), eventFlags,
+                            [this] {
+                              if (this->RefCount.load() == 0) {
+                                delete ur_queue_handle_t_::queuePtrToHandle(
+                                    this);
+                              }
+                            })),
       commandListManagers(createCommandListManagers<numCommandLists>(
           hContext, hDevice, ordinal, priority, index)),
       flags(flags) {
@@ -45,6 +52,18 @@ ur_queue_immediate_out_of_order_t::ur_queue_immediate_out_of_order_t(
   ur::level_zero::urContextRetain(hContext);
 }
 
+ur_result_t ur_queue_immediate_out_of_order_t::release() {
+  // The queue can only be released if all events were returned to the
+  // event pool. If the event pool is not full, we delay queue destruction
+  // until all events are released. Queue destruction will happen on
+  // the last eventPool::free.
+
+  if (eventPool->isFull())
+    delete ur_queue_handle_t_::queuePtrToHandle(this);
+
+  return UR_RESULT_SUCCESS;
+}
+
 ur_result_t ur_queue_immediate_out_of_order_t::queueGetInfo(
     ur_queue_info_t propName, size_t propSize, void *pPropValue,
     size_t *pPropSizeRet) {

unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp

Lines changed: 1 addition & 0 deletions
@@ -59,6 +59,7 @@ struct ur_queue_immediate_out_of_order_t : ur_object, ur_queue_t_ {
 
   ~ur_queue_immediate_out_of_order_t();
 
+  ur_result_t release();
   ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize,
                            void *pPropValue, size_t *pPropSizeRet) override;
   ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc,

unified-runtime/test/adapters/level_zero/v2/event_pool_test.cpp

Lines changed: 6 additions & 6 deletions
@@ -168,9 +168,9 @@ UUR_DEVICE_TEST_SUITE_WITH_PARAM(EventPoolTest, testing::ValuesIn(test_cases),
                                  printParams<EventPoolTest>);
 
 TEST_P(EventPoolTest, InvalidDevice) {
-  auto pool = cache->borrow(MAX_DEVICES, getParam().flags);
+  auto pool = cache->borrow(MAX_DEVICES, getParam().flags, [] {});
   ASSERT_EQ(pool, nullptr);
-  pool = cache->borrow(MAX_DEVICES + 10, getParam().flags);
+  pool = cache->borrow(MAX_DEVICES + 10, getParam().flags, [] {});
   ASSERT_EQ(pool, nullptr);
 }
 
@@ -179,7 +179,7 @@ TEST_P(EventPoolTest, Basic) {
   ur_event_handle_t first;
   ze_event_handle_t zeFirst;
   {
-    auto pool = cache->borrow(device->Id.value(), getParam().flags);
+    auto pool = cache->borrow(device->Id.value(), getParam().flags, [] {});
 
     first = pool->allocate();
     first->setQueue(nullptr);
@@ -191,7 +191,7 @@ TEST_P(EventPoolTest, Basic) {
   ur_event_handle_t second;
   ze_event_handle_t zeSecond;
   {
-    auto pool = cache->borrow(device->Id.value(), getParam().flags);
+    auto pool = cache->borrow(device->Id.value(), getParam().flags, [] {});
 
     second = pool->allocate();
     second->setQueue(nullptr);
@@ -211,7 +211,7 @@ TEST_P(EventPoolTest, Threaded) {
   for (int iters = 0; iters < 3; ++iters) {
     for (int th = 0; th < 10; ++th) {
      threads.emplace_back([&] {
-        auto pool = cache->borrow(device->Id.value(), getParam().flags);
+        auto pool = cache->borrow(device->Id.value(), getParam().flags, [] {});
        std::vector<ur_event_handle_t> events;
        for (int i = 0; i < 100; ++i) {
          events.push_back(pool->allocate());
@@ -231,7 +231,7 @@ TEST_P(EventPoolTest, Threaded) {
 }
 
 TEST_P(EventPoolTest, ProviderNormalUseMostFreePool) {
-  auto pool = cache->borrow(device->Id.value(), getParam().flags);
+  auto pool = cache->borrow(device->Id.value(), getParam().flags, [] {});
   std::list<ur_event_handle_t> events;
   for (int i = 0; i < 128; ++i) {
     auto event = pool->allocate();
