@@ -318,7 +318,12 @@ fill_copy_args(detail::handler_impl *impl,
318
318
319
319
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
320
320
handler::handler (detail::handler_impl &HandlerImpl) : impl(&HandlerImpl) {}
321
+ handler::handler (std::unique_ptr<detail::handler_impl> &&HandlerImpl)
322
+ : implOwner(std::move(HandlerImpl)), impl(implOwner.get()) {}
321
323
#else
324
+ handler::handler (std::unique_ptr<detail::handler_impl> &&HandlerImpl)
325
+ : impl(std::move(HandlerImpl)) {}
326
+
322
327
handler::handler (std::shared_ptr<detail::queue_impl> Queue,
323
328
bool CallerNeedsEvent)
324
329
: impl(std::make_shared<detail::handler_impl>(*Queue, nullptr ,
@@ -344,6 +349,7 @@ handler::handler(
344
349
: impl(std::make_shared<detail::handler_impl>(*Graph)) {}
345
350
346
351
#endif
352
+ handler::~handler () = default ;
347
353
348
354
// Sets the submission state to indicate that an explicit kernel bundle has been
349
355
// set. Throws a sycl::exception with errc::invalid if the current state
@@ -426,12 +432,6 @@ detail::EventImplPtr handler::finalize() {
426
432
#else
427
433
event handler::finalize () {
428
434
#endif
429
- // This block of code is needed only for reduction implementation.
430
- // It is harmless (does nothing) for everything else.
431
- if (MIsFinalized)
432
- return MLastEvent;
433
- MIsFinalized = true ;
434
-
435
435
const auto &type = getType ();
436
436
detail::queue_impl *Queue = impl->get_queue_or_null ();
437
437
ext::oneapi::experimental::detail::graph_impl *Graph =
@@ -559,13 +559,6 @@ event handler::finalize() {
559
559
std::vector<ur_event_handle_t > RawEvents = detail::Command::getUrEvents (
560
560
impl->CGData .MEvents , impl->get_queue_or_null (), false );
561
561
562
- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
563
- detail::EventImplPtr &LastEventImpl = MLastEvent;
564
- #else
565
- const detail::EventImplPtr &LastEventImpl =
566
- detail::getSyclObjImpl (MLastEvent);
567
- #endif
568
-
569
562
bool DiscardEvent =
570
563
!impl->MEventNeeded && impl->get_queue ().supportsDiscardingPiEvents ();
571
564
if (DiscardEvent) {
@@ -577,11 +570,10 @@ event handler::finalize() {
577
570
DiscardEvent = !KernelUsesAssert;
578
571
}
579
572
580
- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
581
- if (!DiscardEvent) {
582
- LastEventImpl = detail::event_impl::create_completed_host_event ();
583
- }
584
- #endif
573
+ std::shared_ptr<detail::event_impl> ResultEvent =
574
+ DiscardEvent
575
+ ? nullptr
576
+ : detail::event_impl::create_device_event (impl->get_queue ());
585
577
586
578
#ifdef XPTI_ENABLE_INSTRUMENTATION
587
579
const bool xptiEnabled = xptiTraceEnabled ();
@@ -612,9 +604,8 @@ event handler::finalize() {
612
604
enqueueImpKernel (
613
605
impl->get_queue (), impl->MNDRDesc , impl->MArgs , KernelBundleImpPtr,
614
606
MKernel.get (), toKernelNameStrT (MKernelName),
615
- impl->MKernelNameBasedCachePtr , RawEvents,
616
- DiscardEvent ? nullptr : LastEventImpl.get (), nullptr ,
617
- impl->MKernelCacheConfig , impl->MKernelIsCooperative ,
607
+ impl->MKernelNameBasedCachePtr , RawEvents, ResultEvent.get (),
608
+ nullptr , impl->MKernelCacheConfig , impl->MKernelIsCooperative ,
618
609
impl->MKernelUsesClusterLaunch , impl->MKernelWorkGroupMemorySize ,
619
610
BinImage, impl->MKernelFuncPtr , impl->MKernelNumArgs ,
620
611
impl->MKernelParamDescGetter , impl->MKernelHasSpecialCaptures );
@@ -624,7 +615,7 @@ event handler::finalize() {
624
615
if (!DiscardEvent) {
625
616
detail::emitInstrumentationGeneral (
626
617
StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
627
- static_cast <const void *>(LastEventImpl ->getHandle ()));
618
+ static_cast <const void *>(ResultEvent ->getHandle ()));
628
619
}
629
620
detail::emitInstrumentationGeneral (StreamID, InstanceID,
630
621
CmdTraceEvent,
@@ -635,29 +626,32 @@ event handler::finalize() {
635
626
636
627
if (DiscardEvent) {
637
628
EnqueueKernel ();
638
- #ifndef __INTEL_PREVIEW_BREAKING_CHANGES
639
- LastEventImpl->setStateDiscarded ();
640
- #endif
641
629
} else {
642
630
detail::queue_impl &Queue = impl->get_queue ();
643
- LastEventImpl ->setQueue (Queue);
644
- LastEventImpl ->setWorkerQueue (Queue.weak_from_this ());
645
- LastEventImpl ->setContextImpl (impl->get_context ());
646
- LastEventImpl ->setStateIncomplete ();
647
- LastEventImpl ->setSubmissionTime ();
631
+ ResultEvent ->setQueue (Queue);
632
+ ResultEvent ->setWorkerQueue (Queue.weak_from_this ());
633
+ ResultEvent ->setContextImpl (impl->get_context ());
634
+ ResultEvent ->setStateIncomplete ();
635
+ ResultEvent ->setSubmissionTime ();
648
636
649
637
EnqueueKernel ();
650
- LastEventImpl ->setEnqueued ();
638
+ ResultEvent ->setEnqueued ();
651
639
// connect returned event with dependent events
652
640
if (!Queue.isInOrder ()) {
653
641
// MEvents is not used anymore, so can move.
654
- LastEventImpl ->getPreparedDepsEvents () =
642
+ ResultEvent ->getPreparedDepsEvents () =
655
643
std::move (impl->CGData .MEvents );
656
- // LastEventImpl is local for current thread, no need to lock.
657
- LastEventImpl ->cleanDepEventsThroughOneLevelUnlocked ();
644
+ // ResultEvent is local for current thread, no need to lock.
645
+ ResultEvent ->cleanDepEventsThroughOneLevelUnlocked ();
658
646
}
659
647
}
660
- return MLastEvent;
648
+ #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
649
+ return ResultEvent;
650
+ #else
651
+ return detail::createSyclObjFromImpl<event>(
652
+ ResultEvent ? ResultEvent
653
+ : detail::event_impl::create_discarded_event ());
654
+ #endif
661
655
}
662
656
}
663
657
@@ -939,11 +933,10 @@ event handler::finalize() {
939
933
std::move (CommandGroup), *Queue, !DiscardEvent);
940
934
941
935
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
942
- MLastEvent = DiscardEvent ? nullptr : Event;
936
+ return DiscardEvent ? nullptr : Event;
943
937
#else
944
- MLastEvent = detail::createSyclObjFromImpl<event>(Event);
938
+ return detail::createSyclObjFromImpl<event>(Event);
945
939
#endif
946
- return MLastEvent;
947
940
}
948
941
949
942
void handler::addReduction (const std::shared_ptr<const void > &ReduObj) {
@@ -2474,58 +2467,28 @@ __SYCL_EXPORT void HandlerAccess::preProcess(handler &CGH,
2474
2467
AuxHandler.copyCodeLoc (CGH);
2475
2468
F (AuxHandler);
2476
2469
auto E = AuxHandler.finalize ();
2477
- assert (!CGH.MIsFinalized &&
2478
- " Can't do pre-processing if the command has been enqueued already!" );
2479
2470
if (EventNeeded)
2480
2471
CGH.depends_on (E);
2481
2472
}
2482
2473
__SYCL_EXPORT void HandlerAccess::postProcess (handler &CGH,
2483
2474
type_erased_cgfo_ty F) {
2484
- // The "hacky" `handler`s manipulation mentioned near the declaration in
2485
- // `handler.hpp` and implemented here is far from perfect. A better approach
2486
- // would be
2487
- //
2488
- // bool OrigNeedsEvent = CGH.needsEvent()
2489
- // assert(CGH.not_finalized/enqueued());
2490
- // if (!InOrderQueue)
2491
- // CGH.setNeedsEvent()
2492
- //
2493
- // handler PostProcessHandler(Queue, OrigNeedsEvent)
2494
- // auto E = CGH.finalize(); // enqueue original or current last
2495
- // // post-process
2496
- // if (!InOrder)
2497
- // PostProcessHandler.depends_on(E)
2498
- //
2499
- // swap_impls(CGH, PostProcessHandler)
2500
- // return; // queue::submit finalizes PostProcessHandler and returns its
2501
- // // event if necessary.
2502
- //
2503
- // Still hackier than "real" `queue::submit` but at least somewhat sane.
2504
- // That, however hasn't been tried yet and we have an even hackier approach
2505
- // copied from what's been done in an old reductions implementation before
2506
- // eventless submission work has started. Not sure how feasible the approach
2507
- // above is at this moment.
2508
-
2509
- // This `finalize` is wrong (at least logically) if
2510
- // `assert(!CGH.eventNeeded())`
2511
- auto E = CGH.finalize ();
2475
+ bool EventNeeded = CGH.impl ->MEventNeeded ;
2512
2476
queue_impl &Q = CGH.impl ->get_queue ();
2513
2477
bool InOrder = Q.isInOrder ();
2514
- // Cannot use `CGH.eventNeeded()` alone as there might be subsequent
2515
- // `postProcess` calls and we cannot address them properly similarly to the
2516
- // `finalize` issue described above. `swap_impls` suggested above might be
2517
- // able to handle this scenario naturally.
2518
- #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
2519
- handler_impl HandlerImpl{Q, nullptr , CGH.eventNeeded () || !InOrder};
2520
- handler AuxHandler{HandlerImpl};
2521
- #else
2522
- handler AuxHandler{Q.shared_from_this (), CGH.eventNeeded () || !InOrder};
2523
- #endif
2524
2478
if (!InOrder)
2525
- AuxHandler.depends_on (E);
2526
- AuxHandler.copyCodeLoc (CGH);
2527
- F (AuxHandler);
2528
- CGH.MLastEvent = AuxHandler.finalize ();
2479
+ CGH.impl ->MEventNeeded = true ;
2480
+
2481
+ handler PostProcessHandler{
2482
+ std::make_unique<handler_impl>(Q, nullptr , EventNeeded)};
2483
+ PostProcessHandler.copyCodeLoc (CGH);
2484
+ // Extend lifetimes of auxiliary resources till the last kernel in the chain
2485
+ // finishes:
2486
+ PostProcessHandler.impl ->MAuxiliaryResources = CGH.impl ->MAuxiliaryResources ;
2487
+ auto E = CGH.finalize ();
2488
+ if (!InOrder)
2489
+ PostProcessHandler.depends_on (E);
2490
+ F (PostProcessHandler);
2491
+ swap (CGH, PostProcessHandler);
2529
2492
}
2530
2493
} // namespace detail
2531
2494
} // namespace _V1
0 commit comments