diff --git a/paddle/phi/core/memory/allocation/allocator_facade.cc b/paddle/phi/core/memory/allocation/allocator_facade.cc
index 715b7f0158de2c..ce0d84de633066 100644
--- a/paddle/phi/core/memory/allocation/allocator_facade.cc
+++ b/paddle/phi/core/memory/allocation/allocator_facade.cc
@@ -791,6 +791,22 @@ class AllocatorFacadePrivate {
     }
   }
 
+  // Calls EraseStream(stream) on `allocation` if the dynamic cast below
+  // succeeds; otherwise only emits a VLOG(6) message.
+  // NOTE(review): the local name and the log text say "CUDA" although this
+  // overload takes a phi::stream::stream_t -- confirm the cast target and
+  // rename if it is the custom-device allocation.
+  void EraseStream(std::shared_ptr allocation,
+                   phi::stream::stream_t stream) {
+    if (auto stream_safe_cuda_allocation =
+            std::dynamic_pointer_cast(
+                allocation)) {
+      stream_safe_cuda_allocation->EraseStream(stream);
+    } else {
+      VLOG(6) << "EraseStream for a non-StreamSafeCUDAAllocation";
+    }
+  }
+
   phi::stream::stream_t GetStream(
       const std::shared_ptr& allocation) const {
     const std::shared_ptr
@@ -1787,11 +1803,19 @@ AllocationPtr AllocatorFacade::Alloc(const phi::Place& place,
 bool AllocatorFacade::InSameStream(
     const std::shared_ptr& allocation,
     const phi::Stream& stream) {
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+// A custom-device build takes precedence over the CUDA/HIP branch, so the
+// stream id is compared as a phi::stream::stream_t in that configuration.
+#if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && \
+    !defined(PADDLE_WITH_CUSTOM_DEVICE)
   gpuStream_t s = reinterpret_cast(stream.id());  // NOLINT
   return s == GetStream(allocation);
+#elif defined(PADDLE_WITH_CUSTOM_DEVICE)
+  phi::stream::stream_t s =
+      reinterpret_cast(stream.id());  // NOLINT
+  return s == GetStream(allocation);
 #else
-  PADDLE_THROW(common::errors::PreconditionNotMet("Not compiled with GPU."));
+  PADDLE_THROW(common::errors::PreconditionNotMet(
+      "Not compiled with GPU or custom device."));
 #endif
 }
 
@@ -1946,6 +1970,12 @@ bool AllocatorFacade::RecordStream(std::shared_ptr allocation,
   return GetPrivate()->RecordStream(allocation, stream);
 }
 
+// Delegates to the EraseStream of the object returned by GetPrivate().
+void AllocatorFacade::EraseStream(std::shared_ptr allocation,
+                                  phi::stream::stream_t stream) {
+  GetPrivate()->EraseStream(allocation, stream);
+}
+
 const std::shared_ptr& AllocatorFacade::GetAllocator(
     const phi::Place& place, phi::stream::stream_t stream) {
   AllocatorFacadePrivate* m = GetPrivate();
diff --git
a/paddle/phi/core/memory/allocation/allocator_facade.h b/paddle/phi/core/memory/allocation/allocator_facade.h
index 504f657da4cc27..e46a6f9b13ef52 100644
--- a/paddle/phi/core/memory/allocation/allocator_facade.h
+++ b/paddle/phi/core/memory/allocation/allocator_facade.h
@@ -109,6 +109,8 @@ class AllocatorFacade {
   uint64_t Release(const phi::CustomPlace& place, phi::stream::stream_t stream);
   bool RecordStream(std::shared_ptr allocation,
                     phi::stream::stream_t stream);
+  void EraseStream(std::shared_ptr allocation,
+                   phi::stream::stream_t stream);
   TEST_API const std::shared_ptr& GetAllocator(
       const phi::Place& place, phi::stream::stream_t stream);
   phi::stream::stream_t GetStream(
diff --git a/paddle/phi/core/memory/allocation/stream_safe_custom_device_allocator.cc b/paddle/phi/core/memory/allocation/stream_safe_custom_device_allocator.cc
index 3b10fe5635d55f..34ee1a846f362e 100644
--- a/paddle/phi/core/memory/allocation/stream_safe_custom_device_allocator.cc
+++ b/paddle/phi/core/memory/allocation/stream_safe_custom_device_allocator.cc
@@ -58,6 +58,18 @@ bool StreamSafeCustomDeviceAllocation::RecordStream(
   return true;
 }
 
+void StreamSafeCustomDeviceAllocation::EraseStream(
+    phi::stream::stream_t stream) {
+  VLOG(8) << "Try remove stream " << stream << " for address " << ptr();
+  std::lock_guard lock_guard(outstanding_event_map_lock_);
+  // Destroy the entry recorded for `stream` (if any), then drop it from the map.
+  auto event_it = outstanding_event_map_.find(stream);
+  if (event_it != outstanding_event_map_.end()) {
+    event_it->second->Destroy();
+    outstanding_event_map_.erase(event_it);
+  }
+}
+
 bool StreamSafeCustomDeviceAllocation::CanBeFreed() {
   std::lock_guard lock_guard(outstanding_event_map_lock_);
   if (!phi::DeviceManager::HasDeviceType(place_.GetDeviceType())) {
@@ -191,7 +203,8 @@ uint64_t StreamSafeCustomDeviceAllocator::ReleaseImpl(const phi::Place& place) {
 
 void StreamSafeCustomDeviceAllocator::ProcessUnfreedAllocations() {
   // NOTE(Ruibiao): This condition is to reduce lock completion.
It does not
-  // need to be thread-safe since here occasional misjudgments are permissible.
+  // need to be thread-safe since here occasional misjudgments are
+  // permissible.
   if (unfreed_allocations_.empty()) {
     return;
   }
diff --git a/paddle/phi/core/memory/allocation/stream_safe_custom_device_allocator.h b/paddle/phi/core/memory/allocation/stream_safe_custom_device_allocator.h
index 8d648489197306..a198c1761524b1 100644
--- a/paddle/phi/core/memory/allocation/stream_safe_custom_device_allocator.h
+++ b/paddle/phi/core/memory/allocation/stream_safe_custom_device_allocator.h
@@ -36,6 +36,7 @@ class StreamSafeCustomDeviceAllocation : public Allocation {
                                    StreamSafeCustomDeviceAllocator *allocator);
 
   bool RecordStream(phi::stream::stream_t stream);
+  void EraseStream(phi::stream::stream_t stream);
   bool CanBeFreed();
   phi::stream::stream_t GetOwningStream() const;
   void SetOwningStream(phi::stream::stream_t s);
diff --git a/paddle/phi/core/memory/malloc.cc b/paddle/phi/core/memory/malloc.cc
index 9af21c6b3453a7..050a3d2855189b 100644
--- a/paddle/phi/core/memory/malloc.cc
+++ b/paddle/phi/core/memory/malloc.cc
@@ -77,10 +77,24 @@ gpuStream_t GetStream(const std::shared_ptr& allocation) {
 
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
+uint64_t Release(const phi::CustomPlace& place, phi::stream::stream_t stream) {
+  return allocation::AllocatorFacade::Instance().Release(place, stream);
+}
+
 bool RecordStream(std::shared_ptr allocation,
                   phi::stream::stream_t stream) {
   return allocation::AllocatorFacade::Instance().RecordStream(allocation,
                                                               stream);
 }
+
+void EraseStream(std::shared_ptr allocation,
+                 phi::stream::stream_t stream) {
+  // Nothing to return: the facade's EraseStream is void as well.
+  allocation::AllocatorFacade::Instance().EraseStream(allocation, stream);
+}
+
+phi::stream::stream_t GetStream(const std::shared_ptr& allocation) {
+  return allocation::AllocatorFacade::Instance().GetStream(allocation);
+}
 #endif
 }  // namespace paddle::memory
diff --git a/paddle/phi/core/memory/malloc.h b/paddle/phi/core/memory/malloc.h
index
4f80d143280a52..eea770696608a2 100644
--- a/paddle/phi/core/memory/malloc.h
+++ b/paddle/phi/core/memory/malloc.h
@@ -59,8 +59,16 @@ void EraseStream(std::shared_ptr allocation, gpuStream_t stream);
 gpuStream_t GetStream(const std::shared_ptr& allocation);
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
+extern uint64_t Release(const phi::CustomPlace& place,
+                        phi::stream::stream_t stream);  // malloc.cc: forwards to AllocatorFacade::Release
+
 bool RecordStream(std::shared_ptr allocation,
                   phi::stream::stream_t stream);
+
+void EraseStream(std::shared_ptr allocation,
+                 phi::stream::stream_t stream);  // malloc.cc: forwards to AllocatorFacade::EraseStream
+
+phi::stream::stream_t GetStream(const std::shared_ptr& allocation);  // malloc.cc: forwards to AllocatorFacade::GetStream
 #endif
 
 template