Skip to content

CustomDevice Memory Support #73033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions paddle/phi/core/memory/allocation/allocator_facade.cc
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,17 @@ class AllocatorFacadePrivate {
}
}

// Erases `stream` from `allocation` when the allocation is a
// StreamSafeCustomDeviceAllocation; for any other allocation type nothing is
// erased and the call is only logged.
//
// allocation: the allocation to update.
// stream:     the custom-device stream handle to erase.
void EraseStream(std::shared_ptr<phi::Allocation> allocation,
                 phi::stream::stream_t stream) {
  if (auto custom_device_allocation =
          std::dynamic_pointer_cast<StreamSafeCustomDeviceAllocation>(
              allocation)) {
    custom_device_allocation->EraseStream(stream);
  } else {
    // The log names the type that was actually tested for, so it cannot be
    // mistaken for a message from a CUDA code path.
    VLOG(6) << "EraseStream for a non-StreamSafeCustomDeviceAllocation";
  }
}

phi::stream::stream_t GetStream(
const std::shared_ptr<phi::Allocation>& allocation) const {
const std::shared_ptr<StreamSafeCustomDeviceAllocation>
Expand Down Expand Up @@ -1787,11 +1798,17 @@ AllocationPtr AllocatorFacade::Alloc(const phi::Place& place,
// Reports whether `allocation` belongs to `stream`: stream.id() is
// reinterpreted as the backend's native stream handle and compared with the
// handle returned by GetStream(allocation). Builds with neither a GPU nor a
// custom-device backend throw PreconditionNotMet instead.
// NOTE(review): this span looks like a rendered diff without +/- markers; the
// single-line `#if` and the single-line PADDLE_THROW are presumably the
// removed lines and the multi-line forms their replacements -- confirm against
// the merged file.
bool AllocatorFacade::InSameStream(
const std::shared_ptr<phi::Allocation>& allocation,
const phi::Stream& stream) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && \
!defined(PADDLE_WITH_CUSTOM_DEVICE)
// CUDA/HIP build without custom-device support: compare as gpuStream_t.
gpuStream_t s = reinterpret_cast<gpuStream_t>(stream.id()); // NOLINT
return s == GetStream(allocation);
#elif defined(PADDLE_WITH_CUSTOM_DEVICE)
// Custom-device build: compare as phi::stream::stream_t.
phi::stream::stream_t s =
reinterpret_cast<phi::stream::stream_t>(stream.id()); // NOLINT
return s == GetStream(allocation);
#else
PADDLE_THROW(common::errors::PreconditionNotMet("Not compiled with GPU."));
PADDLE_THROW(common::errors::PreconditionNotMet(
"Not compiled with GPU or CUDA backend."));
#endif
}

Expand Down Expand Up @@ -1946,6 +1963,11 @@ bool AllocatorFacade::RecordStream(std::shared_ptr<phi::Allocation> allocation,
return GetPrivate()->RecordStream(allocation, stream);
}

// Public entry point for erasing `stream` from `allocation`; the work is done
// by the private implementation object returned by GetPrivate().
void AllocatorFacade::EraseStream(std::shared_ptr<phi::Allocation> allocation,
                                  phi::stream::stream_t stream) {
  AllocatorFacadePrivate* impl = GetPrivate();
  impl->EraseStream(allocation, stream);
}

const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
const phi::Place& place, phi::stream::stream_t stream) {
AllocatorFacadePrivate* m = GetPrivate();
Expand Down
2 changes: 2 additions & 0 deletions paddle/phi/core/memory/allocation/allocator_facade.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ class AllocatorFacade {
uint64_t Release(const phi::CustomPlace& place, phi::stream::stream_t stream);
bool RecordStream(std::shared_ptr<Allocation> allocation,
phi::stream::stream_t stream);
// Erases `stream` from `allocation` by delegating to the private
// implementation's EraseStream(allocation, stream).
void EraseStream(std::shared_ptr<Allocation> allocation,
phi::stream::stream_t stream);
TEST_API const std::shared_ptr<Allocator>& GetAllocator(
const phi::Place& place, phi::stream::stream_t stream);
phi::stream::stream_t GetStream(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ bool StreamSafeCustomDeviceAllocation::RecordStream(
return true;
}

// Drops the entry stored for `stream` in outstanding_event_map_.
// The lookup and removal run under outstanding_event_map_lock_. When an entry
// exists, its mapped object is Destroy()ed before the entry is erased; when
// none exists the call is a no-op apart from the log line.
void StreamSafeCustomDeviceAllocation::EraseStream(
    phi::stream::stream_t stream) {
  VLOG(8) << "Try remove stream " << stream << " for address " << ptr();
  std::lock_guard<SpinLock> guard(outstanding_event_map_lock_);
  auto entry = outstanding_event_map_.find(stream);
  if (entry != outstanding_event_map_.end()) {
    entry->second->Destroy();
    outstanding_event_map_.erase(entry);
  }
}

bool StreamSafeCustomDeviceAllocation::CanBeFreed() {
std::lock_guard<SpinLock> lock_guard(outstanding_event_map_lock_);
if (!phi::DeviceManager::HasDeviceType(place_.GetDeviceType())) {
Expand Down Expand Up @@ -191,7 +203,8 @@ uint64_t StreamSafeCustomDeviceAllocator::ReleaseImpl(const phi::Place& place) {

void StreamSafeCustomDeviceAllocator::ProcessUnfreedAllocations() {
// NOTE(Ruibiao): This condition is to reduce lock competition. It does not
// need to be thread-safe since here occasional misjudgments are permissible.
// need to be thread-safe since here occasional misjudgments are
// permissible.
if (unfreed_allocations_.empty()) {
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class StreamSafeCustomDeviceAllocation : public Allocation {
StreamSafeCustomDeviceAllocator *allocator);

bool RecordStream(phi::stream::stream_t stream);
// Destroys and removes the outstanding_event_map_ entry stored for `stream`;
// does nothing when no entry exists for that stream.
void EraseStream(phi::stream::stream_t stream);
bool CanBeFreed();
phi::stream::stream_t GetOwningStream() const;
void SetOwningStream(phi::stream::stream_t s);
Expand Down
14 changes: 14 additions & 0 deletions paddle/phi/core/memory/malloc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,24 @@ gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation) {
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Custom-device overload of Release: hands `place` and `stream` to the
// process-wide AllocatorFacade and returns whatever it reports.
uint64_t Release(const phi::CustomPlace& place, phi::stream::stream_t stream) {
  auto&& facade = allocation::AllocatorFacade::Instance();
  return facade.Release(place, stream);
}

// Custom-device overload of RecordStream: forwards `allocation` and `stream`
// to the process-wide AllocatorFacade and returns its result.
bool RecordStream(std::shared_ptr<Allocation> allocation,
                  phi::stream::stream_t stream) {
  auto&& facade = allocation::AllocatorFacade::Instance();
  return facade.RecordStream(allocation, stream);
}

// Custom-device overload of EraseStream: forwards `allocation` and `stream`
// to the process-wide AllocatorFacade. Nothing is returned.
void EraseStream(std::shared_ptr<Allocation> allocation,
                 phi::stream::stream_t stream) {
  auto&& facade = allocation::AllocatorFacade::Instance();
  facade.EraseStream(allocation, stream);
}

// Custom-device overload of GetStream: asks the process-wide AllocatorFacade
// for the stream handle of `allocation` and returns it.
phi::stream::stream_t GetStream(const std::shared_ptr<Allocation>& allocation) {
  auto&& facade = allocation::AllocatorFacade::Instance();
  return facade.GetStream(allocation);
}
#endif
} // namespace paddle::memory
8 changes: 8 additions & 0 deletions paddle/phi/core/memory/malloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,16 @@ void EraseStream(std::shared_ptr<Allocation> allocation, gpuStream_t stream);
gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation);
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Custom-device memory helpers. Each is implemented in malloc.cc as a direct
// forward to the same-named member of allocation::AllocatorFacade::Instance().

// Forwards to AllocatorFacade::Release(place, stream) and returns its result.
extern uint64_t Release(const phi::CustomPlace& place,
phi::stream::stream_t stream);

// Forwards to AllocatorFacade::RecordStream(allocation, stream) and returns
// its result.
bool RecordStream(std::shared_ptr<Allocation> allocation,
phi::stream::stream_t stream);

// Forwards to AllocatorFacade::EraseStream(allocation, stream).
void EraseStream(std::shared_ptr<Allocation> allocation,
phi::stream::stream_t stream);

// Returns the stream handle AllocatorFacade::GetStream reports for
// `allocation`.
phi::stream::stream_t GetStream(const std::shared_ptr<Allocation>& allocation);
#endif

template <typename StreamType>
Expand Down
Loading