From 87575ef5fc759060fe896cce4474fa331b89019e Mon Sep 17 00:00:00 2001 From: "Zhang, Winston" Date: Mon, 23 Jun 2025 18:50:11 -0700 Subject: [PATCH 1/4] [UR][L0] Implement support for device to device copy Bindless Image: pitched usm image to pitched usm image copy Signed-off-by: Zhang, Winston --- .../adapters/level_zero/image_common.cpp | 68 +++++++++++++++---- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index d191b2a7c37cf..d27bfd27670b2 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -867,21 +867,61 @@ ur_result_t bindlessImagesHandleCopyFlags( return UR_RESULT_SUCCESS; }; case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: { - ze_image_region_t DstRegion; - UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, - &pCopyRegion->copyExtent, DstRegion)); - ze_image_region_t SrcRegion; - UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, - &pCopyRegion->copyExtent, SrcRegion)); - - auto *urImgSrc = reinterpret_cast(pSrc); - auto *urImgDst = reinterpret_cast(pDst); - - ZE2UR_CALL(zeCommandListAppendImageCopyRegion, - (ZeCommandList, urImgDst->getZeImage(), urImgSrc->getZeImage(), - &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents, - phWaitEvents)); + if (pSrcImageDesc->rowPitch != 0 && pDstImageDesc->rowPitch != 0) { + // Copy from pitched USM memory to pitched USM memory + uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; + uint32_t DstRowPitch = pDstImageDesc->rowPitch; + ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, + (uint32_t)pCopyRegion->dstOffset.y, + (uint32_t)pCopyRegion->dstOffset.z, + DstRowPitch, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + uint32_t DstSlicePitch = 0; + uint32_t SrcSlicePitch = 0; + ze_copy_region_t ZeSrcRegion 
= {(uint32_t)pCopyRegion->srcOffset.x, + (uint32_t)pCopyRegion->srcOffset.y, + (uint32_t)pCopyRegion->srcOffset.z, + SrcRowPitch, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, + (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, + pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, + numWaitEvents, phWaitEvents)); + } else if (pSrcImageDesc->rowPitch == 0 && pDstImageDesc->rowPitch == 0) { + // Copy from Non-USM memory to Non-USM memory + ze_image_region_t DstRegion; + UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, + &pCopyRegion->copyExtent, DstRegion)); + ze_image_region_t SrcRegion; + UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, + &pCopyRegion->copyExtent, SrcRegion)); + auto *UrImageDst = static_cast(pDst); + auto *UrImageSrc = static_cast(pSrc); + ZE2UR_CALL(zeCommandListAppendImageCopyRegion, + (ZeCommandList, UrImageDst->getZeImage(), UrImageSrc->getZeImage(), + &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents, + phWaitEvents)); + } else { + // Copy from Non-USM/pitched USM memory to pitched USM/Non-USM memory + // Note: This might be the same procedure as pitched USM to + // pitched USM. Need further testing. 
+ ze_image_region_t DstRegion; + UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset, + &pCopyRegion->copyExtent, DstRegion)); + ze_image_region_t SrcRegion; + UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset, + &pCopyRegion->copyExtent, SrcRegion)); + auto *UrImageDst = static_cast(pDst); + auto *UrImageSrc = static_cast(pSrc); + ZE2UR_CALL(zeCommandListAppendImageCopyRegion, + (ZeCommandList, UrImageDst->getZeImage(), UrImageSrc->getZeImage(), + &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents, + phWaitEvents)); + } + return UR_RESULT_SUCCESS; }; default: From f3b0c81d997bf1434b6b2dedd1e87cfb447efae7 Mon Sep 17 00:00:00 2001 From: "Zhang, Winston" Date: Mon, 23 Jun 2025 18:58:55 -0700 Subject: [PATCH 2/4] [UR][L0] Implement support for device to device copy Bindless Image: pitched usm image to pitched usm image copy Signed-off-by: Zhang, Winston --- unified-runtime/source/adapters/level_zero/image_common.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index d27bfd27670b2..d0f5da26acb0c 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -903,7 +903,6 @@ ur_result_t bindlessImagesHandleCopyFlags( (ZeCommandList, UrImageDst->getZeImage(), UrImageSrc->getZeImage(), &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents, phWaitEvents)); - } else { // Copy from Non-USM/pitched USM memory to pitched USM/Non-USM memory // Note: This might be the same procedure as pitched USM to @@ -921,7 +920,6 @@ ur_result_t bindlessImagesHandleCopyFlags( &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents, phWaitEvents)); } - return UR_RESULT_SUCCESS; }; default: From 0fd9b338e74090816754f10ef4a23c2574422b12 Mon Sep 17 00:00:00 2001 From: "Zhang, Winston" Date: Tue, 24 Jun 2025 10:18:13 -0700 Subject: [PATCH 3/4] Format 
image_common.cpp to comply with clang-format --- .../source/adapters/level_zero/image_common.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index d0f5da26acb0c..f820d96388bea 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -900,9 +900,9 @@ ur_result_t bindlessImagesHandleCopyFlags( auto *UrImageDst = static_cast(pDst); auto *UrImageSrc = static_cast(pSrc); ZE2UR_CALL(zeCommandListAppendImageCopyRegion, - (ZeCommandList, UrImageDst->getZeImage(), UrImageSrc->getZeImage(), - &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents, - phWaitEvents)); + (ZeCommandList, UrImageDst->getZeImage(), + UrImageSrc->getZeImage(), &DstRegion, &SrcRegion, + zeSignalEvent, numWaitEvents, phWaitEvents)); } else { // Copy from Non-USM/pitched USM memory to pitched USM/Non-USM memory // Note: This might be the same procedure as pitched USM to @@ -916,9 +916,9 @@ ur_result_t bindlessImagesHandleCopyFlags( auto *UrImageDst = static_cast(pDst); auto *UrImageSrc = static_cast(pSrc); ZE2UR_CALL(zeCommandListAppendImageCopyRegion, - (ZeCommandList, UrImageDst->getZeImage(), UrImageSrc->getZeImage(), - &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents, - phWaitEvents)); + (ZeCommandList, UrImageDst->getZeImage(), + UrImageSrc->getZeImage(), &DstRegion, &SrcRegion, + zeSignalEvent, numWaitEvents, phWaitEvents)); } return UR_RESULT_SUCCESS; }; From 881888bf2225244a53f56a0c6b8b22508a1c866f Mon Sep 17 00:00:00 2001 From: "Zhang, Winston" Date: Wed, 25 Jun 2025 12:29:51 -0700 Subject: [PATCH 4/4] Added tests Signed-off-by: Zhang, Winston --- .../test-e2e/bindless_images/copies/device_to_device_copy.cpp | 4 +++- .../copies/device_to_device_copy_1D_subregion.cpp | 4 +++- .../copies/device_to_device_copy_2D_subregion.cpp | 4 +++- 
.../bindless_images/copies/device_to_device_pitched.cpp | 3 +-- sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp | 4 +++- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp index 9777e2a591837..0056dee78d33e 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp @@ -1,6 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda +// UNSUPPORTED: target-amd +// UNSUPPORTED-INTENDED: currently not supporting amd for bindless image d2d +// copy // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp index 357d67d78de9b..9ef0f82e05180 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp @@ -1,5 +1,7 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda +// UNSUPPORTED: target-amd +// UNSUPPORTED-INTENDED: currently not supporting amd for bindless image d2d +// copy // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp index d13a9146b2784..dcc0b972d14cd 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp @@ -1,6 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda +// UNSUPPORTED: target-amd +// UNSUPPORTED-INTENDED: currently not supporting amd for bindless image d2d +// copy // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff 
--git a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp index c8a644296eba2..e7cc5ee774741 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp @@ -1,8 +1,7 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm -// REQUIRES: cuda // -// UNSUPPORTED: cuda +// UNSUPPORTED: target-amd // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17231 // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp b/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp index 37e58a3d75bef..a7b0bc9ea3b20 100644 --- a/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp +++ b/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp @@ -1,7 +1,9 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm -// REQUIRES: cuda +// UNSUPPORTED: target-amd +// UNSUPPORTED-INTENDED: currently not supporting amd for bindless image h2h +// copy // RUN: %{build} -o %t.out // RUN: %{run} %t.out