@@ -867,21 +867,61 @@ ur_result_t bindlessImagesHandleCopyFlags(
867
867
return UR_RESULT_SUCCESS;
868
868
};
869
869
case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: {
870
- ze_image_region_t DstRegion;
871
- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
872
- &pCopyRegion->copyExtent , DstRegion));
873
- ze_image_region_t SrcRegion;
874
- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
875
- &pCopyRegion->copyExtent , SrcRegion));
876
-
877
- auto *urImgSrc = reinterpret_cast <const ur_bindless_mem_handle_t *>(pSrc);
878
- auto *urImgDst = reinterpret_cast <ur_bindless_mem_handle_t *>(pDst);
879
-
880
- ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
881
- (ZeCommandList, urImgDst->getZeImage (), urImgSrc->getZeImage (),
882
- &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
883
- phWaitEvents));
870
+ if (pSrcImageDesc->rowPitch != 0 && pDstImageDesc->rowPitch != 0 ) {
871
+ // Copy from pitched USM memory to pitched USM memory
872
+ uint32_t SrcRowPitch = pSrcImageDesc->rowPitch ;
873
+ uint32_t DstRowPitch = pDstImageDesc->rowPitch ;
874
+ ze_copy_region_t ZeDstRegion = {(uint32_t )pCopyRegion->dstOffset .x ,
875
+ (uint32_t )pCopyRegion->dstOffset .y ,
876
+ (uint32_t )pCopyRegion->dstOffset .z ,
877
+ DstRowPitch,
878
+ (uint32_t )pCopyRegion->copyExtent .height ,
879
+ (uint32_t )pCopyRegion->copyExtent .depth };
880
+ uint32_t DstSlicePitch = 0 ;
881
+ uint32_t SrcSlicePitch = 0 ;
882
+ ze_copy_region_t ZeSrcRegion = {(uint32_t )pCopyRegion->srcOffset .x ,
883
+ (uint32_t )pCopyRegion->srcOffset .y ,
884
+ (uint32_t )pCopyRegion->srcOffset .z ,
885
+ SrcRowPitch,
886
+ (uint32_t )pCopyRegion->copyExtent .height ,
887
+ (uint32_t )pCopyRegion->copyExtent .depth };
888
+ ZE2UR_CALL (zeCommandListAppendMemoryCopyRegion,
889
+ (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch,
890
+ pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent,
891
+ numWaitEvents, phWaitEvents));
892
+ } else if (pSrcImageDesc->rowPitch == 0 && pDstImageDesc->rowPitch == 0 ) {
893
+ // Copy from Non-USM memory to Non-USM memory
894
+ ze_image_region_t DstRegion;
895
+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
896
+ &pCopyRegion->copyExtent , DstRegion));
897
+ ze_image_region_t SrcRegion;
898
+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
899
+ &pCopyRegion->copyExtent , SrcRegion));
900
+ auto *UrImageDst = static_cast <ur_bindless_mem_handle_t *>(pDst);
901
+ auto *UrImageSrc = static_cast <const ur_bindless_mem_handle_t *>(pSrc);
902
+ ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
903
+ (ZeCommandList, UrImageDst->getZeImage (), UrImageSrc->getZeImage (),
904
+ &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
905
+ phWaitEvents));
884
906
907
+ } else {
908
+ // Copy from Non-USM/pitched USM memory to pitched USM/Non-USM memory
909
+ // Note: This might be the same procedure as pitched USM to
910
+ // pitched USM. Need further testing.
911
+ ze_image_region_t DstRegion;
912
+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
913
+ &pCopyRegion->copyExtent , DstRegion));
914
+ ze_image_region_t SrcRegion;
915
+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
916
+ &pCopyRegion->copyExtent , SrcRegion));
917
+ auto *UrImageDst = static_cast <ur_bindless_mem_handle_t *>(pDst);
918
+ auto *UrImageSrc = static_cast <const ur_bindless_mem_handle_t *>(pSrc);
919
+ ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
920
+ (ZeCommandList, UrImageDst->getZeImage (), UrImageSrc->getZeImage (),
921
+ &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
922
+ phWaitEvents));
923
+ }
924
+
885
925
return UR_RESULT_SUCCESS;
886
926
};
887
927
default :
0 commit comments