Skip to content

Commit e44a920

Browse files
authored
[UR] Support 2D and 3D enqueue kernel launches in offload (#18740)
A (rather trivial) test has also been added since Offload currently has issues with arguments.
1 parent 23bcb9f commit e44a920

File tree

4 files changed

+60
-11
lines changed

4 files changed

+60
-11
lines changed

unified-runtime/source/adapters/offload/enqueue.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,22 +28,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
2828
//
2929

3030
(void)pGlobalWorkOffset;
31-
(void)pLocalWorkSize;
3231

33-
if (workDim == 1) {
34-
std::cerr
35-
<< "UR Offload adapter only supports 1d kernel launches at the moment";
36-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
32+
size_t GlobalSize[3] = {1, 1, 1};
33+
for (uint32_t I = 0; I < workDim; I++) {
34+
GlobalSize[I] = pGlobalWorkSize[I];
35+
}
36+
37+
// TODO: We default to 1, 1, 1 here. In future if pLocalWorkSize is not
38+
// specified, we should pick the "best" one
39+
size_t GroupSize[3] = {1, 1, 1};
40+
if (pLocalWorkSize) {
41+
for (uint32_t I = 0; I < workDim; I++) {
42+
GroupSize[I] = pLocalWorkSize[I];
43+
}
44+
}
45+
46+
if (GroupSize[0] > GlobalSize[0] || GroupSize[1] > GlobalSize[1] ||
47+
GroupSize[2] > GlobalSize[2]) {
48+
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
3749
}
3850

3951
ol_kernel_launch_size_args_t LaunchArgs;
4052
LaunchArgs.Dimensions = workDim;
41-
LaunchArgs.NumGroupsX = pGlobalWorkSize[0];
42-
LaunchArgs.NumGroupsY = 1;
43-
LaunchArgs.NumGroupsZ = 1;
44-
LaunchArgs.GroupSizeX = 1;
45-
LaunchArgs.GroupSizeY = 1;
46-
LaunchArgs.GroupSizeZ = 1;
53+
LaunchArgs.NumGroupsX = GlobalSize[0] / GroupSize[0];
54+
LaunchArgs.NumGroupsY = GlobalSize[1] / GroupSize[1];
55+
LaunchArgs.NumGroupsZ = GlobalSize[2] / GroupSize[2];
56+
LaunchArgs.GroupSizeX = GroupSize[0];
57+
LaunchArgs.GroupSizeY = GroupSize[1];
58+
LaunchArgs.GroupSizeZ = GroupSize[2];
4759
LaunchArgs.DynSharedMemory = 0;
4860

4961
ol_event_handle_t EventOut;

unified-runtime/test/conformance/device_code/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp)
163163
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp)
164164
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/linker_error.cpp)
165165
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm_local_mem.cpp)
166+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/no_args.cpp)
166167

167168
set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h)
168169
add_custom_command(OUTPUT ${KERNEL_HEADER}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
3+
// Exceptions. See LICENSE.TXT
4+
//
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#include <sycl/sycl.hpp>
8+
9+
int main() {
10+
sycl::queue sycl_queue;
11+
sycl_queue.submit([&](sycl::handler &cgh) {
12+
cgh.parallel_for<class no_args>(
13+
sycl::range<3>{128, 128, 128},
14+
[](sycl::item<3> itemId) { itemId.get_id(0); });
15+
});
16+
return 0;
17+
}

unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,18 @@
99
#include <uur/fixtures.h>
1010
#include <uur/known_failure.h>
1111

12+
struct urEnqueueKernelLaunchNoArgs3DTest : uur::urKernelExecutionTest {
13+
void SetUp() override {
14+
program_name = "no_args";
15+
UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp());
16+
}
17+
18+
size_t global_size[3] = {32, 16, 8};
19+
size_t global_offset[3] = {0, 0, 0};
20+
size_t n_dimensions = 3;
21+
};
22+
UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueKernelLaunchNoArgs3DTest);
23+
1224
struct urEnqueueKernelLaunchTest : uur::urKernelExecutionTest {
1325
void SetUp() override {
1426
program_name = "fill";
@@ -67,6 +79,13 @@ struct urEnqueueKernelLaunchKernelStandardTest : uur::urKernelExecutionTest {
6779
};
6880
UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueKernelLaunchKernelStandardTest);
6981

82+
TEST_P(urEnqueueKernelLaunchNoArgs3DTest, Success) {
83+
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
84+
global_offset, global_size, nullptr, 0,
85+
nullptr, nullptr));
86+
ASSERT_SUCCESS(urQueueFinish(queue));
87+
}
88+
7089
TEST_P(urEnqueueKernelLaunchTest, Success) {
7190
ur_mem_handle_t buffer = nullptr;
7291
AddBuffer1DArg(sizeof(val) * global_size, &buffer);

0 commit comments

Comments
 (0)