Skip to content

Commit 6d5fe22

Browse files
authored
[SYCL] Provide separate compilation support for free function kernels (#18955)
This PR adds separate compilation support for free function kernels.
1 parent c310aed commit 6d5fe22

File tree

10 files changed

+267
-6
lines changed

10 files changed

+267
-6
lines changed

clang/include/clang/Sema/SemaSYCL.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ class SemaSYCL : public SemaBase {
265265

266266
llvm::DenseSet<const FunctionDecl *> SYCLKernelFunctions;
267267

268+
llvm::DenseSet<const FunctionDecl *> FreeFunctionDeclarations;
269+
268270
public:
269271
SemaSYCL(Sema &S);
270272

@@ -357,7 +359,9 @@ class SemaSYCL : public SemaBase {
357359
void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);
358360
void SetSYCLKernelNames();
359361
void MarkDevices();
362+
void processFreeFunctionDeclaration(const FunctionDecl *FD);
360363
void ProcessFreeFunction(FunctionDecl *FD);
364+
void finalizeFreeFunctionKernels();
361365

362366
/// Get the number of fields or captures within the parsed type.
363367
ExprResult ActOnSYCLBuiltinNumFieldsExpr(ParsedType PT);

clang/lib/Sema/Sema.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,7 @@ void Sema::ActOnEndOfTranslationUnitFragment(TUFragmentKind Kind) {
12501250
}
12511251

12521252
if (getLangOpts().SYCLIsDevice) {
1253+
SYCL().finalizeFreeFunctionKernels();
12531254
// Set the names of the kernels, now that the names have settled down. This
12541255
// needs to happen before we generate the integration headers.
12551256
SYCL().SetSYCLKernelNames();

clang/lib/Sema/SemaDecl.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11170,6 +11170,12 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
1117011170
if (getLangOpts().OpenACC)
1117111171
OpenACC().ActOnFunctionDeclarator(NewFD);
1117211172

11173+
// Handle free functions.
11174+
if (LangOpts.SYCLIsDevice && !NewFD->isDependentContext() &&
11175+
!D.isRedeclaration() &&
11176+
D.getFunctionDefinitionKind() == FunctionDefinitionKind::Declaration)
11177+
SYCL().processFreeFunctionDeclaration(NewFD);
11178+
1117311179
return NewFD;
1117411180
}
1117511181

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,7 +1649,7 @@ class KernelObjVisitor {
16491649
// A visitor function that dispatches to functions as defined in
16501650
// SyclKernelFieldHandler by iterating over a free function parameter list.
16511651
template <typename... HandlerTys>
1652-
void VisitFunctionParameters(FunctionDecl *FreeFunc,
1652+
void VisitFunctionParameters(const FunctionDecl *FreeFunc,
16531653
HandlerTys &...Handlers) {
16541654
for (ParmVarDecl *Param : FreeFunc->parameters())
16551655
visitParam(Param, Param->getType(), Handlers...);
@@ -4822,7 +4822,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
48224822
}
48234823

48244824
SyclKernelIntHeaderCreator(SemaSYCL &S, SYCLIntegrationHeader &H,
4825-
QualType NameType, FunctionDecl *FreeFunc)
4825+
QualType NameType, const FunctionDecl *FreeFunc)
48264826
: SyclKernelFieldHandler(S), Header(H) {
48274827
Header.startKernel(FreeFunc, NameType, FreeFunc->getLocation(),
48284828
false /*IsESIMD*/, true /*IsSYCLUnnamedKernel*/,
@@ -5849,7 +5849,7 @@ void SemaSYCL::MarkDevices() {
58495849
}
58505850
}
58515851

5852-
static bool CheckFreeFunctionDiagnostics(Sema &S, FunctionDecl *FD) {
5852+
static bool CheckFreeFunctionDiagnostics(Sema &S, const FunctionDecl *FD) {
58535853
if (FD->isVariadic()) {
58545854
return S.Diag(FD->getLocation(), diag::err_free_function_variadic_args);
58555855
}
@@ -5875,10 +5875,47 @@ static bool CheckFreeFunctionDiagnostics(Sema &S, FunctionDecl *FD) {
58755875
return false;
58765876
}
58775877

5878+
void SemaSYCL::finalizeFreeFunctionKernels() {
5879+
// This is called at the end of the translation unit. The kernels that appear
5880+
// in this list are kernels that have been declared but not defined. Their
5881+
// construction consists only of generating the integration header and setting
5882+
// their names manually. The other steps in constructing the kernel cannot be
5883+
// done because potentially nothing is known about the arguments of the kernel
5884+
// except that they exist.
5885+
for (const FunctionDecl *kernel : FreeFunctionDeclarations) {
5886+
if (CheckFreeFunctionDiagnostics(SemaRef, kernel))
5887+
continue; // Continue in order to diagnose errors in all kernels
5888+
5889+
SyclKernelIntHeaderCreator IntHeader(*this, getSyclIntegrationHeader(),
5890+
kernel->getType(), kernel);
5891+
KernelObjVisitor Visitor{*this};
5892+
Visitor.VisitFunctionParameters(kernel, IntHeader);
5893+
std::unique_ptr<MangleContext> MangleCtx(
5894+
getASTContext().createMangleContext());
5895+
std::string Name, MangledName;
5896+
std::tie(Name, MangledName) =
5897+
constructFreeFunctionKernelName(*this, kernel, *MangleCtx);
5898+
getSyclIntegrationHeader().updateKernelNames(kernel, Name, MangledName);
5899+
}
5900+
}
5901+
5902+
void SemaSYCL::processFreeFunctionDeclaration(const FunctionDecl *FD) {
5903+
// FD represents a forward declaration of a free function kernel.
5904+
// Save them for the end of the translation unit action. This makes it easier
5905+
// to handle the case where a definition is defined later.
5906+
if (isFreeFunction(FD))
5907+
FreeFunctionDeclarations.insert(FD->getCanonicalDecl());
5908+
}
5909+
58785910
void SemaSYCL::ProcessFreeFunction(FunctionDecl *FD) {
58795911
if (isFreeFunction(FD)) {
58805912
if (CheckFreeFunctionDiagnostics(SemaRef, FD))
58815913
return;
5914+
5915+
// In case the free function kernel has already been seen by way of a
5916+
// forward declaration, flush it out because a definition takes priority.
5917+
FreeFunctionDeclarations.erase(FD->getCanonicalDecl());
5918+
58825919
SyclKernelDecompMarker DecompMarker(*this);
58835920
SyclKernelFieldChecker FieldChecker(*this);
58845921
SyclKernelUnionChecker UnionChecker(*this);
@@ -7155,7 +7192,7 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
71557192
O << "\n// Definition of kernel_id of " << K.Name << "\n";
71567193
O << "namespace sycl {\n";
71577194
O << "template <>\n";
7158-
O << "kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim"
7195+
O << "inline kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim"
71597196
<< ShimCounter << "()>() {\n";
71607197
O << " return sycl::detail::get_kernel_id_impl(std::string_view{\""
71617198
<< K.Name << "\"});\n";

clang/test/CodeGenSYCL/free_function_int_header.cpp

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,20 @@ void ff_21(AliasType start, AliasType *ptr) {
264264
void ff_22(AliasType start, AliasType *ptr) {
265265
}
266266

267+
// Forward declaration of ff_23.
268+
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 0)]]
269+
void ff_23(int arg);
270+
271+
// Forward declaration of ff_24 followed by a definition just after.
272+
// Note that ff_24 appears earlier than ff_23 in the integration header because kernels which
273+
// only declared and not defined such as ff_23 are handled at the end of the translation unit to wait for a definition if it appears.
274+
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 0)]]
275+
void ff_24(int arg);
276+
277+
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 0)]]
278+
void ff_24(int arg) {
279+
}
280+
267281
// CHECK: const char* const kernel_names[] = {
268282
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piii
269283
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piiii
@@ -298,6 +312,8 @@ void ff_22(AliasType start, AliasType *ptr) {
298312
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_20N4sycl3_V18accessorIiLi1ELNS0_6access4modeE1026ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEE
299313
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_217DerivedPS_
300314
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_227DerivedPS_
315+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_24i"
316+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_23i"
301317

302318
// CHECK-NEXT: ""
303319
// CHECK-NEXT: };
@@ -417,6 +433,12 @@ void ff_22(AliasType start, AliasType *ptr) {
417433
// CHECK: //--- _Z19__sycl_kernel_ff_20N4sycl3_V18accessorIiLi1ELNS0_6access4modeE1026ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEE
418434
// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 },
419435

436+
// CHECK: //--- _Z19__sycl_kernel_ff_24i
437+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 },
438+
439+
// CHECK: //--- _Z19__sycl_kernel_ff_23i
440+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 },
441+
420442
// CHECK: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 },
421443
// CHECK-NEXT: };
422444

@@ -991,8 +1013,6 @@ void ff_22(AliasType start, AliasType *ptr) {
9911013
// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim29()> {
9921014
// CHECK-NEXT: static constexpr bool value = true;
9931015
// CHECK-NEXT: };
994-
// CHECK-NEXT: }
995-
9961016

9971017
// CHECK: void ff_21(Derived start, Derived * ptr);
9981018
// CHECK-NEXT: static constexpr auto __sycl_shim30() {
@@ -1022,6 +1042,39 @@ void ff_22(AliasType start, AliasType *ptr) {
10221042
// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim31()> {
10231043
// CHECK-NEXT: static constexpr bool value = true;
10241044
// CHECK-NEXT: };
1045+
1046+
// CHECK: Definition of _Z19__sycl_kernel_ff_24i as a free function kernel
1047+
// CHECK: Forward declarations of kernel and its argument types:
1048+
// CHECK: void ff_24(int arg);
1049+
// CHECK-NEXT: static constexpr auto __sycl_shim32() {
1050+
// CHECK-NEXT: return (void (*)(int))ff_24;
1051+
// CHECK-NEXT: }
1052+
// CHECK-NEXT: namespace sycl {
1053+
// CHECK-NEXT: template <>
1054+
// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim32()> {
1055+
// CHECK-NEXT: static constexpr bool value = true;
1056+
// CHECK-NEXT: };
1057+
// CHECK-NEXT: template <>
1058+
// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim32()> {
1059+
// CHECK-NEXT: static constexpr bool value = true;
1060+
// CHECK-NEXT: };
1061+
1062+
// CHECK: Definition of _Z19__sycl_kernel_ff_23i as a free function kernel
1063+
// CHECK: Forward declarations of kernel and its argument types:
1064+
// CHECK: void ff_23(int arg);
1065+
// CHECK-NEXT: static constexpr auto __sycl_shim33() {
1066+
// CHECK-NEXT: return (void (*)(int))ff_23;
1067+
// CHECK-NEXT: }
1068+
// CHECK-NEXT: namespace sycl {
1069+
// CHECK-NEXT: template <>
1070+
// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim33()> {
1071+
// CHECK-NEXT: static constexpr bool value = true;
1072+
// CHECK-NEXT: };
1073+
// CHECK-NEXT: template <>
1074+
// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim33()> {
1075+
// CHECK-NEXT: static constexpr bool value = true;
1076+
// CHECK-NEXT: };
1077+
10251078
// CHECK-NEXT: }
10261079

10271080
// CHECK: #include <sycl/kernel_bundle.hpp>
@@ -1252,5 +1305,20 @@ void ff_22(AliasType start, AliasType *ptr) {
12521305
// CHECK-NEXT: template <>
12531306
// CHECK-NEXT: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim31()>() {
12541307
// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_227DerivedPS_"});
1308+
1309+
// CHECK: // Definition of kernel_id of _Z19__sycl_kernel_ff_24i
1310+
// CHECK-NEXT: namespace sycl {
1311+
// CHECK-NEXT: template <>
1312+
// CHECK-NEXT: inline kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim32()>() {
1313+
// CHECK-NEXT return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_24i"});
1314+
// CHECK-NEXT: }
1315+
// CHECK-NEXT: }
1316+
1317+
// CHECK: // Definition of kernel_id of _Z19__sycl_kernel_ff_23i
1318+
// CHECK-NEXT: namespace sycl {
1319+
// CHECK-NEXT: template <>
1320+
// CHECK-NEXT: inline kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim33()>() {
1321+
// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_23i"});
1322+
12551323
// CHECK-NEXT: }
12561324
// CHECK-NEXT: }
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// This is not meant as a standalone test but rather as a source file that will
2+
// link with SeparateCompilation.cpp to check that separate compilation works
3+
// with free function kernels. Hence the .cc suffix to exclude it from the list
4+
// of picked up tests.
5+
6+
#include "ProductKernel.hpp"
7+
#include <sycl/ext/oneapi/free_function_queries.hpp>
8+
9+
using namespace sycl;
10+
11+
// Add declarations again to test the compiler with multiple
12+
// declarations of the same free function kernel in the same translation unit
13+
14+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
15+
(ext::oneapi::experimental::nd_range_kernel<1>))
16+
void product(accessor<int, 1> accA, accessor<int, 1> accB,
17+
accessor<int, 1> result);
18+
19+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
20+
(ext::oneapi::experimental::nd_range_kernel<1>))
21+
void product(accessor<int, 1> accA, accessor<int, 1> accB,
22+
accessor<int, 1> result) {
23+
size_t id =
24+
ext::oneapi::this_work_item::get_nd_item<1>().get_global_linear_id();
25+
result[id] = accA[id] * accB[id];
26+
}
27+
28+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
29+
(ext::oneapi::experimental::nd_range_kernel<1>))
30+
void product(accessor<int, 1> accA, accessor<int, 1> accB,
31+
accessor<int, 1> result);
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#pragma once
2+
#include <sycl/accessor.hpp>
3+
#include <sycl/kernel_bundle.hpp>
4+
5+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
6+
(sycl::ext::oneapi::experimental::nd_range_kernel<1>))
7+
void product(sycl::accessor<int, 1> accA, sycl::accessor<int, 1> accB,
8+
sycl::accessor<int, 1> result);
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// RUN: %{build} %S/SumKernel.cc %S/ProductKernel.cc -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
#include "ProductKernel.hpp"
5+
#include "SumKernel.hpp"
6+
#include <cassert>
7+
#include <numeric>
8+
#include <sycl/detail/core.hpp>
9+
#include <sycl/ext/oneapi/experimental/free_function_traits.hpp>
10+
#include <sycl/kernel_bundle.hpp>
11+
12+
using namespace sycl;
13+
14+
// Add declarations again to test the compiler with multiple declarations of the
15+
// same free function kernel in the translation unit.
16+
17+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
18+
(ext::oneapi::experimental::nd_range_kernel<1>))
19+
void SumKernel::sum(accessor<int, 1> accA, accessor<int, 1> accB,
20+
accessor<int, 1> result);
21+
22+
constexpr size_t SIZE = 16;
23+
24+
int main() {
25+
int data[SIZE];
26+
int result[SIZE];
27+
std::iota(data, data + SIZE, 0);
28+
queue Q;
29+
kernel_bundle bundle =
30+
get_kernel_bundle<bundle_state::executable>(Q.get_context());
31+
kernel_id sumId = ext::oneapi::experimental::get_kernel_id<SumKernel::sum>();
32+
kernel_id productId = ext::oneapi::experimental::get_kernel_id<product>();
33+
kernel sumKernel = bundle.get_kernel(sumId);
34+
kernel productKernel = bundle.get_kernel(productId);
35+
36+
{
37+
buffer<int, 1> databuf{data, SIZE};
38+
buffer<int, 1> resultbuf{result, SIZE};
39+
40+
Q.submit([&](handler &h) {
41+
accessor<int, 1> accdata(databuf, h);
42+
accessor<int, 1> accresult(resultbuf, h);
43+
h.set_args(accdata, accdata, accresult);
44+
h.parallel_for(nd_range{{SIZE}, {SIZE}}, sumKernel);
45+
});
46+
}
47+
48+
int failed = 0;
49+
for (int i = 0; i < SIZE; ++i) {
50+
if (result[i] != 2 * data[i]) {
51+
std::cout << "Failed at index " << i << ": " << result[i]
52+
<< "!=" << (2 * data[i]) << std::endl;
53+
++failed;
54+
}
55+
}
56+
57+
{
58+
buffer<int, 1> databuf{data, SIZE};
59+
buffer<int, 1> resultbuf{result, SIZE};
60+
61+
Q.submit([&](handler &h) {
62+
accessor<int, 1> accdata(databuf, h);
63+
accessor<int, 1> accresult(resultbuf, h);
64+
h.set_args(accdata, accdata, accresult);
65+
h.parallel_for(nd_range{{SIZE}, {SIZE}}, productKernel);
66+
});
67+
}
68+
69+
for (int i = 0; i < SIZE; ++i) {
70+
if (result[i] != data[i] * data[i]) {
71+
std::cout << "Failed at index " << i << ": " << result[i]
72+
<< "!=" << (data[i] * data[i]) << std::endl;
73+
++failed;
74+
}
75+
}
76+
77+
return failed;
78+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// This is not meant as a standalone test but rather as a source file that will
2+
// link with SeparateCompilation.cpp to check that separate compilation works
3+
// with free function kernels. Hence the .cc suffix to exclude it from the list
4+
// of picked up tests.
5+
6+
#include "SumKernel.hpp"
7+
#include <sycl/ext/oneapi/free_function_queries.hpp>
8+
9+
using namespace sycl;
10+
11+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
12+
(ext::oneapi::experimental::nd_range_kernel<1>))
13+
void SumKernel::sum(accessor<int, 1> accA, accessor<int, 1> accB,
14+
accessor<int, 1> result) {
15+
size_t id =
16+
ext::oneapi::this_work_item::get_nd_item<1>().get_global_linear_id();
17+
result[id] = accA[id] + accB[id];
18+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#pragma once
2+
#include <sycl/accessor.hpp>
3+
#include <sycl/kernel_bundle.hpp>
4+
5+
namespace SumKernel {
6+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
7+
(sycl::ext::oneapi::experimental::nd_range_kernel<1>))
8+
void sum(sycl::accessor<int, 1> accA, sycl::accessor<int, 1> accB,
9+
sycl::accessor<int, 1> result);
10+
} // namespace SumKernel

0 commit comments

Comments
 (0)