diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h index 8aac24b8d0079..967e0d1bc2e88 100644 --- a/clang/include/clang/Sema/SemaSYCL.h +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -265,6 +265,8 @@ class SemaSYCL : public SemaBase { llvm::DenseSet SYCLKernelFunctions; + llvm::DenseSet FreeFunctionDeclarations; + public: SemaSYCL(Sema &S); @@ -357,7 +359,9 @@ class SemaSYCL : public SemaBase { void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); void SetSYCLKernelNames(); void MarkDevices(); + void processFreeFunctionDeclaration(const FunctionDecl *FD); void ProcessFreeFunction(FunctionDecl *FD); + void finalizeFreeFunctionKernels(); /// Get the number of fields or captures within the parsed type. ExprResult ActOnSYCLBuiltinNumFieldsExpr(ParsedType PT); diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 5575e193b3bc3..4c65a69ff3fa7 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1250,6 +1250,7 @@ void Sema::ActOnEndOfTranslationUnitFragment(TUFragmentKind Kind) { } if (getLangOpts().SYCLIsDevice) { + SYCL().finalizeFreeFunctionKernels(); // Set the names of the kernels, now that the names have settled down. This // needs to happen before we generate the integration headers. SYCL().SetSYCLKernelNames(); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 49eef7e6f05e2..4bdab4b65d375 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11170,6 +11170,12 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (getLangOpts().OpenACC) OpenACC().ActOnFunctionDeclarator(NewFD); + // Handle free functions. + if (LangOpts.SYCLIsDevice && !NewFD->isDependentContext() && + !D.isRedeclaration() && + D.getFunctionDefinitionKind() == FunctionDefinitionKind::Declaration) + SYCL().processFreeFunctionDeclaration(NewFD); + return NewFD; } diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ece83dc4fc9f0..87087391bc7ad 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1649,7 +1649,7 @@ class KernelObjVisitor { // A visitor function that dispatches to functions as defined in // SyclKernelFieldHandler by iterating over a free function parameter list. template - void VisitFunctionParameters(FunctionDecl *FreeFunc, + void VisitFunctionParameters(const FunctionDecl *FreeFunc, HandlerTys &...Handlers) { for (ParmVarDecl *Param : FreeFunc->parameters()) visitParam(Param, Param->getType(), Handlers...); @@ -4822,7 +4822,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { } SyclKernelIntHeaderCreator(SemaSYCL &S, SYCLIntegrationHeader &H, - QualType NameType, FunctionDecl *FreeFunc) + QualType NameType, const FunctionDecl *FreeFunc) : SyclKernelFieldHandler(S), Header(H) { Header.startKernel(FreeFunc, NameType, FreeFunc->getLocation(), false /*IsESIMD*/, true /*IsSYCLUnnamedKernel*/, @@ -5849,7 +5849,7 @@ void SemaSYCL::MarkDevices() { } } -static bool CheckFreeFunctionDiagnostics(Sema &S, FunctionDecl *FD) { +static bool CheckFreeFunctionDiagnostics(Sema &S, const FunctionDecl *FD) { if (FD->isVariadic()) { return S.Diag(FD->getLocation(), diag::err_free_function_variadic_args); } @@ -5875,10 +5875,47 @@ static bool CheckFreeFunctionDiagnostics(Sema &S, FunctionDecl *FD) { return false; } +void SemaSYCL::finalizeFreeFunctionKernels() { + // This is called at the end of the translation unit. The kernels that appear + // in this list are kernels that have been declared but not defined. Their + // construction consists only of generating the integration header and setting + // their names manually. The other steps in constructing the kernel cannot be + // done because potentially nothing is known about the arguments of the kernel + // except that they exist. + for (const FunctionDecl *kernel : FreeFunctionDeclarations) { + if (CheckFreeFunctionDiagnostics(SemaRef, kernel)) + continue; // Continue in order to diagnose errors in all kernels + + SyclKernelIntHeaderCreator IntHeader(*this, getSyclIntegrationHeader(), + kernel->getType(), kernel); + KernelObjVisitor Visitor{*this}; + Visitor.VisitFunctionParameters(kernel, IntHeader); + std::unique_ptr MangleCtx( + getASTContext().createMangleContext()); + std::string Name, MangledName; + std::tie(Name, MangledName) = + constructFreeFunctionKernelName(*this, kernel, *MangleCtx); + getSyclIntegrationHeader().updateKernelNames(kernel, Name, MangledName); + } +} + +void SemaSYCL::processFreeFunctionDeclaration(const FunctionDecl *FD) { + // FD represents a forward declaration of a free function kernel. + // Save them for the end of the translation unit action. This makes it easier + // to handle the case where a definition is defined later. + if (isFreeFunction(FD)) + FreeFunctionDeclarations.insert(FD->getCanonicalDecl()); +} + void SemaSYCL::ProcessFreeFunction(FunctionDecl *FD) { if (isFreeFunction(FD)) { if (CheckFreeFunctionDiagnostics(SemaRef, FD)) return; + + // In case the free function kernel has already been seen by way of a + // forward declaration, flush it out because a definition takes priority. + FreeFunctionDeclarations.erase(FD->getCanonicalDecl()); + SyclKernelDecompMarker DecompMarker(*this); SyclKernelFieldChecker FieldChecker(*this); SyclKernelUnionChecker UnionChecker(*this); @@ -7155,7 +7192,7 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "\n// Definition of kernel_id of " << K.Name << "\n"; O << "namespace sycl {\n"; O << "template <>\n"; - O << "kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim" + O << "inline kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim" << ShimCounter << "()>() {\n"; O << " return sycl::detail::get_kernel_id_impl(std::string_view{\"" << K.Name << "\"});\n"; diff --git a/clang/test/CodeGenSYCL/free_function_int_header.cpp b/clang/test/CodeGenSYCL/free_function_int_header.cpp index cc26413b927cc..48a03c6c65916 100644 --- a/clang/test/CodeGenSYCL/free_function_int_header.cpp +++ b/clang/test/CodeGenSYCL/free_function_int_header.cpp @@ -264,6 +264,20 @@ void ff_21(AliasType start, AliasType *ptr) { void ff_22(AliasType start, AliasType *ptr) { } +// Forward declaration of ff_23. +[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 0)]] +void ff_23(int arg); + +// Forward declaration of ff_24 followed by a definition just after. +// Note that ff_24 appears earlier than ff_23 in the integration header because kernels which +// only declared and not defined such as ff_23 are handled at the end of the translation unit to wait for a definition if it appears. +[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 0)]] +void ff_24(int arg); + +[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 0)]] +void ff_24(int arg) { +} + // CHECK: const char* const kernel_names[] = { // CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piii // CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piiii @@ -298,6 +312,8 @@ void ff_22(AliasType start, AliasType *ptr) { // CHECK-NEXT: {{.*}}__sycl_kernel_ff_20N4sycl3_V18accessorIiLi1ELNS0_6access4modeE1026ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEE // CHECK-NEXT: {{.*}}__sycl_kernel_ff_217DerivedPS_ // CHECK-NEXT: {{.*}}__sycl_kernel_ff_227DerivedPS_ +// CHECK-NEXT: {{.*}}__sycl_kernel_ff_24i" +// CHECK-NEXT: {{.*}}__sycl_kernel_ff_23i" // CHECK-NEXT: "" // CHECK-NEXT: }; @@ -417,6 +433,12 @@ void ff_22(AliasType start, AliasType *ptr) { // CHECK: //--- _Z19__sycl_kernel_ff_20N4sycl3_V18accessorIiLi1ELNS0_6access4modeE1026ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEE // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, +// CHECK: //--- _Z19__sycl_kernel_ff_24i +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, + +// CHECK: //--- _Z19__sycl_kernel_ff_23i +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, + // CHECK: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 }, // CHECK-NEXT: }; @@ -991,8 +1013,6 @@ void ff_22(AliasType start, AliasType *ptr) { // CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim29()> { // CHECK-NEXT: static constexpr bool value = true; // CHECK-NEXT: }; -// CHECK-NEXT: } - // CHECK: void ff_21(Derived start, Derived * ptr); // CHECK-NEXT: static constexpr auto __sycl_shim30() { @@ -1022,6 +1042,39 @@ void ff_22(AliasType start, AliasType *ptr) { // CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim31()> { // CHECK-NEXT: static constexpr bool value = true; // CHECK-NEXT: }; + +// CHECK: Definition of _Z19__sycl_kernel_ff_24i as a free function kernel +// CHECK: Forward declarations of kernel and its argument types: +// CHECK: void ff_24(int arg); +// CHECK-NEXT: static constexpr auto __sycl_shim32() { +// CHECK-NEXT: return (void (*)(int))ff_24; +// CHECK-NEXT: } +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: template <> +// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim32()> { +// CHECK-NEXT: static constexpr bool value = true; +// CHECK-NEXT: }; +// CHECK-NEXT: template <> +// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim32()> { +// CHECK-NEXT: static constexpr bool value = true; +// CHECK-NEXT: }; + +// CHECK: Definition of _Z19__sycl_kernel_ff_23i as a free function kernel +// CHECK: Forward declarations of kernel and its argument types: +// CHECK: void ff_23(int arg); +// CHECK-NEXT: static constexpr auto __sycl_shim33() { +// CHECK-NEXT: return (void (*)(int))ff_23; +// CHECK-NEXT: } +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: template <> +// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim33()> { +// CHECK-NEXT: static constexpr bool value = true; +// CHECK-NEXT: }; +// CHECK-NEXT: template <> +// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim33()> { +// CHECK-NEXT: static constexpr bool value = true; +// CHECK-NEXT: }; + // CHECK-NEXT: } // CHECK: #include @@ -1252,5 +1305,20 @@ void ff_22(AliasType start, AliasType *ptr) { // CHECK-NEXT: template <> // CHECK-NEXT: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim31()>() { // CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_227DerivedPS_"}); + +// CHECK: // Definition of kernel_id of _Z19__sycl_kernel_ff_24i +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: template <> +// CHECK-NEXT: inline kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim32()>() { +// CHECK-NEXT return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_24i"}); +// CHECK-NEXT: } +// CHECK-NEXT: } + +// CHECK: // Definition of kernel_id of _Z19__sycl_kernel_ff_23i +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: template <> +// CHECK-NEXT: inline kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim33()>() { +// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_23i"}); + // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/sycl/test-e2e/FreeFunctionKernels/ProductKernel.cc b/sycl/test-e2e/FreeFunctionKernels/ProductKernel.cc new file mode 100644 index 0000000000000..d909ce1f62d5b --- /dev/null +++ b/sycl/test-e2e/FreeFunctionKernels/ProductKernel.cc @@ -0,0 +1,31 @@ +// This is not meant as a standalone test but rather as a source file that will +// link with SeparateCompilation.cpp to check that separate compilation works +// with free function kernels. Hence the .cc suffix to exclude it from the list +// of picked up tests. + +#include "ProductKernel.hpp" +#include + +using namespace sycl; + +// Add declarations again to test the compiler with multiple +// declarations of the same free function kernel in the same translation unit + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void product(accessor accA, accessor accB, + accessor result); + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void product(accessor accA, accessor accB, + accessor result) { + size_t id = + ext::oneapi::this_work_item::get_nd_item<1>().get_global_linear_id(); + result[id] = accA[id] * accB[id]; +} + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void product(accessor accA, accessor accB, + accessor result); diff --git a/sycl/test-e2e/FreeFunctionKernels/ProductKernel.hpp b/sycl/test-e2e/FreeFunctionKernels/ProductKernel.hpp new file mode 100644 index 0000000000000..2ac12483db85c --- /dev/null +++ b/sycl/test-e2e/FreeFunctionKernels/ProductKernel.hpp @@ -0,0 +1,8 @@ +#pragma once +#include +#include + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::nd_range_kernel<1>)) +void product(sycl::accessor accA, sycl::accessor accB, + sycl::accessor result); diff --git a/sycl/test-e2e/FreeFunctionKernels/SeparateCompilation.cpp b/sycl/test-e2e/FreeFunctionKernels/SeparateCompilation.cpp new file mode 100644 index 0000000000000..43ccbb6ed329a --- /dev/null +++ b/sycl/test-e2e/FreeFunctionKernels/SeparateCompilation.cpp @@ -0,0 +1,78 @@ +// RUN: %{build} %S/SumKernel.cc %S/ProductKernel.cc -o %t.out +// RUN: %{run} %t.out + +#include "ProductKernel.hpp" +#include "SumKernel.hpp" +#include +#include +#include +#include +#include + +using namespace sycl; + +// Add declarations again to test the compiler with multiple declarations of the +// same free function kernel in the translation unit. + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void SumKernel::sum(accessor accA, accessor accB, + accessor result); + +constexpr size_t SIZE = 16; + +int main() { + int data[SIZE]; + int result[SIZE]; + std::iota(data, data + SIZE, 0); + queue Q; + kernel_bundle bundle = + get_kernel_bundle(Q.get_context()); + kernel_id sumId = ext::oneapi::experimental::get_kernel_id(); + kernel_id productId = ext::oneapi::experimental::get_kernel_id(); + kernel sumKernel = bundle.get_kernel(sumId); + kernel productKernel = bundle.get_kernel(productId); + + { + buffer databuf{data, SIZE}; + buffer resultbuf{result, SIZE}; + + Q.submit([&](handler &h) { + accessor accdata(databuf, h); + accessor accresult(resultbuf, h); + h.set_args(accdata, accdata, accresult); + h.parallel_for(nd_range{{SIZE}, {SIZE}}, sumKernel); + }); + } + + int failed = 0; + for (int i = 0; i < SIZE; ++i) { + if (result[i] != 2 * data[i]) { + std::cout << "Failed at index " << i << ": " << result[i] + << "!=" << (2 * data[i]) << std::endl; + ++failed; + } + } + + { + buffer databuf{data, SIZE}; + buffer resultbuf{result, SIZE}; + + Q.submit([&](handler &h) { + accessor accdata(databuf, h); + accessor accresult(resultbuf, h); + h.set_args(accdata, accdata, accresult); + h.parallel_for(nd_range{{SIZE}, {SIZE}}, productKernel); + }); + } + + for (int i = 0; i < SIZE; ++i) { + if (result[i] != data[i] * data[i]) { + std::cout << "Failed at index " << i << ": " << result[i] + << "!=" << (data[i] * data[i]) << std::endl; + ++failed; + } + } + + return failed; +} diff --git a/sycl/test-e2e/FreeFunctionKernels/SumKernel.cc b/sycl/test-e2e/FreeFunctionKernels/SumKernel.cc new file mode 100644 index 0000000000000..df9b453d8fa16 --- /dev/null +++ b/sycl/test-e2e/FreeFunctionKernels/SumKernel.cc @@ -0,0 +1,18 @@ +// This is not meant as a standalone test but rather as a source file that will +// link with SeparateCompilation.cpp to check that separate compilation works +// with free function kernels. Hence the .cc suffix to exclude it from the list +// of picked up tests. + +#include "SumKernel.hpp" +#include + +using namespace sycl; + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (ext::oneapi::experimental::nd_range_kernel<1>)) +void SumKernel::sum(accessor accA, accessor accB, + accessor result) { + size_t id = + ext::oneapi::this_work_item::get_nd_item<1>().get_global_linear_id(); + result[id] = accA[id] + accB[id]; +} diff --git a/sycl/test-e2e/FreeFunctionKernels/SumKernel.hpp b/sycl/test-e2e/FreeFunctionKernels/SumKernel.hpp new file mode 100644 index 0000000000000..7fc24c1cd706b --- /dev/null +++ b/sycl/test-e2e/FreeFunctionKernels/SumKernel.hpp @@ -0,0 +1,10 @@ +#pragma once +#include +#include + +namespace SumKernel { +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::nd_range_kernel<1>)) +void sum(sycl::accessor accA, sycl::accessor accB, + sycl::accessor result); +} // namespace SumKernel