Skip to content

Commit 16e39df

Browse files
authored
[ESIMD] Infer address space of pointer that are passed through invoke_simd to ESIMD API to generate better code on BE (#14528)
1 parent 34b8e40 commit 16e39df

File tree

3 files changed

+161
-0
lines changed

3 files changed

+161
-0
lines changed

llvm/lib/SYCLLowerIR/LowerInvokeSimd.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "llvm/ADT/SmallPtrSet.h"
2828
#include "llvm/GenXIntrinsics/GenXMetadata.h"
2929
#include "llvm/IR/Constants.h"
30+
#include "llvm/IR/InstIterator.h"
3031
#include "llvm/IR/Instructions.h"
3132
#include "llvm/IR/Module.h"
3233
#include "llvm/IR/Verifier.h"
@@ -259,6 +260,45 @@ void markFunctionAsESIMD(Function *F) {
259260
}
260261
}
261262

263+
void adjustAddressSpace(Function *F, uint32_t ArgNo, uint32_t ArgAddrSpace) {
264+
Argument *Arg = F->getArg(ArgNo);
265+
for (User *ArgUse : Arg->users()) {
266+
Instruction *Instr = dyn_cast<Instruction>(ArgUse);
267+
if (!Instr)
268+
continue;
269+
const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(ArgUse);
270+
if (ASC) {
271+
if (ASC->getDestAddressSpace() == ArgAddrSpace)
272+
continue;
273+
}
274+
275+
const CallInst *CI = dyn_cast<CallInst>(ArgUse);
276+
if (CI) {
277+
Function *Callee = CI->getCalledFunction();
278+
if (!Callee || Callee->isDeclaration())
279+
continue;
280+
281+
for (uint32_t i = 0; i < CI->getNumOperands(); ++i) {
282+
if (CI->getOperand(i) == Arg) {
283+
adjustAddressSpace(Callee, i, ArgAddrSpace);
284+
}
285+
}
286+
} else {
287+
for (unsigned int i = 0; i < ArgUse->getNumOperands(); ++i) {
288+
if (ArgUse->getOperand(i) == Arg) {
289+
PointerType *NPT = PointerType::get(Arg->getContext(), ArgAddrSpace);
290+
291+
auto *NewInstr = new AddrSpaceCastInst(ArgUse->getOperand(i), NPT);
292+
NewInstr->insertBefore(Instr);
293+
NewInstr->setDebugLoc(Instr->getDebugLoc());
294+
295+
ArgUse->setOperand(i, NewInstr);
296+
}
297+
}
298+
}
299+
}
300+
}
301+
262302
// Process 'invoke_simd(sub_group_obj, f, spmd_args...);' call.
263303
//
264304
// If f is a function name or a function pointer, this call is lowered into
@@ -319,6 +359,25 @@ bool processInvokeSimdCall(CallInst *InvokeSimd,
319359
SimdF->addFnAttr(INVOKE_SIMD_DIRECT_TARGET_ATTR);
320360
}
321361

362+
if (!SimdF->isDeclaration()) {
363+
// The real arguments for invoke_simd callee start at index 2.
364+
for (uint32_t i = 2; i < InvokeSimd->arg_size(); ++i) {
365+
const Value *Arg = InvokeSimd->getArgOperand(i);
366+
if (Arg->getType()->isPointerTy()) {
367+
uint32_t AddressSpace = Arg->getType()->getPointerAddressSpace();
368+
if (AddressSpace == 4) {
369+
const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Arg);
370+
if (!ASC)
371+
continue;
372+
373+
AddressSpace =
374+
ASC->getOperand(0)->getType()->getPointerAddressSpace();
375+
}
376+
adjustAddressSpace(SimdF, i - 2, AddressSpace);
377+
}
378+
}
379+
}
380+
322381
// The invoke_simd target is known at compile-time - optimize.
323382
// 1. find the call to f within the cloned helper - it is its first parameter
324383
constexpr unsigned SimdCallTargetArgNo = 0;
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//==------------- invoke_simd_smoke.cpp - DPC++ ESIMD on-device test----==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu-intel-dg2 && level_zero
9+
// UNSUPPORTED: windows
10+
11+
// RUN: mkdir -p %t.dir && %{build} -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.dir/exec.out
12+
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out
13+
// RUN: python3 %S/instruction_count.py %t.dir 149 OCL_asmc2becd046944fa5f_simd16_entry_0001.asm
14+
// RUN: echo "Baseline from driver version 1.3.29735"
15+
16+
#include "../../InvokeSimd/invoke_simd_smoke.cpp"
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// RUN: %clangxx -fsycl -fsycl-device-only -Xclang -fsycl-allow-func-ptr -S %s -o %t.ll
2+
// RUN: sycl-post-link -O2 -device-globals -properties -spec-const=native -split=auto -emit-only-kernels-as-entry-points -emit-param-info -symbols -emit-exported-symbols -emit-imported-symbols -lower-esimd -S %t.ll -o %t.table
3+
// RUN: FileCheck %s -input-file=%t_0.ll
4+
5+
// The test validates proper address space inferral for a pointer passed to
6+
// invoke_simd callee that is used for ESIMD API memory API
7+
8+
#include <sycl/detail/core.hpp>
9+
#include <sycl/ext/intel/esimd.hpp>
10+
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
11+
#include <sycl/ext/oneapi/experimental/uniform.hpp>
12+
#include <sycl/usm.hpp>
13+
14+
#include <functional>
15+
#include <iostream>
16+
#include <type_traits>
17+
18+
using namespace sycl::ext::oneapi::experimental;
19+
using namespace sycl;
20+
namespace esimd = sycl::ext::intel::esimd;
21+
22+
constexpr int VL = 32;
23+
24+
__attribute__((always_inline)) void ESIMD_CALLEE(float *A, float *B,
25+
int i) SYCL_ESIMD_FUNCTION {
26+
esimd::simd<float, VL> a;
27+
a.copy_from(A + i);
28+
a.copy_to(B + i);
29+
}
30+
31+
[[intel::device_indirectly_callable]] SYCL_EXTERNAL void __regcall SIMD_CALLEE1(
32+
float *A, float *B, int i) SYCL_ESIMD_FUNCTION {
33+
ESIMD_CALLEE(A, B, i);
34+
}
35+
bool test() {
36+
constexpr unsigned Size = 1024;
37+
constexpr unsigned GroupSize = 4 * VL;
38+
39+
queue q;
40+
41+
auto dev = q.get_device();
42+
float *A = malloc_shared<float>(Size, q);
43+
44+
sycl::range<1> GlobalRange{Size};
45+
// Number of workitems in each workgroup.
46+
sycl::range<1> LocalRange{GroupSize};
47+
48+
sycl::nd_range<1> Range(GlobalRange, LocalRange);
49+
50+
try {
51+
auto e = q.submit([&](handler &cgh) {
52+
local_accessor<float, 1> LocalAcc(Size, cgh);
53+
cgh.parallel_for(Range, [=](nd_item<1> item) [[intel::reqd_sub_group_size(
54+
VL)]] {
55+
sycl::group<1> g = item.get_group();
56+
sycl::sub_group sg = item.get_sub_group();
57+
58+
unsigned int i = g.get_group_id() * g.get_local_range() +
59+
sg.get_group_id() * sg.get_max_local_range();
60+
61+
invoke_simd(
62+
sg, SIMD_CALLEE1, uniform{A},
63+
uniform{LocalAcc.template get_multi_ptr<access::decorated::yes>()
64+
.get()},
65+
uniform{i});
66+
});
67+
});
68+
e.wait();
69+
} catch (sycl::exception const &e) {
70+
std::cout << "SYCL exception caught: " << e.what() << '\n';
71+
sycl::free(A, q);
72+
return false;
73+
}
74+
75+
sycl::free(A, q);
76+
77+
return 0;
78+
// CHECK: addrspacecast ptr addrspace(4) %A to ptr addrspace(1)
79+
// CHECK: addrspacecast ptr addrspace(4) %B to ptr addrspace(3)
80+
}
81+
82+
int main() {
83+
test();
84+
85+
return 0;
86+
}

0 commit comments

Comments
 (0)