Skip to content

Commit f67bc4e

Browse files
committed
[DevTSAN] Support detecting data race for local memory
1 parent c8667f5 commit f67bc4e

File tree

17 files changed

+679
-141
lines changed

17 files changed

+679
-141
lines changed

libdevice/sanitizer/tsan_rtl.cpp

Lines changed: 100 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ static const __SYCL_CONSTANT__ char __tsan_print_generic_to[] =
2222
"[kernel] %p(4) - %p(%d)\n";
2323

2424
static const __SYCL_CONSTANT__ char __tsan_print_raw_shadow[] =
25-
"[kernel] %p(%d) -> %p: {%x, %x, %x, %x}\n";
25+
"[kernel] %p(%d) -> %p: {%x, %x}\n";
2626

2727
static const __SYCL_CONSTANT__ char __tsan_print_shadow_value[] =
2828
"[kernel] %p(%d) : {size: %d, access: %x, sid: %d, clock: %d, is_write: "
@@ -90,26 +90,36 @@ inline __SYCL_GLOBAL__ RawShadow *MemToShadow_PVC(uptr addr, uint32_t as) {
9090
ConvertGenericPointer(addr, as);
9191
}
9292

93-
if (as != ADDRESS_SPACE_GLOBAL)
94-
return nullptr;
95-
9693
addr = RoundDownTo(addr, kShadowCell);
9794

98-
if (addr & 0xff00'0000'0000'0000ULL) {
99-
// device usm
100-
return addr < TsanLaunchInfo->GlobalShadowOffset
101-
? reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
102-
addr + (TsanLaunchInfo->GlobalShadowOffset +
103-
0x200'0000'0000ULL - 0xff00'0000'0000'0000ULL))
104-
: reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
105-
addr - (0xff00'ffff'ffff'ffffULL -
106-
TsanLaunchInfo->GlobalShadowOffsetEnd + 1));
107-
} else {
108-
// host & shared usm
109-
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
110-
(addr & 0xffffffffffULL) + TsanLaunchInfo->GlobalShadowOffset +
111-
((addr & 0x800000000000ULL) >> 7));
95+
if (as == ADDRESS_SPACE_GLOBAL) {
96+
if (addr & 0xff00'0000'0000'0000ULL) {
97+
// device usm
98+
return addr < TsanLaunchInfo->GlobalShadowOffset
99+
? reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
100+
addr + (TsanLaunchInfo->GlobalShadowOffset +
101+
0x200'0000'0000ULL - 0xff00'0000'0000'0000ULL))
102+
: reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
103+
addr - (0xff00'ffff'ffff'ffffULL -
104+
TsanLaunchInfo->GlobalShadowOffsetEnd + 1));
105+
} else {
106+
// host & shared usm
107+
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
108+
(addr & 0xffffffffffULL) + TsanLaunchInfo->GlobalShadowOffset +
109+
((addr & 0x800000000000ULL) >> 7));
110+
}
111+
} else if (as == ADDRESS_SPACE_LOCAL) {
112+
const auto shadow_offset = TsanLaunchInfo->LocalShadowOffset;
113+
if (shadow_offset != 0) {
114+
// The size of SLM is 128KB on PVC
115+
constexpr unsigned SLM_SIZE = 128 * 1024;
116+
const size_t wid = WorkGroupLinearId();
117+
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
118+
shadow_offset + (wid * SLM_SIZE) + (addr & (SLM_SIZE - 1)));
119+
}
112120
}
121+
122+
return nullptr;
113123
}
114124

115125
inline __SYCL_GLOBAL__ RawShadow *MemToShadow(uptr addr, uint32_t as) {
@@ -151,7 +161,7 @@ inline void StoreShadow(__SYCL_GLOBAL__ RawShadow *p, RawShadow s) {
151161
}
152162

153163
inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
154-
uptr addr, uint32_t size,
164+
uptr addr, uint32_t size, uint32_t as,
155165
const char __SYCL_CONSTANT__ *file, uint32_t line,
156166
const char __SYCL_CONSTANT__ *func) {
157167
// This prevents trapping on this address in future.
@@ -179,8 +189,13 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
179189
auto &SanitizerReport =
180190
TsanLaunchInfo->Report[TsanLaunchInfo->RecordedReportCount++];
181191

192+
if (as == ADDRESS_SPACE_GENERIC) {
193+
ConvertGenericPointer(addr, as);
194+
}
195+
182196
SanitizerReport.Address = addr;
183-
SanitizerReport.Type = type;
197+
SanitizerReport.Type =
198+
type | (as == ADDRESS_SPACE_LOCAL ? kAccessLocal : 0);
184199
SanitizerReport.AccessSize = size;
185200

186201
int FileLength = 0;
@@ -224,7 +239,7 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
224239
}
225240

226241
inline bool CheckRace(__SYCL_GLOBAL__ RawShadow *s, Shadow cur, AccessType type,
227-
uptr addr, uint32_t size,
242+
uptr addr, uint32_t size, uint32_t as,
228243
const char __SYCL_CONSTANT__ *file, uint32_t line,
229244
const char __SYCL_CONSTANT__ *func) {
230245
bool stored = false;
@@ -258,7 +273,7 @@ inline bool CheckRace(__SYCL_GLOBAL__ RawShadow *s, Shadow cur, AccessType type,
258273
if (TsanLaunchInfo->Clock[cur.sid()].clk_[old.sid()] >= old.clock())
259274
continue;
260275

261-
DoReportRace(s, type, addr, size, file, line, func);
276+
DoReportRace(s, type, addr, size, as, file, line, func);
262277
return true;
263278
}
264279

@@ -301,17 +316,17 @@ inline bool ContainsSameAccess(__SYCL_GLOBAL__ RawShadow *s, Shadow cur,
301316
return; \
302317
Sid sid = GetCurrentSid(); \
303318
uint16_t current_clock = IncrementEpoch(sid) + 1; \
304-
TSAN_DEBUG(__spirv_ocl_printf( \
305-
__tsan_print_raw_shadow, (void *)addr, as, (void *)shadow_mem, \
306-
shadow_mem[0], shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
319+
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)addr, as, \
320+
(void *)shadow_mem, shadow_mem[0], \
321+
shadow_mem[1])); \
307322
AccessType type = is_write ? kAccessWrite : kAccessRead; \
308323
Shadow cur(addr, size, current_clock, sid, type); \
309324
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_shadow_value, (void *)addr, as, \
310325
size, cur.access(), cur.sid(), cur.clock(), \
311326
is_write)); \
312327
if (ContainsSameAccess(shadow_mem, cur, type)) \
313328
return; \
314-
CheckRace(shadow_mem, cur, type, addr, size, file, line, func); \
329+
CheckRace(shadow_mem, cur, type, addr, size, as, file, line, func); \
315330
}
316331

317332
TSAN_CHECK(read, false, 1)
@@ -349,16 +364,16 @@ __tsan_read16(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *file,
349364
AccessType type = is_write ? kAccessWrite : kAccessRead; \
350365
uptr size1 = Min(size, RoundUpTo(addr + 1, kShadowCell) - addr); \
351366
{ \
352-
TSAN_DEBUG(__spirv_ocl_printf( \
353-
__tsan_print_raw_shadow, (void *)addr, as, (void *)shadow_mem, \
354-
shadow_mem[0], shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
367+
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)addr, as, \
368+
(void *)shadow_mem, shadow_mem[0], \
369+
shadow_mem[1])); \
355370
Shadow cur(addr, size1, current_clock, sid, type); \
356371
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_shadow_value, (void *)addr, \
357372
as, size1, cur.access(), cur.sid(), \
358373
cur.clock(), is_write)); \
359374
if (ContainsSameAccess(shadow_mem, cur, type)) \
360375
goto SECOND; \
361-
if (CheckRace(shadow_mem, cur, type, addr, size1, file, line, func)) \
376+
if (CheckRace(shadow_mem, cur, type, addr, size1, as, file, line, func)) \
362377
return; \
363378
} \
364379
SECOND: \
@@ -367,17 +382,17 @@ __tsan_read16(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *file,
367382
return; \
368383
shadow_mem += kShadowCnt; \
369384
{ \
370-
TSAN_DEBUG( \
371-
__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)(addr + size1), \
372-
as, (void *)shadow_mem, shadow_mem[0], \
373-
shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
385+
TSAN_DEBUG(__spirv_ocl_printf( \
386+
__tsan_print_raw_shadow, (void *)(addr + size1), as, \
387+
(void *)shadow_mem, shadow_mem[0], shadow_mem[1])); \
374388
Shadow cur(0, size2, current_clock, sid, type); \
375389
TSAN_DEBUG(__spirv_ocl_printf( \
376390
__tsan_print_shadow_value, (void *)(addr + size1), as, size2, \
377391
cur.access(), cur.sid(), cur.clock(), is_write)); \
378392
if (ContainsSameAccess(shadow_mem, cur, type)) \
379393
return; \
380-
CheckRace(shadow_mem, cur, type, addr + size1, size2, file, line, func); \
394+
CheckRace(shadow_mem, cur, type, addr + size1, size2, as, file, line, \
395+
func); \
381396
} \
382397
}
383398

@@ -420,7 +435,7 @@ static inline void __tsan_cleanup_private_cpu_impl(uptr addr, uint32_t size) {
420435
}
421436
}
422437

423-
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) {
438+
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, size_t size) {
424439
#if defined(__LIBDEVICE_CPU__)
425440
__tsan_cleanup_private_cpu_impl(addr, size);
426441
#elif defined(__LIBDEVICE_PVC__)
@@ -433,6 +448,55 @@ DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) {
433448
#endif
434449
}
435450

451+
static __SYCL_CONSTANT__ const char __tsan_print_cleanup_local[] =
452+
"[kernel] cleanup shadow (%p ~ %p) for local %p\n";
453+
454+
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_static_local(uptr addr,
455+
size_t size) {
456+
// Update shadow memory of local memory only on first work-item
457+
if (__spirv_LocalInvocationId_x() + __spirv_LocalInvocationId_y() +
458+
__spirv_LocalInvocationId_z() ==
459+
0) {
460+
if (TsanLaunchInfo->LocalShadowOffset == 0)
461+
return;
462+
463+
addr = RoundDownTo(addr, kShadowCell);
464+
size = RoundUpTo(size, kShadowCell);
465+
466+
RawShadow *Begin = MemToShadow(addr, ADDRESS_SPACE_LOCAL);
467+
for (uptr i = 0; i < size / kShadowCell * kShadowCnt; i++)
468+
Begin[i] = 0;
469+
470+
TSAN_DEBUG(__spirv_ocl_printf(
471+
__tsan_print_cleanup_local, addr, Begin,
472+
(uptr)Begin + size / kShadowCell * kShadowCnt * kShadowSize - 1));
473+
}
474+
}
475+
476+
static __SYCL_CONSTANT__ const char __tsan_print_report_arg_count_incorrect[] =
477+
"[kernel] ERROR: The number of local args is incorrect, expect %d, actual "
478+
"%d\n";
479+
480+
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_dynamic_local(uptr ptr,
481+
uint32_t num_args) {
482+
if (!TsanLaunchInfo->LocalShadowOffset)
483+
return;
484+
485+
if (num_args != TsanLaunchInfo->NumLocalArgs) {
486+
__spirv_ocl_printf(__tsan_print_report_arg_count_incorrect, num_args,
487+
TsanLaunchInfo->NumLocalArgs);
488+
return;
489+
}
490+
491+
uptr *args = (uptr *)ptr;
492+
493+
for (uint32_t i = 0; i < num_args; ++i) {
494+
auto *local_arg = &TsanLaunchInfo->LocalArgs[i];
495+
496+
__tsan_cleanup_static_local(args[i], local_arg->Size);
497+
}
498+
}
499+
436500
DEVICE_EXTERN_C_INLINE void __tsan_device_barrier() {
437501
Sid sid = GetCurrentSid();
438502

llvm/include/llvm/Transforms/Instrumentation/SPIRVSanitizerCommonUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/IR/DerivedTypes.h"
1616
#include "llvm/IR/Type.h"
1717
#include "llvm/IR/Value.h"
18+
#include "llvm/IR/Constants.h"
1819

1920
namespace llvm {
2021
// Spir memory address space
@@ -26,6 +27,8 @@ constexpr unsigned kSpirOffloadGenericAS = 4;
2627

2728
TargetExtType *getTargetExtType(Type *Ty);
2829
bool isJointMatrixAccess(Value *V);
30+
/// Appends to \p Functions the parent function of \p User when it is an
/// Instruction; when it is a ConstantExpr, recurses into that expression's
/// users instead. Other kinds of user are ignored. Results are appended
/// without de-duplication, so the same function may appear more than once.
void getFunctionsOfUser(User *User, SmallVectorImpl<Function *> &Functions);
31+
2932
} // namespace llvm
3033

3134
#endif // LLVM_TRANSFORMS_INSTRUMENTATION_SPIRVSANITIZERCOMMONUTILS_H

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2968,15 +2968,6 @@ void ModuleAddressSanitizer::instrumentDeviceGlobal(IRBuilder<> &IRB) {
29682968
G->eraseFromParent();
29692969
}
29702970

2971-
static void getFunctionsOfUser(User *User, DenseSet<Function *> &Functions) {
2972-
if (Instruction *Inst = dyn_cast<Instruction>(User)) {
2973-
Functions.insert(Inst->getFunction());
2974-
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(User)) {
2975-
for (auto *U : CE->users())
2976-
getFunctionsOfUser(U, Functions);
2977-
}
2978-
}
2979-
29802971
void ModuleAddressSanitizer::initializeRetVecMap(Function *F) {
29812972
if (KernelToRetVecMap.find(F) != KernelToRetVecMap.end())
29822973
return;
@@ -3109,19 +3100,23 @@ void ModuleAddressSanitizer::instrumentSyclStaticLocalMemory(IRBuilder<> &IRB) {
31093100
// We only instrument on spir_kernel, because local variables are
31103101
// kind of global variable
31113102
for (auto *G : LocalGlobals) {
3112-
DenseSet<Function *> InstrumentedFunc;
3103+
SmallVector<Function *> WorkList;
3104+
DenseSet<Function *> InstrumentedKernel;
31133105
for (auto *User : G->users())
3114-
getFunctionsOfUser(User, InstrumentedFunc);
3115-
for (Function *F : InstrumentedFunc) {
3106+
getFunctionsOfUser(User, WorkList);
3107+
while (!WorkList.empty()) {
3108+
Function *F = WorkList.pop_back_val();
31163109
if (F->getCallingConv() == CallingConv::SPIR_KERNEL) {
3117-
Instrument(G, F);
3110+
if (!InstrumentedKernel.contains(F)) {
3111+
Instrument(G, F);
3112+
InstrumentedKernel.insert(F);
3113+
}
31183114
continue;
31193115
}
31203116
// Get root spir_kernel of spir_func
31213117
initializeKernelCallerMap(F);
3122-
for (Function *Kernel : FuncToKernelCallerMap[F])
3123-
if (!InstrumentedFunc.contains(Kernel))
3124-
Instrument(G, Kernel);
3118+
for (auto *F : FuncToKernelCallerMap[F])
3119+
WorkList.push_back(F);
31253120
}
31263121
}
31273122
}

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,15 +1012,6 @@ void MemorySanitizerOnSpirv::initializeKernelCallerMap(Function *F) {
10121012
}
10131013
}
10141014

1015-
static void getFunctionsOfUser(User *User, DenseSet<Function *> &Functions) {
1016-
if (Instruction *Inst = dyn_cast<Instruction>(User)) {
1017-
Functions.insert(Inst->getFunction());
1018-
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(User)) {
1019-
for (auto *U : CE->users())
1020-
getFunctionsOfUser(U, Functions);
1021-
}
1022-
}
1023-
10241015
void MemorySanitizerOnSpirv::instrumentStaticLocalMemory() {
10251016
if (!ClSpirOffloadLocals)
10261017
return;
@@ -1057,18 +1048,23 @@ void MemorySanitizerOnSpirv::instrumentStaticLocalMemory() {
10571048
// kind of global variable, which must be initialized only once.
10581049
for (auto &G : M.globals()) {
10591050
if (G.getAddressSpace() == kSpirOffloadLocalAS) {
1060-
DenseSet<Function *> InstrumentedFunc;
1051+
SmallVector<Function *> WorkList;
1052+
DenseSet<Function *> InstrumentedKernel;
10611053
for (auto *User : G.users())
1062-
getFunctionsOfUser(User, InstrumentedFunc);
1063-
for (Function *F : InstrumentedFunc) {
1054+
getFunctionsOfUser(User, WorkList);
1055+
while (!WorkList.empty()) {
1056+
Function *F = WorkList.pop_back_val();
10641057
if (F->getCallingConv() == CallingConv::SPIR_KERNEL) {
1065-
Instrument(&G, F);
1058+
if (!InstrumentedKernel.contains(F)) {
1059+
Instrument(&G, F);
1060+
InstrumentedKernel.insert(F);
1061+
}
10661062
continue;
10671063
}
10681064
// Get root spir_kernel of spir_func
10691065
initializeKernelCallerMap(F);
1070-
for (Function *Kernel : FuncToKernelCallerMap[F])
1071-
Instrument(&G, Kernel);
1066+
for (auto *F : FuncToKernelCallerMap[F])
1067+
WorkList.push_back(F);
10721068
}
10731069
}
10741070
}

llvm/lib/Transforms/Instrumentation/SPIRVSanitizerCommonUtils.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,14 @@ bool isJointMatrixAccess(Value *V) {
5858
}
5959
return false;
6060
}
61+
62+
/// Collects the functions that (transitively) use a value through \p User.
///
/// An Instruction contributes its parent function. A ConstantExpr is looked
/// through: each of its users is visited recursively, in user-list order.
/// Any other kind of user contributes nothing. Entries are appended to
/// \p Functions without de-duplication.
void getFunctionsOfUser(User *User, SmallVectorImpl<Function *> &Functions) {
  if (auto *I = dyn_cast<Instruction>(User)) {
    Functions.push_back(I->getFunction());
    return;
  }

  auto *Expr = dyn_cast<ConstantExpr>(User);
  if (!Expr)
    return;

  for (auto *Nested : Expr->users())
    getFunctionsOfUser(Nested, Functions);
}
70+
6171
} // namespace llvm

0 commit comments

Comments
 (0)