Skip to content

[DevTSAN] Support detecting data race for local memory #18718

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 101 additions & 36 deletions libdevice/sanitizer/tsan_rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ static const __SYCL_CONSTANT__ char __tsan_print_generic_to[] =
"[kernel] %p(4) - %p(%d)\n";

static const __SYCL_CONSTANT__ char __tsan_print_raw_shadow[] =
"[kernel] %p(%d) -> %p: {%x, %x, %x, %x}\n";
"[kernel] %p(%d) -> %p: {%x, %x}\n";

static const __SYCL_CONSTANT__ char __tsan_print_shadow_value[] =
"[kernel] %p(%d) : {size: %d, access: %x, sid: %d, clock: %d, is_write: "
Expand Down Expand Up @@ -90,26 +90,36 @@ inline __SYCL_GLOBAL__ RawShadow *MemToShadow_PVC(uptr addr, uint32_t as) {
ConvertGenericPointer(addr, as);
}

if (as != ADDRESS_SPACE_GLOBAL)
return nullptr;

addr = RoundDownTo(addr, kShadowCell);

if (addr & 0xff00'0000'0000'0000ULL) {
// device usm
return addr < TsanLaunchInfo->GlobalShadowOffset
? reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
addr + (TsanLaunchInfo->GlobalShadowOffset +
0x200'0000'0000ULL - 0xff00'0000'0000'0000ULL))
: reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
addr - (0xff00'ffff'ffff'ffffULL -
TsanLaunchInfo->GlobalShadowOffsetEnd + 1));
} else {
// host & shared usm
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
(addr & 0xffffffffffULL) + TsanLaunchInfo->GlobalShadowOffset +
((addr & 0x800000000000ULL) >> 7));
if (as == ADDRESS_SPACE_GLOBAL) {
if (addr & 0xff00'0000'0000'0000ULL) {
// device usm
return addr < TsanLaunchInfo->GlobalShadowOffset
? reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
addr + (TsanLaunchInfo->GlobalShadowOffset +
0x200'0000'0000ULL - 0xff00'0000'0000'0000ULL))
: reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
addr - (0xff00'ffff'ffff'ffffULL -
TsanLaunchInfo->GlobalShadowOffsetEnd + 1));
} else {
// host & shared usm
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
(addr & 0xffffffffffULL) + TsanLaunchInfo->GlobalShadowOffset +
((addr & 0x800000000000ULL) >> 7));
}
} else if (as == ADDRESS_SPACE_LOCAL) {
const auto shadow_offset = TsanLaunchInfo->LocalShadowOffset;
if (shadow_offset != 0) {
// The size of SLM is 128KB on PVC
constexpr unsigned SLM_SIZE = 128 * 1024;
const size_t wid = WorkGroupLinearId();
return reinterpret_cast<__SYCL_GLOBAL__ RawShadow *>(
shadow_offset + (wid * SLM_SIZE) + (addr & (SLM_SIZE - 1)));
}
}

return nullptr;
}

inline __SYCL_GLOBAL__ RawShadow *MemToShadow(uptr addr, uint32_t as) {
Expand Down Expand Up @@ -151,7 +161,7 @@ inline void StoreShadow(__SYCL_GLOBAL__ RawShadow *p, RawShadow s) {
}

inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
uptr addr, uint32_t size,
uptr addr, uint32_t size, uint32_t as,
const char __SYCL_CONSTANT__ *file, uint32_t line,
const char __SYCL_CONSTANT__ *func) {
// This prevents trapping on this address in future.
Expand All @@ -167,6 +177,11 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
return;
}

if (as == ADDRESS_SPACE_GENERIC &&
TsanLaunchInfo->DeviceTy != DeviceType::CPU) {
ConvertGenericPointer(addr, as);
}

// Check if current address already being recorded before.
for (uint32_t i = 0; i < TsanLaunchInfo->RecordedReportCount; i++) {
auto &SanitizerReport = TsanLaunchInfo->Report[i];
Expand All @@ -180,7 +195,8 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
TsanLaunchInfo->Report[TsanLaunchInfo->RecordedReportCount++];

SanitizerReport.Address = addr;
SanitizerReport.Type = type;
SanitizerReport.Type =
type | (as == ADDRESS_SPACE_LOCAL ? kAccessLocal : 0);
SanitizerReport.AccessSize = size;

int FileLength = 0;
Expand Down Expand Up @@ -224,7 +240,7 @@ inline void DoReportRace(__SYCL_GLOBAL__ RawShadow *s, AccessType type,
}

inline bool CheckRace(__SYCL_GLOBAL__ RawShadow *s, Shadow cur, AccessType type,
uptr addr, uint32_t size,
uptr addr, uint32_t size, uint32_t as,
const char __SYCL_CONSTANT__ *file, uint32_t line,
const char __SYCL_CONSTANT__ *func) {
bool stored = false;
Expand Down Expand Up @@ -258,7 +274,7 @@ inline bool CheckRace(__SYCL_GLOBAL__ RawShadow *s, Shadow cur, AccessType type,
if (TsanLaunchInfo->Clock[cur.sid()].clk_[old.sid()] >= old.clock())
continue;

DoReportRace(s, type, addr, size, file, line, func);
DoReportRace(s, type, addr, size, as, file, line, func);
return true;
}

Expand Down Expand Up @@ -301,17 +317,17 @@ inline bool ContainsSameAccess(__SYCL_GLOBAL__ RawShadow *s, Shadow cur,
return; \
Sid sid = GetCurrentSid(); \
uint16_t current_clock = IncrementEpoch(sid) + 1; \
TSAN_DEBUG(__spirv_ocl_printf( \
__tsan_print_raw_shadow, (void *)addr, as, (void *)shadow_mem, \
shadow_mem[0], shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)addr, as, \
(void *)shadow_mem, shadow_mem[0], \
shadow_mem[1])); \
AccessType type = is_write ? kAccessWrite : kAccessRead; \
Shadow cur(addr, size, current_clock, sid, type); \
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_shadow_value, (void *)addr, as, \
size, cur.access(), cur.sid(), cur.clock(), \
is_write)); \
if (ContainsSameAccess(shadow_mem, cur, type)) \
return; \
CheckRace(shadow_mem, cur, type, addr, size, file, line, func); \
CheckRace(shadow_mem, cur, type, addr, size, as, file, line, func); \
}

TSAN_CHECK(read, false, 1)
Expand Down Expand Up @@ -349,16 +365,16 @@ __tsan_read16(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *file,
AccessType type = is_write ? kAccessWrite : kAccessRead; \
uptr size1 = Min(size, RoundUpTo(addr + 1, kShadowCell) - addr); \
{ \
TSAN_DEBUG(__spirv_ocl_printf( \
__tsan_print_raw_shadow, (void *)addr, as, (void *)shadow_mem, \
shadow_mem[0], shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)addr, as, \
(void *)shadow_mem, shadow_mem[0], \
shadow_mem[1])); \
Shadow cur(addr, size1, current_clock, sid, type); \
TSAN_DEBUG(__spirv_ocl_printf(__tsan_print_shadow_value, (void *)addr, \
as, size1, cur.access(), cur.sid(), \
cur.clock(), is_write)); \
if (ContainsSameAccess(shadow_mem, cur, type)) \
goto SECOND; \
if (CheckRace(shadow_mem, cur, type, addr, size1, file, line, func)) \
if (CheckRace(shadow_mem, cur, type, addr, size1, as, file, line, func)) \
return; \
} \
SECOND: \
Expand All @@ -367,17 +383,17 @@ __tsan_read16(uptr addr, uint32_t as, const char __SYCL_CONSTANT__ *file,
return; \
shadow_mem += kShadowCnt; \
{ \
TSAN_DEBUG( \
__spirv_ocl_printf(__tsan_print_raw_shadow, (void *)(addr + size1), \
as, (void *)shadow_mem, shadow_mem[0], \
shadow_mem[1], shadow_mem[2], shadow_mem[3])); \
TSAN_DEBUG(__spirv_ocl_printf( \
__tsan_print_raw_shadow, (void *)(addr + size1), as, \
(void *)shadow_mem, shadow_mem[0], shadow_mem[1])); \
Shadow cur(0, size2, current_clock, sid, type); \
TSAN_DEBUG(__spirv_ocl_printf( \
__tsan_print_shadow_value, (void *)(addr + size1), as, size2, \
cur.access(), cur.sid(), cur.clock(), is_write)); \
if (ContainsSameAccess(shadow_mem, cur, type)) \
return; \
CheckRace(shadow_mem, cur, type, addr + size1, size2, file, line, func); \
CheckRace(shadow_mem, cur, type, addr + size1, size2, as, file, line, \
func); \
} \
}

Expand Down Expand Up @@ -420,7 +436,7 @@ static inline void __tsan_cleanup_private_cpu_impl(uptr addr, uint32_t size) {
}
}

DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) {
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, size_t size) {
#if defined(__LIBDEVICE_CPU__)
__tsan_cleanup_private_cpu_impl(addr, size);
#elif defined(__LIBDEVICE_PVC__)
Expand All @@ -433,6 +449,55 @@ DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_private(uptr addr, uint32_t size) {
#endif
}

// Debug format: shadow range begin, shadow range end, then the local address.
static __SYCL_CONSTANT__ const char __tsan_print_cleanup_local[] =
    "[kernel] cleanup shadow (%p ~ %p) for local %p\n";

/// Zero the shadow memory that covers a range of local memory.
///
/// \param addr Start address of the local-memory range; rounded down to a
///             multiple of kShadowCell before being mapped to shadow.
/// \param size Size of the range in bytes; rounded up to a multiple of
///             kShadowCell.
///
/// Only the work-item whose local invocation id is (0, 0, 0) performs the
/// cleanup; every other work-item returns immediately. The function is also a
/// no-op when TsanLaunchInfo->LocalShadowOffset is 0.
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_static_local(uptr addr,
                                                          size_t size) {
  // Update shadow memory of local memory only on the first work-item.
  if (__spirv_LocalInvocationId_x() + __spirv_LocalInvocationId_y() +
          __spirv_LocalInvocationId_z() !=
      0)
    return;

  if (TsanLaunchInfo->LocalShadowOffset == 0)
    return;

  addr = RoundDownTo(addr, kShadowCell);
  size = RoundUpTo(size, kShadowCell);

  RawShadow *Begin = MemToShadow(addr, ADDRESS_SPACE_LOCAL);
  // NOTE(review): the shadow mapping can yield nullptr for address spaces it
  // does not handle; bail out instead of writing through a null pointer.
  if (!Begin)
    return;

  // Each kShadowCell bytes of application memory own kShadowCnt shadow slots.
  const uptr SlotCount = size / kShadowCell * kShadowCnt;
  for (uptr i = 0; i < SlotCount; i++)
    Begin[i] = 0;

  // Argument order must follow the format string: shadow begin, shadow end
  // (inclusive), then the local address that was cleaned.
  TSAN_DEBUG(__spirv_ocl_printf(
      __tsan_print_cleanup_local, (void *)Begin,
      (void *)((uptr)Begin + SlotCount * kShadowSize - 1), (void *)addr));
}

// Printed when the argument count passed in disagrees with the launch info.
static __SYCL_CONSTANT__ const char __tsan_print_report_arg_count_incorrect[] =
    "[kernel] ERROR: The number of local args is incorrect, expect %d, actual "
    "%d\n";

/// Clean the shadow of every local argument described by the launch info.
///
/// \param ptr      Address of an array of `num_args` local-memory addresses.
/// \param num_args Number of entries in that array; must equal
///                 TsanLaunchInfo->NumLocalArgs, otherwise an error is
///                 printed and nothing is cleaned.
///
/// Does nothing when TsanLaunchInfo->LocalShadowOffset is 0. The size of each
/// argument is taken from TsanLaunchInfo->LocalArgs[i].Size.
DEVICE_EXTERN_C_NOINLINE void __tsan_cleanup_dynamic_local(uptr ptr,
                                                           uint32_t num_args) {
  if (TsanLaunchInfo->LocalShadowOffset == 0)
    return;

  const bool CountMatches = (num_args == TsanLaunchInfo->NumLocalArgs);
  if (!CountMatches) {
    __spirv_ocl_printf(__tsan_print_report_arg_count_incorrect, num_args,
                       TsanLaunchInfo->NumLocalArgs);
    return;
  }

  // `ptr` is an integer-typed address of the array of local pointers.
  uptr *LocalAddrs = reinterpret_cast<uptr *>(ptr);

  uint32_t Idx = 0;
  while (Idx < num_args) {
    __tsan_cleanup_static_local(LocalAddrs[Idx],
                                TsanLaunchInfo->LocalArgs[Idx].Size);
    ++Idx;
  }
}

DEVICE_EXTERN_C_INLINE void __tsan_device_barrier() {
Sid sid = GetCurrentSid();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SPIRVSANITIZERCOMMONUTILS_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_SPIRVSANITIZERCOMMONUTILS_H

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
Expand All @@ -26,6 +27,8 @@ constexpr unsigned kSpirOffloadGenericAS = 4;

TargetExtType *getTargetExtType(Type *Ty);
bool isJointMatrixAccess(Value *V);
void getFunctionsOfUser(User *User, SmallVectorImpl<Function *> &Functions);

} // namespace llvm

#endif // LLVM_TRANSFORMS_INSTRUMENTATION_SPIRVSANITIZERCOMMONUTILS_H
27 changes: 11 additions & 16 deletions llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2968,15 +2968,6 @@ void ModuleAddressSanitizer::instrumentDeviceGlobal(IRBuilder<> &IRB) {
G->eraseFromParent();
}

/// Collect into \p Functions the functions that contain \p User.
///
/// An instruction contributes its parent function. A constant expression is
/// looked through by recursing into each of its own users. Any other kind of
/// user is ignored.
static void getFunctionsOfUser(User *User, DenseSet<Function *> &Functions) {
  if (auto *Inst = dyn_cast<Instruction>(User)) {
    Functions.insert(Inst->getFunction());
    return;
  }

  auto *CE = dyn_cast<ConstantExpr>(User);
  if (!CE)
    return;

  for (auto *NestedUser : CE->users())
    getFunctionsOfUser(NestedUser, Functions);
}

void ModuleAddressSanitizer::initializeRetVecMap(Function *F) {
if (KernelToRetVecMap.find(F) != KernelToRetVecMap.end())
return;
Expand Down Expand Up @@ -3109,19 +3100,23 @@ void ModuleAddressSanitizer::instrumentSyclStaticLocalMemory(IRBuilder<> &IRB) {
// We only instrument on spir_kernel, because local variables are
// kind of global variable
for (auto *G : LocalGlobals) {
DenseSet<Function *> InstrumentedFunc;
SmallVector<Function *> WorkList;
DenseSet<Function *> InstrumentedKernel;
for (auto *User : G->users())
getFunctionsOfUser(User, InstrumentedFunc);
for (Function *F : InstrumentedFunc) {
getFunctionsOfUser(User, WorkList);
while (!WorkList.empty()) {
Function *F = WorkList.pop_back_val();
if (F->getCallingConv() == CallingConv::SPIR_KERNEL) {
Instrument(G, F);
if (!InstrumentedKernel.contains(F)) {
Instrument(G, F);
InstrumentedKernel.insert(F);
}
continue;
}
// Get root spir_kernel of spir_func
initializeKernelCallerMap(F);
for (Function *Kernel : FuncToKernelCallerMap[F])
if (!InstrumentedFunc.contains(Kernel))
Instrument(G, Kernel);
for (auto *F : FuncToKernelCallerMap[F])
WorkList.push_back(F);
}
}
}
Expand Down
26 changes: 11 additions & 15 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1012,15 +1012,6 @@ void MemorySanitizerOnSpirv::initializeKernelCallerMap(Function *F) {
}
}

/// Record in \p Functions every function that reaches \p User.
///
/// If \p User is an instruction, its enclosing function is inserted. If it is
/// a constant expression, the search continues through the expression's users.
/// Other user kinds contribute nothing.
static void getFunctionsOfUser(User *User, DenseSet<Function *> &Functions) {
  if (isa<Instruction>(User)) {
    Functions.insert(cast<Instruction>(User)->getFunction());
    return;
  }

  if (!isa<ConstantExpr>(User))
    return;

  for (auto *ExprUser : cast<ConstantExpr>(User)->users())
    getFunctionsOfUser(ExprUser, Functions);
}

void MemorySanitizerOnSpirv::instrumentStaticLocalMemory() {
if (!ClSpirOffloadLocals)
return;
Expand Down Expand Up @@ -1057,18 +1048,23 @@ void MemorySanitizerOnSpirv::instrumentStaticLocalMemory() {
// kind of global variable, which must be initialized only once.
for (auto &G : M.globals()) {
if (G.getAddressSpace() == kSpirOffloadLocalAS) {
DenseSet<Function *> InstrumentedFunc;
SmallVector<Function *> WorkList;
DenseSet<Function *> InstrumentedKernel;
for (auto *User : G.users())
getFunctionsOfUser(User, InstrumentedFunc);
for (Function *F : InstrumentedFunc) {
getFunctionsOfUser(User, WorkList);
while (!WorkList.empty()) {
Function *F = WorkList.pop_back_val();
if (F->getCallingConv() == CallingConv::SPIR_KERNEL) {
Instrument(&G, F);
if (!InstrumentedKernel.contains(F)) {
Instrument(&G, F);
InstrumentedKernel.insert(F);
}
continue;
}
// Get root spir_kernel of spir_func
initializeKernelCallerMap(F);
for (Function *Kernel : FuncToKernelCallerMap[F])
Instrument(&G, Kernel);
for (auto *F : FuncToKernelCallerMap[F])
WorkList.push_back(F);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,14 @@ bool isJointMatrixAccess(Value *V) {
}
return false;
}

/// Append to \p Functions the functions that contain \p User.
///
/// An instruction appends its parent function. A constant expression is
/// transparent: each of its users is visited recursively. Any other user kind
/// is skipped. Entries are appended without de-duplication, so the same
/// function may appear more than once.
void getFunctionsOfUser(User *User, SmallVectorImpl<Function *> &Functions) {
  if (auto *Inst = dyn_cast<Instruction>(User)) {
    Functions.push_back(Inst->getFunction());
    return;
  }

  auto *CE = dyn_cast<ConstantExpr>(User);
  if (!CE)
    return;

  for (auto *NestedUser : CE->users())
    getFunctionsOfUser(NestedUser, Functions);
}

} // namespace llvm
Loading
Loading