@@ -933,6 +933,7 @@ static constexpr IntrinsicHandler handlers[]{
933
933
/* isElemental=*/ false },
934
934
{" tand" , &I::genTand},
935
935
{" this_grid" , &I::genThisGrid, {}, /* isElemental=*/ false },
936
+ {" this_thread_block" , &I::genThisThreadBlock, {}, /* isElemental=*/ false },
936
937
{" this_warp" , &I::genThisWarp, {}, /* isElemental=*/ false },
937
938
{" threadfence" , &I::genThreadFence, {}, /* isElemental=*/ false },
938
939
{" threadfence_block" , &I::genThreadFenceBlock, {}, /* isElemental=*/ false },
@@ -8195,6 +8196,60 @@ mlir::Value IntrinsicLibrary::genThisGrid(mlir::Type resultType,
8195
8196
return res;
8196
8197
}
8197
8198
8199
+ // THIS_THREAD_BLOCK
8200
+ mlir::Value
8201
+ IntrinsicLibrary::genThisThreadBlock (mlir::Type resultType,
8202
+ llvm::ArrayRef<mlir::Value> args) {
8203
+ assert (args.size () == 0 );
8204
+ auto recTy = mlir::cast<fir::RecordType>(resultType);
8205
+ assert (recTy && " RecordType expepected" );
8206
+ mlir::Value res = builder.create <fir::AllocaOp>(loc, resultType);
8207
+ mlir::Type i32Ty = builder.getI32Type ();
8208
+
8209
+ // this_thread_block%size = blockDim.z * blockDim.y * blockDim.x;
8210
+ mlir::Value blockDimX = builder.create <mlir::NVVM::BlockDimXOp>(loc, i32Ty);
8211
+ mlir::Value blockDimY = builder.create <mlir::NVVM::BlockDimYOp>(loc, i32Ty);
8212
+ mlir::Value blockDimZ = builder.create <mlir::NVVM::BlockDimZOp>(loc, i32Ty);
8213
+ mlir::Value size =
8214
+ builder.create <mlir::arith::MulIOp>(loc, blockDimZ, blockDimY);
8215
+ size = builder.create <mlir::arith::MulIOp>(loc, size, blockDimX);
8216
+
8217
+ // this_thread_block%rank = ((threadIdx.z * blockDim.y) * blockDim.x) +
8218
+ // (threadIdx.y * blockDim.x) + threadIdx.x + 1;
8219
+ mlir::Value threadIdX = builder.create <mlir::NVVM::ThreadIdXOp>(loc, i32Ty);
8220
+ mlir::Value threadIdY = builder.create <mlir::NVVM::ThreadIdYOp>(loc, i32Ty);
8221
+ mlir::Value threadIdZ = builder.create <mlir::NVVM::ThreadIdZOp>(loc, i32Ty);
8222
+ mlir::Value r1 =
8223
+ builder.create <mlir::arith::MulIOp>(loc, threadIdZ, blockDimY);
8224
+ mlir::Value r2 = builder.create <mlir::arith::MulIOp>(loc, r1, blockDimX);
8225
+ mlir::Value r3 =
8226
+ builder.create <mlir::arith::MulIOp>(loc, threadIdY, blockDimX);
8227
+ mlir::Value r2r3 = builder.create <mlir::arith::AddIOp>(loc, r2, r3);
8228
+ mlir::Value rank = builder.create <mlir::arith::AddIOp>(loc, r2r3, threadIdX);
8229
+ mlir::Value one = builder.createIntegerConstant (loc, i32Ty, 1 );
8230
+ rank = builder.create <mlir::arith::AddIOp>(loc, rank, one);
8231
+
8232
+ auto sizeFieldName = recTy.getTypeList ()[1 ].first ;
8233
+ mlir::Type sizeFieldTy = recTy.getTypeList ()[1 ].second ;
8234
+ mlir::Type fieldIndexType = fir::FieldType::get (resultType.getContext ());
8235
+ mlir::Value sizeFieldIndex = builder.create <fir::FieldIndexOp>(
8236
+ loc, fieldIndexType, sizeFieldName, recTy,
8237
+ /* typeParams=*/ mlir::ValueRange{});
8238
+ mlir::Value sizeCoord = builder.create <fir::CoordinateOp>(
8239
+ loc, builder.getRefType (sizeFieldTy), res, sizeFieldIndex);
8240
+ builder.create <fir::StoreOp>(loc, size, sizeCoord);
8241
+
8242
+ auto rankFieldName = recTy.getTypeList ()[2 ].first ;
8243
+ mlir::Type rankFieldTy = recTy.getTypeList ()[2 ].second ;
8244
+ mlir::Value rankFieldIndex = builder.create <fir::FieldIndexOp>(
8245
+ loc, fieldIndexType, rankFieldName, recTy,
8246
+ /* typeParams=*/ mlir::ValueRange{});
8247
+ mlir::Value rankCoord = builder.create <fir::CoordinateOp>(
8248
+ loc, builder.getRefType (rankFieldTy), res, rankFieldIndex);
8249
+ builder.create <fir::StoreOp>(loc, rank, rankCoord);
8250
+ return res;
8251
+ }
8252
+
8198
8253
// THIS_WARP
8199
8254
mlir::Value IntrinsicLibrary::genThisWarp (mlir::Type resultType,
8200
8255
llvm::ArrayRef<mlir::Value> args) {
0 commit comments