Skip to content

Commit dc2cd0c

Browse files
authored
Use the upstreamed host_shared flag for gpu.alloc (#412)
Use host_shared flag for gpu.alloc
1 parent 1806eee commit dc2cd0c

File tree

8 files changed: +25 additions, -29 deletions

docs/Transforms/InsertGpuAllocs.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,9 +35,9 @@ The Pass will change the IR to:
3535
```
3636
// -----// IR Dump After {anonymous}::InsertGPUAllocs //----- //
3737
func.func @main() {
38-
%memref = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
39-
%memref_2 = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
40-
%memref_3 = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
38+
%memref = gpu.alloc host_shared () : memref<8xf32>
39+
%memref_2 = gpu.alloc host_shared () : memref<8xf32>
40+
%memref_3 = gpu.alloc host_shared () : memref<8xf32>
4141
.
4242
.
4343
.

docs/Transforms/SetSPIRVCapabilities.md

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ module attributes {gpu.container_module} {
1919
%cst = arith.constant 2.200000e+00 : f32
2020
%cst_0 = arith.constant 1.100000e+00 : f32
2121
%cst_1 = arith.constant 0.000000e+00 : f32
22-
%memref = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
23-
%memref_2 = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
24-
%memref_3 = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
22+
%memref = gpu.alloc host_shared () : memref<8xf32>
23+
%memref_2 = gpu.alloc host_shared () : memref<8xf32>
24+
%memref_3 = gpu.alloc host_shared () : memref<8xf32>
2525
%0 = memref.cast %memref : memref<8xf32> to memref<?xf32>
2626
%1 = memref.cast %memref_2 : memref<8xf32> to memref<?xf32>
2727
%2 = memref.cast %memref_3 : memref<8xf32> to memref<?xf32>
@@ -46,9 +46,9 @@ module attributes {gpu.container_module, spv.target_env = #spv.target_env<#spv.v
4646
%cst = arith.constant 2.200000e+00 : f32
4747
%cst_0 = arith.constant 1.100000e+00 : f32
4848
%cst_1 = arith.constant 0.000000e+00 : f32
49-
%memref = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
50-
%memref_2 = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
51-
%memref_3 = gpu.alloc () {gpu.alloc_shared} : memref<8xf32>
49+
%memref = gpu.alloc host_shared () : memref<8xf32>
50+
%memref_2 = gpu.alloc host_shared () : memref<8xf32>
51+
%memref_3 = gpu.alloc host_shared () : memref<8xf32>
5252
%0 = memref.cast %memref : memref<8xf32> to memref<?xf32>
5353
%1 = memref.cast %memref_2 : memref<8xf32> to memref<?xf32>
5454
%2 = memref.cast %memref_3 : memref<8xf32> to memref<?xf32>

include/imex/Dialect/GPUX/IR/GPUXOps.td

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,9 @@ def GPUX_AllocOp
177177

178178
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
179179
GPUX_StreamType:$gpux_stream,
180-
Variadic<Index>:$dynamicSizes, Variadic<Index>:$symbolOperands);
180+
Variadic<Index>:$dynamicSizes,
181+
Variadic<Index>:$symbolOperands,
182+
UnitAttr:$hostShared);
181183
let results = (outs Res<AnyMemRef, "", [MemAlloc]>:$memref,
182184
Optional<GPU_AsyncToken>:$asyncToken);
183185
}

lib/Conversion/GPUToGPUX/GPUToGPUX.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,11 +66,12 @@ struct ConvertAllocOp : public mlir::OpRewritePattern<mlir::gpu::AllocOp> {
6666
if (!stream)
6767
return mlir::failure();
6868

69+
auto hostShared = op.getHostShared();
6970
mlir::Type token =
7071
op.getAsyncToken() ? op.getAsyncToken().getType() : nullptr;
7172
rewriter.replaceOpWithNewOp<imex::gpux::AllocOp>(
7273
op, op.getType(), token, op.getAsyncDependencies(), stream,
73-
op.getDynamicSizes(), op.getSymbolOperands());
74+
op.getDynamicSizes(), op.getSymbolOperands(), hostShared);
7475

7576
return mlir::success();
7677
}

lib/Transforms/InsertGpuAllocs.cpp

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,6 @@
2929

3030
namespace imex {
3131

32-
mlir::StringRef getAllocSharedAttrName() { return "gpu.alloc_shared"; }
33-
3432
struct InsertGPUAllocs
3533
: public mlir::PassWrapper<InsertGPUAllocs,
3634
mlir::OperationPass<mlir::func::FuncOp>> {
@@ -235,17 +233,14 @@ struct InsertGPUAllocs
235233
auto access = getAccessType(alloc);
236234
auto loc = alloc.getLoc();
237235
builder.setInsertionPoint(alloc);
236+
bool hostShared = access.hostRead || access.hostWrite;
238237
auto gpuAlloc = builder.create<mlir::gpu::AllocOp>(
239238
loc, alloc.getType(), /*asyncToken*/ nullptr,
240239
/*asyncDependencies*/ llvm::None, alloc.getDynamicSizes(),
241-
alloc.getSymbolOperands());
240+
alloc.getSymbolOperands(), hostShared);
242241
auto allocResult = gpuAlloc.getResult(0);
243242
alloc->replaceAllUsesWith(gpuAlloc);
244243
alloc.erase();
245-
if (access.hostRead || access.hostWrite)
246-
gpuAlloc->setAttr(imex::getAllocSharedAttrName(),
247-
builder.getUnitAttr());
248-
249244
builder.setInsertionPoint(term);
250245

251246
builder.create<mlir::gpu::DeallocOp>(loc, llvm::None, allocResult);
@@ -273,14 +268,12 @@ struct InsertGPUAllocs
273268
auto allocType = mlir::MemRefType::get(
274269
memrefType.getShape(), memrefType.getElementType(),
275270
mlir::MemRefLayoutAttrInterface{}, memrefType.getMemorySpace());
271+
bool hostShared = access.hostRead || access.hostWrite;
276272
auto gpuAlloc = builder.create<mlir::gpu::AllocOp>(
277273
loc, allocType, /*asyncToken*/ nullptr,
278274
/*asyncDependencies*/ llvm::None, dims,
279-
/*symbolOperands*/ llvm::None);
275+
/*symbolOperands*/ llvm::None, hostShared);
280276
auto allocResult = gpuAlloc.getResult(0);
281-
if (access.hostRead || access.hostWrite)
282-
gpuAlloc->setAttr(imex::getAllocSharedAttrName(),
283-
builder.getUnitAttr());
284277

285278
if (access.hostWrite && access.deviceRead) {
286279
auto copy = builder.create<mlir::memref::CopyOp>(loc, op, allocResult);

test/Transforms/InsertGpuAllocs/add-gpu-alloc.mlir

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,13 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3
55
%c2 = arith.constant 2 : index
66
%c1 = arith.constant 1 : index
77
%c5 = arith.constant 5 : index
8-
// CHECK: %[[MEMREF0:.*]]= gpu.alloc () {gpu.alloc_shared} : memref<2x5xf32>
8+
// CHECK: %[[MEMREF0:.*]]= gpu.alloc host_shared () : memref<2x5xf32>
99
// CHECK: memref.copy %arg1, %[[MEMREF0:.*]] : memref<2x5xf32> to memref<2x5xf32>
10-
// CHECK: %[[MEMREF1:.*]]= gpu.alloc () {gpu.alloc_shared} : memref<2x5xf32>
10+
// CHECK: %[[MEMREF1:.*]]= gpu.alloc host_shared () : memref<2x5xf32>
1111
// CHECK: memref.copy %arg0, %[[MEMREF1:.*]] : memref<2x5xf32> to memref<2x5xf32>
1212

1313
%0 = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32>
14-
// CHECK: %[[MEMREF2:.*]] = gpu.alloc () {gpu.alloc_shared} : memref<2x5xf32>
14+
// CHECK: %[[MEMREF2:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
1515

1616
%c1_0 = arith.constant 1 : index
1717
%1 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c2)[%c0, %c1]

test/Transforms/InsertGpuAllocs/dynamic-dims.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<?x?xf32>) -> memref<2x5xf3
77
%c5 = arith.constant 5 : index
88
// CHECK: [[VAR0:.*]] = memref.dim %arg1, %c0 : memref<?x?xf32>
99
// CHECK: [[VAR1:.*]] = memref.dim %arg1, %c1 : memref<?x?xf32>
10-
// CHECK: %[[MEMREF0:.*]] = gpu.alloc ([[VAR0:.*]], [[VAR1:.*]]) {gpu.alloc_shared} : memref<?x?xf32>
10+
// CHECK: %[[MEMREF0:.*]] = gpu.alloc host_shared ([[VAR0:.*]], [[VAR1:.*]]) : memref<?x?xf32>
1111
%0 = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32>
1212
%c1_0 = arith.constant 1 : index
1313
%1 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c2)[%c0, %c1]

test/Transforms/InsertGpuAllocs/memref-get-global.mlir

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,12 @@ func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>) -> memref<2x5xf3
1515
%2 = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32>
1616

1717
// CHECK: [[VAR0:.*]] = memref.get_global @__constant_2x5xf32 : memref<2x5xf32>
18-
// CHECK: %[[MEMREF0:.*]] = gpu.alloc () {gpu.alloc_shared} : memref<2x5xf32>
18+
// CHECK: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
1919
// CHECK: memref.copy [[VAR0:.*]], %[[MEMREF0:.*]] : memref<2x5xf32> to memref<2x5xf32>
2020
// CHECK: [[VAR1:.*]] = memref.get_global @__constant_2x5xf32_0 : memref<2x5xf32>
21-
// CHECK: %[[MEMREF1:.*]] = gpu.alloc () {gpu.alloc_shared} : memref<2x5xf32>
21+
// CHECK: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
2222
// CHECK: memref.copy [[VAR1:.*]], %[[MEMREF1:.*]] : memref<2x5xf32> to memref<2x5xf32>
23-
// CHECK: %[[MEMREF2:.*]] = gpu.alloc () {gpu.alloc_shared} : memref<2x5xf32>
23+
// CHECK: %[[MEMREF2:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
2424

2525
%c1_0 = arith.constant 1 : index
2626
%3 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c2)[%c0, %c1]

0 commit comments

Comments (0)