Skip to content

Commit 63ab3a8

Browse files
nicolasvasilache and fabianmcg
authored and committed
[mlir][memref] Add a new ReifyResultShapes pass (llvm#145927)
This pass reifies the shapes of a subset of `ReifyRankedShapedTypeOpInterface` ops with `tensor` results. The pass currently only supports result shape type reification for: - tensor::PadOp - tensor::ConcatOp It addresses a representation gap where implicit op semantics are needed to infer static result types from dynamic operands. But it does so by using `ReifyRankedShapedTypeOpInterface` as the source of truth rather than the op itself. As a consequence, this cannot generalize today. TODO: in the future, we should consider coupling this information with op "transfer functions" (e.g. `IndexingMapOpInterface`) to provide a source of truth that can work across result shape inference, canonicalization and op verifiers. The pass replaces the operations with their reified versions, when more static information can be derived, and inserts casts when results shapes are updated. Example: ```mlir #map = affine_map<(d0) -> (-d0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map(%arg1) %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x?x64xf32> return %padded : tensor<1x?x64xf32> } // mlir-opt --reify-result-shapes #map = affine_map<()[s0] -> (-s0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map()[%arg1] %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x256x64xf32> %cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32> 
return %cast : tensor<1x?x64xf32> } ``` --------- Co-authored-by: Fabian Mora <[email protected]>
1 parent 564148c commit 63ab3a8

File tree

5 files changed

+254
-1
lines changed

5 files changed

+254
-1
lines changed

mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,68 @@ def ResolveShapedTypeResultDimsPass : Pass<"resolve-shaped-type-result-dims"> {
182182
];
183183
}
184184

def ReifyResultShapesPass : Pass<"reify-result-shapes"> {
  let summary = "Reifies the results of `tensor::PadOp` and `tensor::ConcatOp`.";
  let description = [{
    This pass reifies the shapes of a subset of `ReifyRankedShapedTypeOpInterface`
    ops with `tensor` results.

    The pass currently only supports result shape type reification for:
      - tensor::PadOp
      - tensor::ConcatOp
    It addresses a representation gap where implicit op semantics are needed to
    infer static result types from dynamic operands.
    But it does so by using `ReifyRankedShapedTypeOpInterface` as the source of
    truth rather than the op itself. As a consequence, this cannot generalize
    today.

    TODO: in the future, we should consider coupling this information with op
    "transfer functions" (e.g. `IndexingMapOpInterface`) to provide a source of
    truth that can work across result shape inference, canonicalization and op
    verifiers.

    The pass replaces the operations with their reified versions, when more
    static information can be derived, and inserts casts when results shapes
    are updated.

    Example:
    ```mlir
    #map = affine_map<(d0) -> (-d0 + 256)>
    func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>)
        -> tensor<1x?x64xf32>
    {
      %0 = affine.apply #map(%arg1)
      %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1]
        : tensor<64x?x64xf32> to tensor<1x?x64xf32>
      %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] {
      ^bb0(%arg3: index, %arg4: index, %arg5: index):
        tensor.yield %arg0 : f32
      } : tensor<1x?x64xf32> to tensor<1x?x64xf32>
      return %padded : tensor<1x?x64xf32>
    }

    // mlir-opt --reify-result-shapes
    #map = affine_map<()[s0] -> (-s0 + 256)>
    func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>)
        -> tensor<1x?x64xf32>
    {
      %0 = affine.apply #map()[%arg1]
      %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1]
        : tensor<64x?x64xf32> to tensor<1x?x64xf32>
      %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] {
      ^bb0(%arg3: index, %arg4: index, %arg5: index):
        tensor.yield %arg0 : f32
      } : tensor<1x?x64xf32> to tensor<1x256x64xf32>
      %cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32>
      return %cast : tensor<1x?x64xf32>
    }
    ```
  }];
  let dependentDialects = [
    "affine::AffineDialect", "memref::MemRefDialect", "tensor::TensorDialect"
  ];
}
185247
def ExpandStridedMetadataPass : Pass<"expand-strided-metadata"> {
186248
let summary = "Expand memref operations into easier to analyze constructs";
187249
let description = [{

mlir/include/mlir/Dialect/MemRef/Transforms/Transforms.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class RewritePatternSet;
2323
class RewriterBase;
2424
class Value;
2525
class ValueRange;
26+
class ReifyRankedShapedTypeOpInterface;
2627

2728
namespace arith {
2829
class WideIntEmulationConverter;
@@ -208,7 +209,6 @@ FailureOr<Value> replaceWithIndependentOp(RewriterBase &rewriter,
208209
memref::AllocaOp allocToAlloca(
209210
RewriterBase &rewriter, memref::AllocOp alloc,
210211
function_ref<bool(memref::AllocOp, memref::DeallocOp)> filter = nullptr);
211-
212212
} // namespace memref
213213
} // namespace mlir
214214

mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRMemRefTransforms
1313
IndependenceTransforms.cpp
1414
MultiBuffer.cpp
1515
NormalizeMemRefs.cpp
16+
ReifyResultShapes.cpp
1617
ResolveShapedTypeResultDims.cpp
1718
RuntimeOpVerification.cpp
1819

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
//===- ReifyResultShapes.cpp - Reify result shapes ------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This transform reifies result shapes of `ReifyRankedShapedTypeOpInterface`
10+
// operations with ranked `memref` and `tensor` results.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
15+
16+
#include "mlir/Dialect/Affine/IR/AffineOps.h"
17+
#include "mlir/Dialect/MemRef/IR/MemRef.h"
18+
#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
19+
#include "mlir/Dialect/Tensor/IR/Tensor.h"
20+
#include "mlir/Interfaces/DestinationStyleOpInterface.h"
21+
#include "mlir/Interfaces/InferTypeOpInterface.h"
22+
#include "llvm/Support/InterleavedRange.h"
23+
24+
#define DEBUG_TYPE "reify-result-shapes"
25+
#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ")
26+
27+
namespace mlir {
28+
namespace memref {
29+
#define GEN_PASS_DEF_REIFYRESULTSHAPESPASS
30+
#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
31+
} // namespace memref
32+
} // namespace mlir
33+
34+
using namespace mlir;
35+
36+
/// Reifies the results of `op`, potentially replacing `op` with a reified
37+
/// version. Returns `failure` if `mlir::reifyResultShapes` returned failure,
38+
/// otherwise it always succeeds. Users of this transform should always expect
39+
/// it to modify the IR, even when it fails. If any of the result types changes,
40+
/// the transform will insert cast operations to the old type to keep the IR
41+
/// consistent.
42+
static LogicalResult reifyOpResultShapes(RewriterBase &rewriter,
43+
ReifyRankedShapedTypeOpInterface op) {
44+
LLVM_DEBUG({ DBGS() << " reifying op: " << op << "\n"; });
45+
// Get the reified out shapes.
46+
ReifiedRankedShapedTypeDims reifiedResultShapes;
47+
if (failed(mlir::reifyResultShapes(rewriter, op, reifiedResultShapes)) ||
48+
reifiedResultShapes.empty()) {
49+
return op->emitWarning() << "failed to get the reified shapes";
50+
}
51+
52+
bool modified = false;
53+
// Compute the new output types.
54+
SmallVector<Type> outTypes;
55+
for (const auto &[oldTy, reifiedShape] :
56+
llvm::zip(op->getResultTypes(), reifiedResultShapes)) {
57+
// Skip if it's not a memref or tensor type.
58+
if (!isa<RankedTensorType, MemRefType>(oldTy)) {
59+
outTypes.push_back(oldTy);
60+
continue;
61+
}
62+
63+
ShapedType shapedTy = dyn_cast<ShapedType>(oldTy);
64+
65+
SmallVector<int64_t> shape = llvm::to_vector(shapedTy.getShape());
66+
for (auto &&[dim, ofr] : llvm::zip_equal(shape, reifiedShape)) {
67+
std::optional<int64_t> maybeCst = getConstantIntValue(ofr);
68+
// If the reified dim is dynamic set it appropriately.
69+
if (!maybeCst.has_value()) {
70+
dim = ShapedType::kDynamic;
71+
continue;
72+
}
73+
// Set the static dim.
74+
dim = *maybeCst;
75+
}
76+
77+
// If the shape didn't change continue.
78+
if (shape == shapedTy.getShape()) {
79+
outTypes.push_back(oldTy);
80+
continue;
81+
}
82+
modified = true;
83+
outTypes.push_back(shapedTy.cloneWith(shape, shapedTy.getElementType()));
84+
}
85+
86+
// Return if we don't need to update.
87+
if (!modified) {
88+
LLVM_DEBUG({ DBGS() << "- op doesn't require update\n"; });
89+
return success();
90+
}
91+
92+
LLVM_DEBUG({
93+
DBGS() << "- oldTypes: " << llvm::interleaved_array(op->getResultTypes())
94+
<< " \n";
95+
DBGS() << "- outTypes: " << llvm::interleaved_array(outTypes) << " \n";
96+
});
97+
98+
// We now have outTypes that need to be turned to cast ops.
99+
Location loc = op->getLoc();
100+
SmallVector<Value> newResults;
101+
// TODO: `mlir::reifyResultShapes` and op verifiers may not agree atm.
102+
// This is a confluence problem that will need to be addressed.
103+
// For now, we know PadOp and ConcatOp are fine.
104+
assert((isa<tensor::PadOp, tensor::ConcatOp>(op.getOperation())) &&
105+
"incorrect op");
106+
Operation *newOp = rewriter.clone(*op);
107+
for (auto [reifiedTy, oldRes] : llvm::zip(outTypes, op->getResults())) {
108+
OpResult newRes = newOp->getResult(oldRes.getResultNumber());
109+
Type oldTy = oldRes.getType();
110+
// Continue if the type remained invariant or is not shaped.
111+
if (oldTy == reifiedTy || !isa<MemRefType, RankedTensorType>(oldTy)) {
112+
newResults.push_back(newRes);
113+
continue;
114+
}
115+
116+
// Update the type.
117+
newRes.setType(reifiedTy);
118+
if (isa<RankedTensorType>(reifiedTy)) {
119+
newResults.push_back(rewriter.create<tensor::CastOp>(loc, oldTy, newRes));
120+
} else {
121+
assert(isa<MemRefType>(reifiedTy) && "expected a memref type");
122+
newResults.push_back(rewriter.create<memref::CastOp>(loc, oldTy, newRes));
123+
}
124+
}
125+
126+
LLVM_DEBUG({
127+
DBGS() << "- reified results " << llvm::interleaved_array(newResults)
128+
<< "\n";
129+
});
130+
rewriter.replaceOp(op, newResults);
131+
return success();
132+
}
133+
134+
//===----------------------------------------------------------------------===//
135+
// Pass registration
136+
//===----------------------------------------------------------------------===//
137+
138+
namespace {
139+
struct ReifyResultShapesPass final
140+
: public memref::impl::ReifyResultShapesPassBase<ReifyResultShapesPass> {
141+
void runOnOperation() override;
142+
};
143+
} // namespace
144+
145+
void ReifyResultShapesPass::runOnOperation() {
146+
SmallVector<ReifyRankedShapedTypeOpInterface> ops;
147+
getOperation()->walk([&](ReifyRankedShapedTypeOpInterface op) {
148+
// Handle ops that are not DPS and that do not carry an tied operand shapes.
149+
// For now, limit to tensor::PadOp and tensor::ConcatOp.
150+
if (!isa<tensor::PadOp, tensor::ConcatOp>(op.getOperation()))
151+
return;
152+
ops.push_back(op);
153+
});
154+
IRRewriter rewriter(&getContext());
155+
for (ReifyRankedShapedTypeOpInterface op : ops) {
156+
rewriter.setInsertionPoint(op);
157+
(void)reifyOpResultShapes(rewriter, op);
158+
}
159+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
// RUN: mlir-opt -reify-result-shapes %s | FileCheck %s

// The test below checks concat op reification. In the first case, no cast is
// inserted while on the second a cast gets inserted.
// CHECK-LABEL: func.func @concat_reification
func.func @concat_reification(%arg0: tensor<4x7x3xf32>, %arg1 : tensor<4x4x3xf32>, %arg2: tensor<?x?x?xf32>)
  -> (tensor<4x11x3xf32>, tensor<?x?x?xf32>) {
  // CHECK: %[[RES0:.*]] = tensor.concat dim(1) %{{.*}} : (tensor<4x7x3xf32>, tensor<4x4x3xf32>) -> tensor<4x11x3xf32>
  %1 = tensor.concat dim(1) %arg0, %arg1 : (tensor<4x7x3xf32>, tensor<4x4x3xf32>) -> tensor<4x11x3xf32>
  // CHECK: %[[V0:.*]] = tensor.concat dim(2) %{{.*}} : (tensor<4x7x3xf32>, tensor<?x?x?xf32>) -> tensor<4x7x?xf32>
  // CHECK: %[[RES1:.*]] = tensor.cast %[[V0]] : tensor<4x7x?xf32> to tensor<?x?x?xf32>
  %2 = tensor.concat dim(2) %arg0, %arg2 : (tensor<4x7x3xf32>, tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
  // CHECK: return %[[RES0]], %[[RES1]] : tensor<4x11x3xf32>, tensor<?x?x?xf32>
  return %1, %2 : tensor<4x11x3xf32>, tensor<?x?x?xf32>
}

// CHECK-LABEL: func.func @pad_reification
func.func @pad_reification(%cst : f32, %idx : index, %t: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> {
  %pad_amt = affine.apply affine_map<(d0) -> (-d0 + 256)>(%idx)
  %es = tensor.extract_slice %t[0, 0, 0] [1, %idx, 64] [1, 1, 1]
    : tensor<64x?x64xf32> to tensor<1x?x64xf32>

  // CHECK: tensor.pad
  // CHECK: : tensor<1x?x64xf32> to tensor<1x256x64xf32>
  // CHECK: tensor.cast %{{.*}} : tensor<1x256x64xf32> to tensor<1x?x64xf32>
  %padded = tensor.pad %es low[0, 0, 0] high[0, %pad_amt, 0] {
  ^bb0(%a: index, %b: index, %c: index):
    tensor.yield %cst : f32
  } : tensor<1x?x64xf32> to tensor<1x?x64xf32>

  return %padded : tensor<1x?x64xf32>
}

0 commit comments

Comments
 (0)