-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[mlir][memref] Add a new ReifyResultShapes
pass
#145927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@fabianmcg I rewrote the shape inference part as a separate pass, not tested yet and it still requires a safe listener-based rewriter that cleans up after itself on scope_guard exit. Implementation that worked for tensor.pad specifically is still here: #145732 in case it is useful. |
475e919
to
b8539b7
Compare
b8539b7
to
465c660
Compare
ReifyResultShapes
pass
52d5c1f
to
1f4fba7
Compare
@llvm/pr-subscribers-mlir-tensor @llvm/pr-subscribers-mlir Author: Nicolas Vasilache (nicolasvasilache) Changes: This patch introduces the `ReifyResultShapes` pass. Example: #map = affine_map<(d0) -> (-d0 + 256)>
func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> {
%0 = affine.apply #map(%arg1)
%extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32>
%padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] {
^bb0(%arg3: index, %arg4: index, %arg5: index):
tensor.yield %arg0 : f32
} : tensor<1x?x64xf32> to tensor<1x?x64xf32>
return %padded : tensor<1x?x64xf32>
}
// mlir-opt --reify-result-shapes
#map = affine_map<()[s0] -> (-s0 + 256)>
func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> {
%0 = affine.apply #map()[%arg1]
%extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32>
%padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] {
^bb0(%arg3: index, %arg4: index, %arg5: index):
tensor.yield %arg0 : f32
} : tensor<1x?x64xf32> to tensor<1x256x64xf32>
%cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32>
return %cast : tensor<1x?x64xf32>
}
---
Full diff: https://github.com/llvm/llvm-project/pull/145927.diff
5 Files Affected:
- (modified) mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td (+40)
- (modified) mlir/include/mlir/Dialect/MemRef/Transforms/Transforms.h (+12)
- (modified) mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt (+1)
- (added) mlir/lib/Dialect/MemRef/Transforms/ReifyResultShapes.cpp (+144)
- (added) mlir/test/Dialect/Tensor/reify-shapes.mlir (+31)
``````````diff
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
index a8d135caa74f0..4645d49cab2be 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
@@ -182,6 +182,46 @@ def ResolveShapedTypeResultDimsPass : Pass<"resolve-shaped-type-result-dims"> {
];
}
+def ReifyResultShapesPass : Pass<"reify-result-shapes"> {
+ let summary = "Reifies the results of all `ReifyRankedShapedTypeOpInterface` operations";
+ let description = [{
+ This pass reifies the shapes of every `ReifyRankedShapedTypeOpInterface`
+ operation with ranked `memref` and `tensor` results. Replacing the
+ operations with their reified versions, and inserting casts when results
+ shapes are updated.
+
+ Example:
+ ```mlir
+ #map = affine_map<(d0) -> (-d0 + 256)>
+ func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> {
+ %0 = affine.apply #map(%arg1)
+ %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32>
+ %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] {
+ ^bb0(%arg3: index, %arg4: index, %arg5: index):
+ tensor.yield %arg0 : f32
+ } : tensor<1x?x64xf32> to tensor<1x?x64xf32>
+ return %padded : tensor<1x?x64xf32>
+ }
+
+ // mlir-opt --reify-result-shapes
+ #map = affine_map<()[s0] -> (-s0 + 256)>
+ func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> {
+ %0 = affine.apply #map()[%arg1]
+ %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32>
+ %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] {
+ ^bb0(%arg3: index, %arg4: index, %arg5: index):
+ tensor.yield %arg0 : f32
+ } : tensor<1x?x64xf32> to tensor<1x256x64xf32>
+ %cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32>
+ return %cast : tensor<1x?x64xf32>
+ }
+ ```
+ }];
+ let dependentDialects = [
+ "affine::AffineDialect", "memref::MemRefDialect", "tensor::TensorDialect"
+ ];
+}
+
def ExpandStridedMetadataPass : Pass<"expand-strided-metadata"> {
let summary = "Expand memref operations into easier to analyze constructs";
let description = [{
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Transforms.h b/mlir/include/mlir/Dialect/MemRef/Transforms/Transforms.h
index c2b8cb05be922..5f9f09d7992ca 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Transforms.h
@@ -23,6 +23,7 @@ class RewritePatternSet;
class RewriterBase;
class Value;
class ValueRange;
+class ReifyRankedShapedTypeOpInterface;
namespace arith {
class WideIntEmulationConverter;
@@ -209,6 +210,17 @@ memref::AllocaOp allocToAlloca(
RewriterBase &rewriter, memref::AllocOp alloc,
function_ref<bool(memref::AllocOp, memref::DeallocOp)> filter = nullptr);
+/// Reifies the results of `op`, potentially replacing `op` with a reified
+/// version. Returns `failure` if `mlir::reifyResultShapes` returned failure,
+/// otherwise it always succeeds. Users of this transform should always expect
+/// it to modify the IR, even when it fails. If any of the result types changes,
+/// the transform will insert cast operations to the old type to keep the IR
+/// consistent.
+///
+/// Note: This transform only works on ranked `memref` or `tensor` results,
+/// other types are ignored.
+LogicalResult reifyOpResultShapes(RewriterBase &rewriter,
+ ReifyRankedShapedTypeOpInterface op);
} // namespace memref
} // namespace mlir
diff --git a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
index 637f5ec1c9f9b..9049faccadef3 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
@@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRMemRefTransforms
IndependenceTransforms.cpp
MultiBuffer.cpp
NormalizeMemRefs.cpp
+ ReifyResultShapes.cpp
ResolveShapedTypeResultDims.cpp
RuntimeOpVerification.cpp
diff --git a/mlir/lib/Dialect/MemRef/Transforms/ReifyResultShapes.cpp b/mlir/lib/Dialect/MemRef/Transforms/ReifyResultShapes.cpp
new file mode 100644
index 0000000000000..dcb601577f88f
--- /dev/null
+++ b/mlir/lib/Dialect/MemRef/Transforms/ReifyResultShapes.cpp
@@ -0,0 +1,144 @@
+//===- ReifyResultShapes.cpp - Reify result shapes ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform reifies result shapes of `ReifyRankedShapedTypeOpInterface`
+// operations with ranked `memref` and `tensor` results.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Interfaces/InferTypeOpInterface.h"
+#include "llvm/Support/InterleavedRange.h"
+
+#define DEBUG_TYPE "reify-result-shapes"
+#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ")
+
+namespace mlir {
+namespace memref {
+#define GEN_PASS_DEF_REIFYRESULTSHAPESPASS
+#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
+} // namespace memref
+} // namespace mlir
+
+using namespace mlir;
+
+LogicalResult
+mlir::memref::reifyOpResultShapes(RewriterBase &rewriter,
+ ReifyRankedShapedTypeOpInterface op) {
+ LLVM_DEBUG({ DBGS() << " reifying op: " << op << "\n"; });
+ // Get the reified out shapes.
+ ReifiedRankedShapedTypeDims reifiedResultShapes;
+ if (failed(mlir::reifyResultShapes(rewriter, op, reifiedResultShapes)) ||
+ reifiedResultShapes.empty()) {
+ return op.emitError() << "failed to get the reified shapes";
+ }
+
+ bool modified = false;
+ // Compute the new output types.
+ SmallVector<Type> outTypes;
+ for (const auto &[oldTy, reifiedShape] :
+ llvm::zip(op->getResultTypes(), reifiedResultShapes)) {
+ // Skip if it's not a memref or tensor type.
+ if (!isa<RankedTensorType, MemRefType>(oldTy)) {
+ outTypes.push_back(oldTy);
+ continue;
+ }
+
+ ShapedType shapedTy = dyn_cast<ShapedType>(oldTy);
+
+ SmallVector<int64_t> shape = llvm::to_vector(shapedTy.getShape());
+ for (auto &&[dim, ofr] : llvm::zip_equal(shape, reifiedShape)) {
+ std::optional<int64_t> maybeCst = getConstantIntValue(ofr);
+ // If the reified dim is dynamic set it appropriately.
+ if (!maybeCst.has_value()) {
+ dim = ShapedType::kDynamic;
+ continue;
+ }
+ // Set the static dim.
+ dim = *maybeCst;
+ }
+
+ // If the shape didn't change continue.
+ if (shape == shapedTy.getShape()) {
+ outTypes.push_back(oldTy);
+ continue;
+ }
+ modified = true;
+ outTypes.push_back(shapedTy.cloneWith(shape, shapedTy.getElementType()));
+ }
+
+ // Return if we don't need to update.
+ if (!modified) {
+ LLVM_DEBUG({ DBGS() << "- op doesn't require update\n"; });
+ return success();
+ }
+
+ LLVM_DEBUG({
+ DBGS() << "- oldTypes: " << llvm::interleaved_array(op->getResultTypes())
+ << " \n";
+ DBGS() << "- outTypes: " << llvm::interleaved_array(outTypes) << " \n";
+ });
+
+ // We now have outTypes that need to be turned to cast ops.
+ Location loc = op->getLoc();
+ SmallVector<Value> newResults;
+ Operation *newOp = rewriter.clone(*op);
+ for (auto [reifiedTy, oldRes] : llvm::zip(outTypes, op->getResults())) {
+ OpResult newRes = newOp->getResult(oldRes.getResultNumber());
+ Type oldTy = oldRes.getType();
+ // Continue if the type remained invariant or is not shaped.
+ if (oldTy == reifiedTy || !isa<MemRefType, RankedTensorType>(oldTy)) {
+ newResults.push_back(newRes);
+ continue;
+ }
+
+ // Update the type.
+ newRes.setType(reifiedTy);
+ if (isa<RankedTensorType>(reifiedTy)) {
+ newResults.push_back(rewriter.create<tensor::CastOp>(loc, oldTy, newRes));
+ } else {
+ assert(isa<MemRefType>(reifiedTy) && "expected a memref type");
+ newResults.push_back(rewriter.create<memref::CastOp>(loc, oldTy, newRes));
+ }
+ }
+
+ LLVM_DEBUG({
+ DBGS() << "- reified results " << llvm::interleaved_array(newResults)
+ << "\n";
+ });
+ rewriter.replaceOp(op, newResults);
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
+// Pass registration
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct ReifyResultShapesPass final
+ : public memref::impl::ReifyResultShapesPassBase<ReifyResultShapesPass> {
+ void runOnOperation() override;
+};
+} // namespace
+
+void ReifyResultShapesPass::runOnOperation() {
+ SmallVector<ReifyRankedShapedTypeOpInterface> ops;
+ getOperation()->walk(
+ [&](ReifyRankedShapedTypeOpInterface op) { ops.push_back(op); });
+ IRRewriter rewriter(&getContext());
+ for (ReifyRankedShapedTypeOpInterface op : ops) {
+ rewriter.setInsertionPoint(op);
+ if (failed(memref::reifyOpResultShapes(rewriter, op)))
+ return signalPassFailure();
+ }
+}
diff --git a/mlir/test/Dialect/Tensor/reify-shapes.mlir b/mlir/test/Dialect/Tensor/reify-shapes.mlir
new file mode 100644
index 0000000000000..5569d90f8b731
--- /dev/null
+++ b/mlir/test/Dialect/Tensor/reify-shapes.mlir
@@ -0,0 +1,31 @@
+// RUN: mlir-opt -reify-result-shapes %s | FileCheck %s
+
+// The test below checks concat op reification. In the first case, no cast is inserted while on the second a cast gets inserted.
+// CHECK-LABEL: func.func @concat_reification
+func.func @concat_reification(%arg0: tensor<4x7x3xf32>, %arg1 : tensor<4x4x3xf32>, %arg2: tensor<?x?x?xf32>)
+ -> (tensor<4x11x3xf32>, tensor<?x?x?xf32>) {
+ // CHECK: %[[RES0:.*]] = tensor.concat dim(1) %{{.*}} : (tensor<4x7x3xf32>, tensor<4x4x3xf32>) -> tensor<4x11x3xf32>
+ %1 = tensor.concat dim(1) %arg0, %arg1 : (tensor<4x7x3xf32>, tensor<4x4x3xf32>) -> tensor<4x11x3xf32>
+ // CHECK: %[[V0:.*]] = tensor.concat dim(2) %{{.*}} : (tensor<4x7x3xf32>, tensor<?x?x?xf32>) -> tensor<4x7x?xf32>
+ // CHECK: %[[RES1:.*]] = tensor.cast %[[V0]] : tensor<4x7x?xf32> to tensor<?x?x?xf32>
+ %2 = tensor.concat dim(2) %arg0, %arg2 : (tensor<4x7x3xf32>, tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+ // CHECK: return %[[RES0]], %[[RES1]] : tensor<4x11x3xf32>, tensor<?x?x?xf32>
+ return %1, %2 : tensor<4x11x3xf32>, tensor<?x?x?xf32>
+}
+
+// CHECK-LABEL: func.func @pad_reification
+func.func @pad_reification(%cst : f32, %idx : index, %t: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> {
+ %pad_amt = affine.apply affine_map<(d0) -> (-d0 + 256)>(%idx)
+ %es = tensor.extract_slice %t[0, 0, 0] [1, %idx, 64] [1, 1, 1]
+ : tensor<64x?x64xf32> to tensor<1x?x64xf32>
+
+ // CHECK: tensor.pad
+ // CHECK: : tensor<1x?x64xf32> to tensor<1x256x64xf32>
+ // CHECK: tensor.cast %{{.*}} : tensor<1x256x64xf32> to tensor<1x?x64xf32>
+ %padded = tensor.pad %es low[0, 0, 0] high[0, %pad_amt, 0] {
+ ^bb0(%a: index, %b: index, %c: index):
+ tensor.yield %cst : f32
+ } : tensor<1x?x64xf32> to tensor<1x?x64xf32>
+
+ return %padded : tensor<1x?x64xf32>
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems to be similar to https://github.com/llvm/llvm-project/blob/main/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp . What is this doing on top of that pass?
They are not exactly similar. Also, just noticed that this pattern is wrong: https://github.com/llvm/llvm-project/blob/main/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp#L57-L59 It has the potential of returning |
Looking at
This has nothing to do with reifying result shapes. This is just a const shape propagation that should be covered by the "cast folding" pattern here
There isn't a tensor.dim in the input IR, so there is nothing to "reify". Might be a mismatch in what you expect that interface to do. It is explicitly meant to resolve |
We might need to update the docs, because at least I cannot infer that from the docs:
Also, there's no cast to fold into or from in this case. So no way to augment those patterns. Finally, I'd argue that it is reification because we are coming from dynamic dimensions and inferring that the output type is static. |
Fair point. Document needs to be updated.
Ok, maybe I misread what the intent was and got thrown off by the use of the For other operations this is done by just canonicalizing the op itself to go to a more "static" version of the op, and introduce
|
Well, the |
There is no need to limit the interface to only exist in conjunction with I also don't understand "implicitly saying generate the code for Happy to rename the functionality if that is what causes hiccups (naming is still hard in 2025). |
Also, only allow tensor::PadOp and tensor::ConcatOp for now as more extensive testing showed that other ops are not ready yet (e.g. at least tensor::ExtractSliceOp / tensor::InsertSliceOp).
7736abb
to
ba51026
Compare
LLVM_DEBUG({ DBGS() << " reifying op: " << op << "\n"; }); | ||
// Get the reified out shapes. | ||
ReifiedRankedShapedTypeDims reifiedResultShapes; | ||
if (failed(mlir::reifyResultShapes(rewriter, op, reifiedResultShapes)) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is my source of confusion. As far as I know this is meant to extract information about the shape of the result of the operation, but this is actually changing the operation itself. This seems like something that cannot be done just based on the interface/clone. The change in the result type might make the operation invalid (according to its verifier). This kind of rewrite cannot really be done just on the interface.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To me the interface description establishes an implicit contract allowing this:
Interface to compute the shape of the result of an operation when
the result is a ranked shape type, i.e. `RankedTensorType` or
`MemRefType`.
Because, what would it mean for reifyResultShapes
to return a shape that the op verifier will reject? The interface would produce inconsistent results with itself, rendering the interface implementation erroneous (IMO the verifier has higher precedence).
From my POV the interface solves this issue with the return of the LogicalResult
, because then either the reifyResultShapes
method should return failure or produce a shape that the verifier should accept. And if that's not the case then such an operation shouldn't implement the reify interface.
Nonetheless, I do see the argument for making the implicit contract explicit. So how about adding something along the lines of the following method to the interface?
InterfaceMethod<
/*desc=*/[{
Reify the shape of the result of an operation (typically in terms of the
shape of its operands).
}],
/*retTy=*/"::llvm::LogicalResult",
/*methodName=*/"resifyOpResult",
/*args=*/(ins "::mlir::OpBuilder &":$builder,
"unsigned":$resultNum)
>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To me the interface description establishes an implicit contract allowing this:
Interface to compute the shape of the result of an operation when the result is a ranked shape type, i.e. `RankedTensorType` or `MemRefType`.
Because, what would it mean for
reifyResultShapes
to return a shape that the op verifier will reject? The interface would produce inconsistent results with itself, rendering the interface implementation erroneous (IMO the verifier has higher precedence).
I am not sure I fully follow the logic. Let's assume the op is currently valid and you are getting the shape of the result. Now you are changing the shape of the result without modifying any of its other operands. There is no way you can make an interface guarantee that the shape it found is considered valid by the operation. The operation method itself might not know that. The inconsistency could be coming from the verifier checking consistency between the other operands' values and its result type. You really cannot expect the reify
method implementation to also know whether the op is valid or not, that is kind of a disconnected contract that can be a source of bugs.
As usual, confluence is a non-negotiable property for the system to avoid crumbling under its own weight. When I look here I see an alteration of the interface that seems related to confluence. |
Updated documentation post-offline sync with @Groverkss. |
I dont think this is true. The
Again, it's an issue with the operation. You are right though, maybe that assertion should not have been removed. Also, assertions are just a poor way of handling this. This needed to be an error. I think the way forward here is: add a pattern that is specific for these ops that will make the shape of the result consistent with what the interface returns. For now this could go in as a canonicalization because that is consistent with how static shape information is propagated today in the code base. Then you don't need this pass. I think that's what you had tried initially, sorry maybe I misread the PR that led to this thread splintering into different directions. If you have a link to that PR, I can take a look again. |
I looked through the original "canonicalization" PR #145732 . This has the same issue that I am raising here. The The right ordering here would be a pattern that would do what is done in the
reify method just gets the information from the operation. The operation has to be the source of information here.
If you want me to stamp this as a WIP/to-be-evolved thing, thats fine, but it is relying on what I see as an inverted dependence structure between the op and the interface method. I would have raised this on the implementation of the |
I think there are problems with the approach as it has been set out, but unblocking for now because we can have this pass, but the issue is with PRs that have already landed.
Documenting offline discussion: for a meaningful class of ops, the "transfer function" can be encoded with
For other things, TBD what other meaningful classes of ops can have "transfer functions" that reconcile the SSA-less builder-less verifier with the SSA + builder + folding |
Hmmm I didn't get that, what is wrong in the I can see there is no guaranteed confluence between I am happy to do that now for PadOp if there is a preference for it. |
1497991
to
08a6823
Compare
6b7217a
to
b45abc7
Compare
This pass reifies the shapes of a subset of `ReifyRankedShapedTypeOpInterface` ops with `tensor` results. The pass currently only supports result shape type reification for: - tensor::PadOp - tensor::ConcatOp It addresses a representation gap where implicit op semantics are needed to infer static result types from dynamic operands. But it does so by using `ReifyRankedShapedTypeOpInterface` as the source of truth rather than the op itself. As a consequence, this cannot generalize today. TODO: in the future, we should consider coupling this information with op "transfer functions" (e.g. `IndexingMapOpInterface`) to provide a source of truth that can work across result shape inference, canonicalization and op verifiers. The pass replaces the operations with their reified versions, when more static information can be derived, and inserts casts when results shapes are updated. Example: ```mlir #map = affine_map<(d0) -> (-d0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map(%arg1) %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x?x64xf32> return %padded : tensor<1x?x64xf32> } // mlir-opt --reify-result-shapes #map = affine_map<()[s0] -> (-s0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map()[%arg1] %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x256x64xf32> %cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32> 
return %cast : tensor<1x?x64xf32> } ``` --------- Co-authored-by: Fabian Mora <[email protected]>
This pass reifies the shapes of a subset of `ReifyRankedShapedTypeOpInterface` ops with `tensor` results. The pass currently only supports result shape type reification for: - tensor::PadOp - tensor::ConcatOp It addresses a representation gap where implicit op semantics are needed to infer static result types from dynamic operands. But it does so by using `ReifyRankedShapedTypeOpInterface` as the source of truth rather than the op itself. As a consequence, this cannot generalize today. TODO: in the future, we should consider coupling this information with op "transfer functions" (e.g. `IndexingMapOpInterface`) to provide a source of truth that can work across result shape inference, canonicalization and op verifiers. The pass replaces the operations with their reified versions, when more static information can be derived, and inserts casts when results shapes are updated. Example: ```mlir #map = affine_map<(d0) -> (-d0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map(%arg1) %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x?x64xf32> return %padded : tensor<1x?x64xf32> } // mlir-opt --reify-result-shapes #map = affine_map<()[s0] -> (-s0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map()[%arg1] %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x256x64xf32> %cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32> 
return %cast : tensor<1x?x64xf32> } ``` --------- Co-authored-by: Fabian Mora <[email protected]>
This pass reifies the shapes of a subset of `ReifyRankedShapedTypeOpInterface` ops with `tensor` results. The pass currently only supports result shape type reification for: - tensor::PadOp - tensor::ConcatOp It addresses a representation gap where implicit op semantics are needed to infer static result types from dynamic operands. But it does so by using `ReifyRankedShapedTypeOpInterface` as the source of truth rather than the op itself. As a consequence, this cannot generalize today. TODO: in the future, we should consider coupling this information with op "transfer functions" (e.g. `IndexingMapOpInterface`) to provide a source of truth that can work across result shape inference, canonicalization and op verifiers. The pass replaces the operations with their reified versions, when more static information can be derived, and inserts casts when results shapes are updated. Example: ```mlir #map = affine_map<(d0) -> (-d0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map(%arg1) %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x?x64xf32> return %padded : tensor<1x?x64xf32> } // mlir-opt --reify-result-shapes #map = affine_map<()[s0] -> (-s0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map()[%arg1] %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x256x64xf32> %cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32> 
return %cast : tensor<1x?x64xf32> } ``` --------- Co-authored-by: Fabian Mora <[email protected]>
This pass reifies the shapes of a subset of `ReifyRankedShapedTypeOpInterface` ops with `tensor` results. The pass currently only supports result shape type reification for: - tensor::PadOp - tensor::ConcatOp It addresses a representation gap where implicit op semantics are needed to infer static result types from dynamic operands. But it does so by using `ReifyRankedShapedTypeOpInterface` as the source of truth rather than the op itself. As a consequence, this cannot generalize today. TODO: in the future, we should consider coupling this information with op "transfer functions" (e.g. `IndexingMapOpInterface`) to provide a source of truth that can work across result shape inference, canonicalization and op verifiers. The pass replaces the operations with their reified versions, when more static information can be derived, and inserts casts when results shapes are updated. Example: ```mlir #map = affine_map<(d0) -> (-d0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map(%arg1) %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x?x64xf32> return %padded : tensor<1x?x64xf32> } // mlir-opt --reify-result-shapes #map = affine_map<()[s0] -> (-s0 + 256)> func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> { %0 = affine.apply #map()[%arg1] %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1] : tensor<64x?x64xf32> to tensor<1x?x64xf32> %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index): tensor.yield %arg0 : f32 } : tensor<1x?x64xf32> to tensor<1x256x64xf32> %cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32> 
return %cast : tensor<1x?x64xf32> } ``` --------- Co-authored-by: Fabian Mora <[email protected]>
This pass reifies the shapes of a subset of
ReifyRankedShapedTypeOpInterface
ops with tensor
results. The pass currently only supports result shape type reification for: tensor::PadOp and tensor::ConcatOp.
It addresses a representation gap where implicit op semantics are needed to infer static result types from dynamic
operands. But it does so by using
ReifyRankedShapedTypeOpInterface
as the source of truth rather than the op itself. As a consequence, this cannot generalize today.
TODO: in the future, we should consider coupling this information with op "transfer functions" (e.g.
IndexingMapOpInterface
) to provide a source of truth that can work across result shape inference, canonicalization and op verifiers.
The pass replaces the operations with their reified versions, when more static information can be derived, and inserts
casts when results shapes are updated.
Example: