[LLVM][DAGCombiner] Fix size calculations in calculateByteProvider. #148425
Conversation
NOTE: Prior to this PR, these tests would trigger "Invalid size request on a scalable vector" errors. The output shows the result of simply bailing out for scalable vectors.
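For anyone unfamiliar with that message: it comes from `TypeSize`'s implicit conversion to a plain integer, which is only valid for fixed-width sizes. A minimal sketch of the failure mode, built against LLVM's Support library (the function name is mine, not from the patch):

```cpp
// Sketch only (not part of the patch): where the fatal error in the
// note above comes from. TypeSize models both fixed and scalable sizes;
// implicitly converting a scalable one to a plain integer aborts with
// "Invalid size request on a scalable vector".
#include "llvm/Support/TypeSize.h"
using namespace llvm;

void sizeQueryExample() {
  TypeSize Fixed = TypeSize::getFixed(32);       // 32 bits
  TypeSize Scalable = TypeSize::getScalable(32); // vscale x 32 bits

  uint64_t Ok = Fixed; // fine: the size is a known fixed amount
  (void)Ok;
  // uint64_t Bad = Scalable; // would hit the fatal error above, which is
  //                          // what the old code did via the equivalent of
  //                          // `unsigned BitWidth = Op.getValueSizeInBits();`
}
```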
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

Fixes #148387

Full diff: https://github.com/llvm/llvm-project/pull/148425.diff

2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 231184587d682..410e862b6c0f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9149,11 +9149,12 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
return std::nullopt;
- unsigned BitWidth = Op.getValueSizeInBits();
- if (BitWidth % 8 != 0)
+ TypeSize BitWidth = Op.getValueSizeInBits();
+ if (!BitWidth.isKnownMultipleOf(8))
return std::nullopt;
- unsigned ByteWidth = BitWidth / 8;
- assert(Index < ByteWidth && "invalid index requested");
+ TypeSize ByteWidth = BitWidth.divideCoefficientBy(8);
+ assert(TypeSize::isKnownLT(TypeSize::getFixed(Index), ByteWidth) &&
+ "invalid index requested");
(void) ByteWidth;
switch (Op.getOpcode()) {
@@ -9248,20 +9249,23 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
if (!L->isSimple() || L->isIndexed())
return std::nullopt;
- unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
- if (NarrowBitWidth % 8 != 0)
+ TypeSize NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (!NarrowBitWidth.isKnownMultipleOf(8))
return std::nullopt;
- uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+ TypeSize NarrowByteWidth = NarrowBitWidth.divideCoefficientBy(8);
// If the width of the load does not reach byte we are trying to provide for
// and it is not a ZEXTLOAD, then the load does not provide for the byte in
// question
- if (Index >= NarrowByteWidth)
+ if (TypeSize::isKnownGE(TypeSize::getFixed(Index), NarrowByteWidth))
return L->getExtensionType() == ISD::ZEXTLOAD
? std::optional<SDByteProvider>(
SDByteProvider::getConstantZero())
: std::nullopt;
+ if (!TypeSize::isKnownLT(TypeSize::getFixed(Index), NarrowByteWidth))
+ return std::nullopt;
+
unsigned BPVectorIndex = VectorIndex.value_or(0U);
return SDByteProvider::getSrc(L, Index, BPVectorIndex);
}
diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
index 57f61e5303ecf..0bf4fb1063025 100644
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -713,3 +713,182 @@ define void @short_vector_to_i64(ptr %in, ptr %out, ptr %p) {
store i64 %i3, ptr %out
ret void
}
+
+; x1 = x0
+define void @scalable_vector_to_i32(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e1 = extractelement <vscale x 4 x i8> %ld, i32 0
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+ %e4 = extractelement <vscale x 4 x i8> %ld, i32 3
+
+ %z0 = zext i8 %e1 to i32
+ %z1 = zext i8 %e2 to i32
+ %z2 = zext i8 %e3 to i32
+ %z3 = zext i8 %e4 to i32
+
+ %s1 = shl nuw nsw i32 %z1, 8
+ %s2 = shl nuw nsw i32 %z2, 16
+ %s3 = shl nuw i32 %z3, 24
+
+ %i1 = or i32 %s1, %z0
+ %i2 = or i32 %i1, %s2
+ %i3 = or i32 %i2, %s3
+
+ store i32 %i3, ptr %out
+ ret void
+}
+
+define void @scalable_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32_unused_low_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: mov w9, v0.s[2]
+; CHECK-NEXT: mov w10, v0.s[3]
+; CHECK-NEXT: lsl w8, w8, #8
+; CHECK-NEXT: orr w8, w8, w9, lsl #16
+; CHECK-NEXT: orr w8, w8, w10, lsl #24
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+ %e4 = extractelement <vscale x 4 x i8> %ld, i32 3
+
+ %z1 = zext i8 %e2 to i32
+ %z2 = zext i8 %e3 to i32
+ %z3 = zext i8 %e4 to i32
+
+ %s1 = shl nuw nsw i32 %z1, 8
+ %s2 = shl nuw nsw i32 %z2, 16
+ %s3 = shl nuw i32 %z3, 24
+
+ %i2 = or i32 %s1, %s2
+ %i3 = or i32 %i2, %s3
+
+ store i32 %i3, ptr %out
+ ret void
+}
+
+define void @scalable_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32_unused_high_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldrh w9, [x0]
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: mov w8, v0.s[2]
+; CHECK-NEXT: orr w8, w9, w8, lsl #16
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e1 = extractelement <vscale x 4 x i8> %ld, i32 0
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+
+ %z0 = zext i8 %e1 to i32
+ %z1 = zext i8 %e2 to i32
+ %z2 = zext i8 %e3 to i32
+
+ %s1 = shl nuw nsw i32 %z1, 8
+ %s2 = shl nuw nsw i32 %z2, 16
+
+ %i1 = or i32 %s1, %z0
+ %i2 = or i32 %i1, %s2
+
+ store i32 %i2, ptr %out
+ ret void
+}
+
+define void @scalable_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32_unused_low_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT: mov w8, v0.s[2]
+; CHECK-NEXT: mov w9, v0.s[3]
+; CHECK-NEXT: lsl w8, w8, #16
+; CHECK-NEXT: orr w8, w8, w9, lsl #24
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+ %e4 = extractelement <vscale x 4 x i8> %ld, i32 3
+
+ %z2 = zext i8 %e3 to i32
+ %z3 = zext i8 %e4 to i32
+
+ %s2 = shl nuw nsw i32 %z2, 16
+ %s3 = shl nuw i32 %z3, 24
+
+ %i3 = or i32 %s2, %s3
+
+ store i32 %i3, ptr %out
+ ret void
+}
+
+; x1 = x0[0:1]
+define void @scalable_vector_to_i32_unused_high_i16(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i32_unused_high_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w8, [x0]
+; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e1 = extractelement <vscale x 4 x i8> %ld, i32 0
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+
+ %z0 = zext i8 %e1 to i32
+ %z1 = zext i8 %e2 to i32
+
+ %s1 = shl nuw nsw i32 %z1, 8
+
+ %i1 = or i32 %s1, %z0
+
+ store i32 %i1, ptr %out
+ ret void
+}
+
+; x1 = x0
+define void @scalable_vector_to_i64(ptr %in, ptr %out, ptr %p) #0 {
+; CHECK-LABEL: scalable_vector_to_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: str x8, [x1]
+; CHECK-NEXT: ret
+ %ld = load <vscale x 4 x i8>, ptr %in, align 4
+
+ %e1 = extractelement <vscale x 4 x i8> %ld, i32 0
+ %e2 = extractelement <vscale x 4 x i8> %ld, i32 1
+ %e3 = extractelement <vscale x 4 x i8> %ld, i32 2
+ %e4 = extractelement <vscale x 4 x i8> %ld, i32 3
+
+ %z0 = zext i8 %e1 to i64
+ %z1 = zext i8 %e2 to i64
+ %z2 = zext i8 %e3 to i64
+ %z3 = zext i8 %e4 to i64
+
+ %s1 = shl nuw nsw i64 %z1, 8
+ %s2 = shl nuw nsw i64 %z2, 16
+ %s3 = shl nuw i64 %z3, 24
+
+ %i1 = or i64 %s1, %z0
+ %i2 = or i64 %i1, %s2
+ %i3 = or i64 %i2, %s3
+
+ store i64 %i3, ptr %out
+ ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
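One subtlety in the DAGCombiner hunk above worth spelling out: `TypeSize::isKnownGE` and `TypeSize::isKnownLT` are not complements once one operand is scalable, which is why the load case needs the extra `isKnownLT` bail-out after the `isKnownGE` check. A small sketch of how I understand the semantics (example values are mine):

```cpp
// Rough illustration: a fixed index compared against a scalable width can
// be indeterminate in *both* directions, so "not known-GE" does not imply
// "known-LT".
#include "llvm/Support/TypeSize.h"
using namespace llvm;

void knownComparisonExamples() {
  TypeSize W = TypeSize::getScalable(4); // 4 x vscale bytes, vscale >= 1

  // A small fixed index is provably in range:
  bool A = TypeSize::isKnownLT(TypeSize::getFixed(2), W); // true: 2 < 4 <= 4*vscale
  bool B = TypeSize::isKnownGE(TypeSize::getFixed(2), W); // false

  // A larger fixed index is not provably on either side:
  bool C = TypeSize::isKnownGE(TypeSize::getFixed(6), W); // false: holds only if vscale == 1
  bool D = TypeSize::isKnownLT(TypeSize::getFixed(6), W); // false: holds only if vscale >= 2
  (void)A; (void)B; (void)C; (void)D;
}
```

When neither predicate holds, the safe answer is to give up on the combine, which is exactly what the second check does.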
calculateByteProvider only cares about scalars or a single element within a vector. For the latter there is the VectorIndex parameter to identify the element. All other properties, and specifically Index, relate to the underlying scalar type, so when taking the size of a type it is the scalar size that matters. Fixes llvm#148387
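A minimal sketch of the sizing rule described above, as I read it (the helper name and exact queries are illustrative, not lifted from the patch):

```cpp
#include "llvm/CodeGen/ValueTypes.h"
#include <optional>
using namespace llvm;

// Hypothetical helper mirroring the description: validate a byte Index
// against the *scalar* size of VT, which stays fixed even when the
// element count is scalable (e.g. <vscale x 4 x i32> has 32-bit scalars).
std::optional<uint64_t> scalarByteWidth(EVT VT, unsigned Index) {
  uint64_t ScalarBits = VT.getScalarSizeInBits(); // always a fixed amount
  if (ScalarBits % 8 != 0)
    return std::nullopt;
  uint64_t ScalarBytes = ScalarBits / 8;
  if (Index >= ScalarBytes) // Index addresses a byte within one element
    return std::nullopt;
  return ScalarBytes;
}
```

For the `<vscale x 4 x i8>` loads in the new tests this yields a one-byte scalar, so the byte arithmetic never has to touch the scalable element count.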
@arsenm - The PR has changed direction (it's now much simpler) since you accepted it. Do you mind taking another look to make sure you're still happy with it?
LGTM