Skip to content

Commit b73d2c8

Browse files
fhahn authored and tru committed
[LV] Keep track of cost-based ScalarAfterVec in VPWidenPointerInd.
Epilogue vectorization uses isScalarAfterVectorization to check if widened versions for inductions need to be generated and bails out in those cases. At the moment, there are scenarios where isScalarAfterVectorization returns true but VPWidenPointerInduction::onlyScalarsGenerated would return false, causing widening. This can lead to widened phis with incorrect start values being created in the epilogue vector body. This patch addresses the issue by storing the cost-model decision in VPWidenPointerInductionRecipe and restoring the behavior before 151c144. This effectively reverts 151c144, but the long-term fix is to properly support widened inductions during epilogue vectorization. Fixes #57712.
1 parent 38b5fa7 commit b73d2c8

File tree

10 files changed

+292
-191
lines changed

10 files changed

+292
-191
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8149,9 +8149,15 @@ VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
81498149
*PSE.getSE(), *OrigLoop, Range);
81508150

81518151
// Check if this is pointer induction. If so, build the recipe for it.
8152-
if (auto *II = Legal->getPointerInductionDescriptor(Phi))
8153-
return new VPWidenPointerInductionRecipe(Phi, Operands[0], *II,
8154-
*PSE.getSE());
8152+
if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
8153+
return new VPWidenPointerInductionRecipe(
8154+
Phi, Operands[0], *II, *PSE.getSE(),
8155+
LoopVectorizationPlanner::getDecisionAndClampRange(
8156+
[&](ElementCount VF) {
8157+
return !VF.isScalable() && CM.isScalarAfterVectorization(Phi, VF);
8158+
},
8159+
Range));
8160+
}
81558161
return nullptr;
81568162
}
81578163

@@ -9332,7 +9338,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
93329338
auto *IVR = getParent()->getPlan()->getCanonicalIV();
93339339
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
93349340

9335-
if (onlyScalarsGenerated(State.VF)) {
9341+
if (onlyScalarsGenerated()) {
93369342
// This is the normalized GEP that starts counting at zero.
93379343
Value *PtrInd = State.Builder.CreateSExtOrTrunc(
93389344
CanonicalIV, IndDesc.getStep()->getType());

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -698,7 +698,7 @@ void VPlan::execute(VPTransformState *State) {
698698
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
699699
// TODO: Split off the case that all users of a pointer phi are scalar
700700
// from the VPWidenPointerInductionRecipe.
701-
if (WidenPhi->onlyScalarsGenerated(State->VF))
701+
if (WidenPhi->onlyScalarsGenerated())
702702
continue;
703703

704704
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1187,15 +1187,19 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
11871187
/// explicitly.
11881188
ScalarEvolution &SE;
11891189

1190+
bool IsScalarAfterVectorization;
1191+
11901192
public:
11911193
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
11921194
/// Start.
11931195
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start,
11941196
const InductionDescriptor &IndDesc,
1195-
ScalarEvolution &SE)
1197+
ScalarEvolution &SE,
1198+
bool IsScalarAfterVectorization)
11961199
: VPHeaderPHIRecipe(VPVWidenPointerInductionSC, VPWidenPointerInductionSC,
11971200
Phi),
1198-
IndDesc(IndDesc), SE(SE) {
1201+
IndDesc(IndDesc), SE(SE),
1202+
IsScalarAfterVectorization(IsScalarAfterVectorization) {
11991203
addOperand(Start);
12001204
}
12011205

@@ -1216,7 +1220,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
12161220
void execute(VPTransformState &State) override;
12171221

12181222
/// Returns true if only scalar values will be generated.
1219-
bool onlyScalarsGenerated(ElementCount VF);
1223+
bool onlyScalarsGenerated();
12201224

12211225
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
12221226
/// Print the recipe.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -982,11 +982,8 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
982982
}
983983
#endif
984984

985-
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
986-
bool IsUniform = vputils::onlyFirstLaneUsed(this);
987-
return all_of(users(),
988-
[&](const VPUser *U) { return U->usesScalars(this); }) &&
989-
(IsUniform || !VF.isScalable());
985+
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated() {
986+
return IsScalarAfterVectorization;
990987
}
991988

992989
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 50 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -13,65 +13,74 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
1313
; CHECK: vector.ph:
1414
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1515
; CHECK: vector.body:
16-
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_START_1:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
1716
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
18-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 1>
19-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 2, i64 3>
20-
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x ptr> [[TMP0]], zeroinitializer
21-
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x ptr> [[TMP1]], zeroinitializer
22-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
23-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP4]])
24-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
25-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP5]])
26-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
27-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP6]])
28-
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
29-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP7]])
30-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
31-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
32-
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP9]], align 1
33-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i32 2
34-
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP10]], align 1
17+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
18+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
19+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
20+
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
21+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
22+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x ptr> [[TMP2]], ptr [[NEXT_GEP1]], i32 1
23+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
24+
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP4]]
25+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 3
26+
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP5]]
27+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
28+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
29+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP3]], zeroinitializer
30+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
31+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
32+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]])
33+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
34+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
35+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
36+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
37+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
38+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
39+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
40+
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
41+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
42+
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
3543
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
36-
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
37-
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
38-
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
44+
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
45+
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3946
; CHECK: middle.block:
4047
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10001, 10000
4148
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
4249
; CHECK: vec.epilog.iter.check:
43-
; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
50+
; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
4451
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
4552
; CHECK: vec.epilog.ph:
4653
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
4754
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
4855
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
4956
; CHECK: vec.epilog.vector.body:
50-
; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi ptr [ [[PTR_START_1]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
51-
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
52-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <2 x i64> <i64 0, i64 1>
53-
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <2 x ptr> [[TMP12]], zeroinitializer
54-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
55-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
56-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
57-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]])
58-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x ptr> [[TMP12]], i32 0
59-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0
60-
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP17]], align 1
61-
; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 2
62-
; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI5]], i64 2
63-
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 10000
64-
; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
57+
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
58+
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX7]], 0
59+
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
60+
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX7]], 1
61+
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
62+
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP8]], i32 0
63+
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP9]], i32 1
64+
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
65+
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
66+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
67+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
68+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
69+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP8]], i32 0
70+
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1
71+
; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
72+
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT10]], 10000
73+
; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
6574
; CHECK: vec.epilog.middle.block:
66-
; CHECK-NEXT: [[CMP_N3:%.*]] = icmp eq i64 10001, 10000
67-
; CHECK-NEXT: br i1 [[CMP_N3]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
75+
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 10001, 10000
76+
; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
6877
; CHECK: vec.epilog.scalar.ph:
6978
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
70-
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ]
79+
; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ]
7180
; CHECK-NEXT: br label [[LOOP:%.*]]
7281
; CHECK: loop:
7382
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
74-
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
83+
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
7584
; CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp ne ptr [[PTR_IV]], null
7685
; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I_I]])
7786
; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1

llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll

Lines changed: 34 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
2626
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2727
; CHECK: vector.body:
2828
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
29+
; CHECK-NEXT: [[POINTER_PHI8:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND9:%.*]], [[VECTOR_BODY]] ]
2930
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3031
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
3132
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
@@ -45,32 +46,43 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
4546
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP19]]
4647
; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
4748
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]]
48-
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0
49-
; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8
50-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]]
51-
; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
52-
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2
53-
; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 0
54-
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]]
55-
; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8
56-
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]]
57-
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
58-
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP29]], align 4
59-
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
60-
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 2
61-
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]]
62-
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP32]], align 4
63-
; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
64-
; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4
65-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]]
49+
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
50+
; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2
51+
; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 2
52+
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 8, [[TMP24]]
53+
; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
54+
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP26]], i32 0
55+
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT10]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
56+
; CHECK-NEXT: [[TMP27:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
57+
; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT11]], [[TMP27]]
58+
; CHECK-NEXT: [[VECTOR_GEP12:%.*]] = mul <vscale x 2 x i64> [[TMP28]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
59+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP12]]
60+
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP23]], 1
61+
; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP30]], i32 0
62+
; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT13]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
63+
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
64+
; CHECK-NEXT: [[TMP32:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT14]], [[TMP31]]
65+
; CHECK-NEXT: [[VECTOR_GEP15:%.*]] = mul <vscale x 2 x i64> [[TMP32]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
66+
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP15]]
67+
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <vscale x 2 x ptr> [[TMP29]], i32 0
68+
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP34]], i32 0
69+
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 4
70+
; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vscale.i32()
71+
; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], 2
72+
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[TMP34]], i32 [[TMP37]]
73+
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP38]], align 4
74+
; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
75+
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 4
76+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP40]]
6677
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]]
67-
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
68-
; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
78+
; CHECK-NEXT: [[PTR_IND9]] = getelementptr i8, ptr [[POINTER_PHI8]], i64 [[TMP25]]
79+
; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
80+
; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6981
; CHECK: middle.block:
7082
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
7183
; CHECK-NEXT: [[CAST_CMO:%.*]] = sub i64 [[N_VEC]], 1
72-
; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CAST_CMO]], 8
73-
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]]
84+
; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[CAST_CMO]], 8
85+
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP42]]
7486
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
7587
; CHECK: scalar.ph:
7688
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ]

0 commit comments

Comments
 (0)