Skip to content

Commit 5f620d0

Browse files
fhahn authored and tru committed
[LV] Update handling of scalable pointer inductions after b73d2c8.
The dependent code has changed quite a lot since 151c144, which b73d2c8 effectively reverts. We now run into a case where lowering no longer expects or supports the pre-151c144 behavior. Update the code dealing with scalable pointer inductions to also check for uniformity in combination with isScalarAfterVectorization. This should ensure scalable pointer inductions are handled properly during epilogue vectorization. Fixes #57912. (cherry picked from commit 2c692d8)
1 parent b3669eb commit 5f620d0

File tree

8 files changed

+174
-246
lines changed

8 files changed

+174
-246
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8154,7 +8154,7 @@ VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
81548154
Phi, Operands[0], *II, *PSE.getSE(),
81558155
LoopVectorizationPlanner::getDecisionAndClampRange(
81568156
[&](ElementCount VF) {
8157-
return !VF.isScalable() && CM.isScalarAfterVectorization(Phi, VF);
8157+
return CM.isScalarAfterVectorization(Phi, VF);
81588158
},
81598159
Range));
81608160
}
@@ -9338,7 +9338,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
93389338
auto *IVR = getParent()->getPlan()->getCanonicalIV();
93399339
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
93409340

9341-
if (onlyScalarsGenerated()) {
9341+
if (onlyScalarsGenerated(State.VF)) {
93429342
// This is the normalized GEP that starts counting at zero.
93439343
Value *PtrInd = State.Builder.CreateSExtOrTrunc(
93449344
CanonicalIV, IndDesc.getStep()->getType());

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ void VPlan::execute(VPTransformState *State) {
698698
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
699699
// TODO: Split off the case that all users of a pointer phi are scalar
700700
// from the VPWidenPointerInductionRecipe.
701-
if (WidenPhi->onlyScalarsGenerated())
701+
if (WidenPhi->onlyScalarsGenerated(State->VF))
702702
continue;
703703

704704
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1220,7 +1220,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
12201220
void execute(VPTransformState &State) override;
12211221

12221222
/// Returns true if only scalar values will be generated.
1223-
bool onlyScalarsGenerated();
1223+
bool onlyScalarsGenerated(ElementCount VF);
12241224

12251225
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
12261226
/// Print the recipe.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -982,8 +982,9 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
982982
}
983983
#endif
984984

985-
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated() {
986-
return IsScalarAfterVectorization;
985+
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
986+
return IsScalarAfterVectorization &&
987+
(!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
987988
}
988989

989990
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Lines changed: 73 additions & 113 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
2626
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2727
; CHECK: vector.body:
2828
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
29-
; CHECK-NEXT: [[POINTER_PHI8:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND9:%.*]], [[VECTOR_BODY]] ]
3029
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3130
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
3231
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
@@ -46,43 +45,32 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
4645
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP19]]
4746
; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
4847
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]]
49-
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
50-
; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2
51-
; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 2
52-
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 8, [[TMP24]]
53-
; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
54-
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP26]], i32 0
55-
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT10]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
56-
; CHECK-NEXT: [[TMP27:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
57-
; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT11]], [[TMP27]]
58-
; CHECK-NEXT: [[VECTOR_GEP12:%.*]] = mul <vscale x 2 x i64> [[TMP28]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
59-
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP12]]
60-
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP23]], 1
61-
; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP30]], i32 0
62-
; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT13]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
63-
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
64-
; CHECK-NEXT: [[TMP32:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT14]], [[TMP31]]
65-
; CHECK-NEXT: [[VECTOR_GEP15:%.*]] = mul <vscale x 2 x i64> [[TMP32]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
66-
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP15]]
67-
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <vscale x 2 x ptr> [[TMP29]], i32 0
68-
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP34]], i32 0
69-
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 4
70-
; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vscale.i32()
71-
; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], 2
72-
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[TMP34]], i32 [[TMP37]]
73-
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP38]], align 4
74-
; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
75-
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 4
76-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP40]]
48+
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0
49+
; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8
50+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]]
51+
; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
52+
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2
53+
; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 0
54+
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]]
55+
; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8
56+
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]]
57+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
58+
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP29]], align 4
59+
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
60+
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 2
61+
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]]
62+
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP32]], align 4
63+
; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
64+
; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4
65+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]]
7766
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]]
78-
; CHECK-NEXT: [[PTR_IND9]] = getelementptr i8, ptr [[POINTER_PHI8]], i64 [[TMP25]]
79-
; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
80-
; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
67+
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
68+
; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
8169
; CHECK: middle.block:
8270
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
8371
; CHECK-NEXT: [[CAST_CMO:%.*]] = sub i64 [[N_VEC]], 1
84-
; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[CAST_CMO]], 8
85-
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP42]]
72+
; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CAST_CMO]], 8
73+
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]]
8674
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
8775
; CHECK: scalar.ph:
8876
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ]

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

Lines changed: 30 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -49,50 +49,38 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias
4949
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
5050
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
5151
; CHECK: vector.body:
52-
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8** [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
53-
; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ]
52+
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
5453
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
55-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
56-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
57-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 1
58-
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]]
59-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP5]], 0
60-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i32 0
54+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
55+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]]
56+
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
57+
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
58+
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1
59+
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
60+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0
61+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
6162
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
62-
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
63-
; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP9]]
64-
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
65-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8*, i8** [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
66-
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
67-
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
68-
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1
69-
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]]
70-
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0
71-
; CHECK-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP16]], i32 0
72-
; CHECK-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT6]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
73-
; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
74-
; CHECK-NEXT: [[TMP18:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT7]], [[TMP17]]
75-
; CHECK-NEXT: [[VECTOR_GEP8:%.*]] = mul <vscale x 2 x i64> [[TMP18]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
76-
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[POINTER_PHI4]], <vscale x 2 x i64> [[VECTOR_GEP8]]
77-
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP19]], i64 1
78-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i8**> [[TMP11]], i32 0
79-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8*, i8** [[TMP21]], i32 0
80-
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to <vscale x 2 x i8*>*
81-
; CHECK-NEXT: store <vscale x 2 x i8*> [[TMP20]], <vscale x 2 x i8*>* [[TMP23]], align 8
82-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i8*> [[TMP19]], i32 0
83-
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[TMP24]], i32 0
84-
; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>*
85-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP26]], align 1
86-
; CHECK-NEXT: [[TMP27:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
87-
; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>*
88-
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP27]], <vscale x 2 x i8>* [[TMP28]], align 1
89-
; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
90-
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 2
91-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]]
92-
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8*, i8** [[POINTER_PHI]], i64 [[TMP7]]
93-
; CHECK-NEXT: [[PTR_IND5]] = getelementptr i8, i8* [[POINTER_PHI4]], i64 [[TMP15]]
94-
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
95-
; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
63+
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
64+
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
65+
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
66+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
67+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP12]], i64 1
68+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
69+
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to <vscale x 2 x i8*>*
70+
; CHECK-NEXT: store <vscale x 2 x i8*> [[TMP13]], <vscale x 2 x i8*>* [[TMP15]], align 8
71+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
72+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i32 0
73+
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
74+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP18]], align 1
75+
; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
76+
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
77+
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP19]], <vscale x 2 x i8>* [[TMP20]], align 1
78+
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
79+
; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2
80+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
81+
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
82+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
83+
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
9684
; CHECK: middle.block:
9785
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
9886
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]

0 commit comments

Comments
 (0)