[LV] Update handling of scalable pointer inductions after b73d2c8.

fhahn · tru · commit 5f620d003fda · 2022-10-03T12:53:41.000+02:00
The dependent code has been changed quite a lot since 151c144 which b73d2c8 effectively reverts. Now we run into a case where lowering didn't expect/support the behavior pre 151c144 any longer. Update the code dealing with scalable pointer inductions to also check for uniformity in combination with isScalarAfterVectorization. This should ensure scalable pointer inductions are handled properly during epilogue vectorization. Fixes #57912. (cherry picked from commit 2c692d8)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8154,7 +8154,7 @@ VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
         Phi, Operands[0], *II, *PSE.getSE(),
         LoopVectorizationPlanner::getDecisionAndClampRange(
             [&](ElementCount VF) {
-              return !VF.isScalable() && CM.isScalarAfterVectorization(Phi, VF);
+              return CM.isScalarAfterVectorization(Phi, VF);
             },
             Range));
   }
@@ -9338,7 +9338,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
   auto *IVR = getParent()->getPlan()->getCanonicalIV();
   PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
 
-  if (onlyScalarsGenerated()) {
+  if (onlyScalarsGenerated(State.VF)) {
     // This is the normalized GEP that starts counting at zero.
     Value *PtrInd = State.Builder.CreateSExtOrTrunc(
         CanonicalIV, IndDesc.getStep()->getType());
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -698,7 +698,7 @@ void VPlan::execute(VPTransformState *State) {
         auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
         // TODO: Split off the case that all users of a pointer phi are scalar
         // from the VPWidenPointerInductionRecipe.
-        if (WidenPhi->onlyScalarsGenerated())
+        if (WidenPhi->onlyScalarsGenerated(State->VF))
           continue;
 
         auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1220,7 +1220,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
   void execute(VPTransformState &State) override;
 
   /// Returns true if only scalar values will be generated.
-  bool onlyScalarsGenerated();
+  bool onlyScalarsGenerated(ElementCount VF);
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -982,8 +982,9 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-bool VPWidenPointerInductionRecipe::onlyScalarsGenerated() {
-  return IsScalarAfterVectorization;
+bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
+  return IsScalarAfterVectorization &&
+         (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
@@ -26,7 +26,6 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[POINTER_PHI8:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 2
@@ -46,43 +45,32 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP19]]
 ; CHECK-NEXT:    [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]]
-; CHECK-NEXT:    [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 2
-; CHECK-NEXT:    [[TMP24:%.*]] = mul i64 [[TMP23]], 2
-; CHECK-NEXT:    [[TMP25:%.*]] = mul i64 8, [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP23]], 0
-; CHECK-NEXT:    [[DOTSPLATINSERT10:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP26]], i32 0
-; CHECK-NEXT:    [[DOTSPLAT11:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT10]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP27:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP28:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT11]], [[TMP27]]
-; CHECK-NEXT:    [[VECTOR_GEP12:%.*]] = mul <vscale x 2 x i64> [[TMP28]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP12]]
-; CHECK-NEXT:    [[TMP30:%.*]] = mul i64 [[TMP23]], 1
-; CHECK-NEXT:    [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP30]], i32 0
-; CHECK-NEXT:    [[DOTSPLAT14:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT13]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP31:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP32:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT14]], [[TMP31]]
-; CHECK-NEXT:    [[VECTOR_GEP15:%.*]] = mul <vscale x 2 x i64> [[TMP32]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP15]]
-; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <vscale x 2 x ptr> [[TMP29]], i32 0
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr i64, ptr [[TMP34]], i32 0
-; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 4
-; CHECK-NEXT:    [[TMP36:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP37:%.*]] = mul i32 [[TMP36]], 2
-; CHECK-NEXT:    [[TMP38:%.*]] = getelementptr i64, ptr [[TMP34]], i32 [[TMP37]]
-; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[TMP38]], align 4
-; CHECK-NEXT:    [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP40:%.*]] = mul i64 [[TMP39]], 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP40]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 8
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]]
+; CHECK-NEXT:    [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP24]], 2
+; CHECK-NEXT:    [[TMP26:%.*]] = add i64 [[TMP25]], 0
+; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = mul i64 [[TMP27]], 8
+; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]]
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[TMP29]], align 4
+; CHECK-NEXT:    [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP31:%.*]] = mul i32 [[TMP30]], 2
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]]
+; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP34:%.*]] = mul i64 [[TMP33]], 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]]
 ; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]]
-; CHECK-NEXT:    [[PTR_IND9]] = getelementptr i8, ptr [[POINTER_PHI8]], i64 [[TMP25]]
-; CHECK-NEXT:    [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; CHECK-NEXT:    [[CAST_CMO:%.*]] = sub i64 [[N_VEC]], 1
-; CHECK-NEXT:    [[TMP42:%.*]] = mul i64 [[CAST_CMO]], 8
-; CHECK-NEXT:    [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP42]]
+; CHECK-NEXT:    [[TMP36:%.*]] = mul i64 [[CAST_CMO]], 8
+; CHECK-NEXT:    [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -49,50 +49,38 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias
 ; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8** [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[POINTER_PHI4:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 1
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 1, [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP5]], 0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 1, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP6]], 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP10:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP9]]
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8*, i8** [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 1
-; CHECK-NEXT:    [[TMP15:%.*]] = mul i64 1, [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP13]], 0
-; CHECK-NEXT:    [[DOTSPLATINSERT6:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP16]], i32 0
-; CHECK-NEXT:    [[DOTSPLAT7:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT6]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP17:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT7]], [[TMP17]]
-; CHECK-NEXT:    [[VECTOR_GEP8:%.*]] = mul <vscale x 2 x i64> [[TMP18]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, i8* [[POINTER_PHI4]], <vscale x 2 x i64> [[VECTOR_GEP8]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP19]], i64 1
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <vscale x 2 x i8**> [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i8*, i8** [[TMP21]], i32 0
-; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to <vscale x 2 x i8*>*
-; CHECK-NEXT:    store <vscale x 2 x i8*> [[TMP20]], <vscale x 2 x i8*>* [[TMP23]], align 8
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <vscale x 2 x i8*> [[TMP19]], i32 0
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr i8, i8* [[TMP24]], i32 0
-; CHECK-NEXT:    [[TMP26:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP26]], align 1
-; CHECK-NEXT:    [[TMP27:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP28:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>*
-; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP27]], <vscale x 2 x i8>* [[TMP28]], align 1
-; CHECK-NEXT:    [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP30:%.*]] = mul i64 [[TMP29]], 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]]
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8*, i8** [[POINTER_PHI]], i64 [[TMP7]]
-; CHECK-NEXT:    [[PTR_IND5]] = getelementptr i8, i8* [[POINTER_PHI4]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
+; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP12]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to <vscale x 2 x i8*>*
+; CHECK-NEXT:    store <vscale x 2 x i8*> [[TMP13]], <vscale x 2 x i8*>* [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP18]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP19]], <vscale x 2 x i8>* [[TMP20]], align 1
+; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

Original file line number	Diff line number	Diff line change
`@@ -982,8 +982,9 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,`
`982`	`982`	`}`
`983`	`983`	`#endif`
`984`	`984`
`985`		`-bool VPWidenPointerInductionRecipe::onlyScalarsGenerated() {`
`986`		`- return IsScalarAfterVectorization;`
	`985`	`+bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {`
	`986`	`+ return IsScalarAfterVectorization &&`
	`987`	`+ (!VF.isScalable() \|\| vputils::onlyFirstLaneUsed(this));`
`987`	`988`	`}`
`988`	`989`
`989`	`990`	`#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)`