38
38
#include " llvm/Transforms/Utils/Cloning.h"
39
39
#include " llvm/Transforms/Utils/LoopSimplify.h"
40
40
#include " llvm/Transforms/Utils/LoopUtils.h"
41
+ #include " llvm/Transforms/Utils/ScalarEvolutionExpander.h"
41
42
#include " llvm/Transforms/Utils/ValueMapper.h"
42
43
#include < algorithm>
43
44
#include < cassert>
@@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
330
331
331
332
bool llvm::canPeelLastIteration (const Loop &L, ScalarEvolution &SE) {
332
333
const SCEV *BTC = SE.getBackedgeTakenCount (&L);
333
- // The loop must execute at least 2 iterations to guarantee that peeled
334
- // iteration executes.
335
- // TODO: Add checks during codegen.
336
- if (isa<SCEVCouldNotCompute>(BTC) ||
337
- !SE.isKnownPredicate (CmpInst::ICMP_UGT, BTC, SE.getZero (BTC->getType ())))
334
+ if (isa<SCEVCouldNotCompute>(BTC))
338
335
return false ;
339
336
340
337
// Check if the exit condition of the loop can be adjusted by the peeling
@@ -364,12 +361,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
364
361
/// is known at the second-to-last.
365
362
static bool shouldPeelLastIteration (Loop &L, CmpPredicate Pred,
366
363
const SCEVAddRecExpr *LeftAR,
367
- const SCEV *RightSCEV,
368
- ScalarEvolution &SE ) {
364
+ const SCEV *RightSCEV, ScalarEvolution &SE,
365
+ const TargetTransformInfo &TTI ) {
369
366
if (!canPeelLastIteration (L, SE))
370
367
return false ;
371
368
372
369
const SCEV *BTC = SE.getBackedgeTakenCount (&L);
370
+ SCEVExpander Expander (SE, L.getHeader ()->getDataLayout (), " loop-peel" );
371
+ if (!SE.isKnownNonZero (BTC) &&
372
+ Expander.isHighCostExpansion (BTC, &L, SCEVCheapExpansionBudget, &TTI,
373
+ L.getLoopPredecessor ()->getTerminator ()))
374
+ return false ;
375
+
373
376
const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration (BTC, SE);
374
377
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration (
375
378
SE.getMinusSCEV (BTC, SE.getOne (BTC->getType ())), SE);
@@ -391,7 +394,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
391
394
// ..
392
395
// }
393
396
static std::pair<unsigned , unsigned >
394
- countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
397
+ countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
398
+ const TargetTransformInfo &TTI) {
395
399
assert (L.isLoopSimplifyForm () && " Loop needs to be in loop simplify form" );
396
400
unsigned DesiredPeelCount = 0 ;
397
401
unsigned DesiredPeelCountLast = 0 ;
@@ -479,7 +483,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
479
483
const SCEV *Step = LeftAR->getStepRecurrence (SE);
480
484
if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
481
485
Pred)) {
482
- if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE))
486
+ if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE, TTI ))
483
487
DesiredPeelCountLast = 1 ;
484
488
return ;
485
489
}
@@ -593,8 +597,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
593
597
void llvm::computePeelCount (Loop *L, unsigned LoopSize,
594
598
TargetTransformInfo::PeelingPreferences &PP,
595
599
unsigned TripCount, DominatorTree &DT,
596
- ScalarEvolution &SE, AssumptionCache *AC ,
597
- unsigned Threshold) {
600
+ ScalarEvolution &SE, const TargetTransformInfo &TTI ,
601
+ AssumptionCache *AC, unsigned Threshold) {
598
602
assert (LoopSize > 0 && " Zero loop size is not allowed!" );
599
603
// Save the PP.PeelCount value set by the target in
600
604
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -656,7 +660,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
656
660
}
657
661
658
662
const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
659
- countToEliminateCompares (*L, MaxPeelCount, SE);
663
+ countToEliminateCompares (*L, MaxPeelCount, SE, TTI );
660
664
DesiredPeelCount = std::max (DesiredPeelCount, CountToEliminateCmps);
661
665
662
666
if (DesiredPeelCount == 0 )
@@ -822,7 +826,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
822
826
/// instructions in the last peeled-off iteration.
823
827
static void cloneLoopBlocks (
824
828
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
825
- BasicBlock *InsertBot,
829
+ BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
826
830
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
827
831
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
828
832
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -914,12 +918,22 @@ static void cloneLoopBlocks(
914
918
// loop iteration. Since this copy is no longer part of the loop, we
915
919
// resolve this statically:
916
920
if (PeelLast) {
917
- // For the last iteration, we use the value from the latch of the original
918
- // loop directly.
921
+ // For the last iteration, we introduce new phis for each header phi in
922
+ // InsertTop, using the incoming value from the preheader for the original
923
+ // preheader (when skipping the main loop) and the incoming value from the
924
+ // latch for the latch (when continuing from the main loop).
925
+ IRBuilder<> B (InsertTop->getTerminator ());
919
926
for (BasicBlock::iterator I = Header->begin (); isa<PHINode>(I); ++I) {
920
927
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
921
- VMap[&*I] = NewPHI->getIncomingValueForBlock (Latch );
928
+ PHINode *PN = B. CreatePHI ( NewPHI->getType (), 2 );
922
929
NewPHI->eraseFromParent ();
930
+ if (OrigPreHeader)
931
+ PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (PreHeader),
932
+ OrigPreHeader);
933
+
934
+ PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (Latch),
935
+ Latch);
936
+ VMap[&*I] = PN;
923
937
}
924
938
} else {
925
939
// For the first iteration, we use the value from the preheader directly.
@@ -1053,7 +1067,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1053
1067
// Set up all the necessary basic blocks.
1054
1068
BasicBlock *InsertTop;
1055
1069
BasicBlock *InsertBot;
1056
- BasicBlock *NewPreHeader;
1070
+ BasicBlock *NewPreHeader = nullptr ;
1057
1071
DenseMap<Instruction *, Value *> ExitValues;
1058
1072
if (PeelLast) {
1059
1073
// It is convenient to split the single exit block from the latch the
@@ -1084,11 +1098,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1084
1098
for (PHINode &P : Exit->phis ())
1085
1099
ExitValues[&P] = P.getIncomingValueForBlock (Latch);
1086
1100
1101
+ const SCEV *BTC = SE->getBackedgeTakenCount (L);
1102
+
1087
1103
InsertTop = SplitEdge (Latch, Exit, &DT, LI);
1088
1104
InsertBot = SplitBlock (InsertTop, InsertTop->getTerminator (), &DT, LI);
1089
1105
1090
1106
InsertTop->setName (Exit->getName () + " .peel.begin" );
1091
1107
InsertBot->setName (Exit->getName () + " .peel.next" );
1108
+ NewPreHeader = nullptr ;
1109
+
1110
+ // If the original loop may only execute a single iteration we need to
1111
+ // insert a trip count check and skip the original loop with the last
1112
+ // iteration peeled off if necessary.
1113
+ if (!SE->isKnownNonZero (BTC)) {
1114
+ NewPreHeader = SplitEdge (PreHeader, Header, &DT, LI);
1115
+ SCEVExpander Expander (*SE, Latch->getDataLayout (), " loop-peel" );
1116
+
1117
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator ());
1118
+ Value *BTCValue =
1119
+ Expander.expandCodeFor (BTC, BTC->getType (), PreHeaderBR);
1120
+ IRBuilder<> B (PreHeaderBR);
1121
+ Value *Cond =
1122
+ B.CreateICmpNE (BTCValue, ConstantInt::get (BTCValue->getType (), 0 ));
1123
+ B.CreateCondBr (Cond, NewPreHeader, InsertTop);
1124
+ PreHeaderBR->eraseFromParent ();
1125
+
1126
+ // PreHeader now dominates InsertTop.
1127
+ DT.changeImmediateDominator (InsertTop, PreHeader);
1128
+ }
1092
1129
} else {
1093
1130
// It is convenient to split the preheader into 3 parts - two blocks to
1094
1131
// anchor the peeled copy of the loop body, and a new preheader for the
@@ -1162,8 +1199,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1162
1199
for (unsigned Iter = 0 ; Iter < PeelCount; ++Iter) {
1163
1200
SmallVector<BasicBlock *, 8 > NewBlocks;
1164
1201
1165
- cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166
- NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
1202
+ cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot,
1203
+ NewPreHeader ? PreHeader : nullptr , ExitEdges, NewBlocks,
1204
+ LoopBlocks, VMap, LVMap, &DT, LI,
1167
1205
LoopLocalNoAliasDeclScopes, *SE);
1168
1206
1169
1207
// Remap to use values from the current iteration instead of the
@@ -1216,9 +1254,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1216
1254
1217
1255
if (PeelLast) {
1218
1256
// Now adjust users of the original exit values by replacing them with the
1219
- // exit value from the peeled iteration.
1220
- for (const auto &[P, E] : ExitValues)
1257
+ // exit value from the peeled iteration and remove them .
1258
+ for (const auto &[P, E] : ExitValues) {
1221
1259
P->replaceAllUsesWith (isa<Constant>(E) ? E : &*VMap.lookup (E));
1260
+ P->eraseFromParent ();
1261
+ }
1222
1262
formLCSSA (*L, DT, LI, SE);
1223
1263
} else {
1224
1264
// Now adjust the phi nodes in the loop header to get their initial values
0 commit comments