diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9c092e6077e..66f8aef98d4 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -24,32 +24,35 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "indvars"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "indvars"
+
 STATISTIC(NumWidened , "Number of indvars widened");
 STATISTIC(NumReplaced , "Number of exit values replaced");
 STATISTIC(NumLFTR , "Number of loop exit tests replaced");
@@ -63,40 +66,59 @@ static cl::opt<bool> VerifyIndvars(
   "verify-indvars", cl::Hidden,
   cl::desc("Verify the ScalarEvolution result after running indvars"));
 
+static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
+  cl::desc("Reduce live induction variables."));
+
+enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
+
+static cl::opt<ReplaceExitVal> ReplaceExitValue(
+    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
+    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
+    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
+               clEnumValN(OnlyCheapRepl, "cheap",
+                          "only replace exit value when the cost is cheap"),
+               clEnumValN(AlwaysRepl, "always",
+                          "always replace exit value whenever possible"),
+               clEnumValEnd));
+
+namespace {
+struct RewritePhi;
+}
+
 namespace {
   class IndVarSimplify : public LoopPass {
-    LoopInfo        *LI;
-    ScalarEvolution *SE;
-    DominatorTree   *DT;
-    DataLayout      *TD;
-    TargetLibraryInfo *TLI;
+    LoopInfo                  *LI;
+    ScalarEvolution           *SE;
+    DominatorTree             *DT;
+    TargetLibraryInfo         *TLI;
+    const TargetTransformInfo *TTI;
 
     SmallVector<WeakVH, 16> DeadInsts;
     bool Changed;
   public:
 
     static char ID; // Pass identification, replacement for typeid
-    IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
-                       Changed(false) {
+    IndVarSimplify()
+        : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
       initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<DominatorTree>();
-      AU.addRequired<LoopInfo>();
-
AU.addRequired(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addPreserved(); + AU.addPreserved(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } private: - virtual void releaseMemory() { + void releaseMemory() override { DeadInsts.clear(); } @@ -107,21 +129,25 @@ namespace { void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); + bool CanLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); + + Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, + Instruction *InsertPt, Type *Ty); }; } char IndVarSimplify::ID = 0; INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", "Induction Variable Simplification", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(IndVarSimplify, "indvars", @@ -193,7 +219,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, if (!PHI) return User; - Instruction *InsertPt = 0; + Instruction *InsertPt = nullptr; for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { if (PHI->getIncomingValue(i) != Def) continue; @@ -254,34 +280,34 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast(PN->getIncomingValue(BackEdge)); - if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; + if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return; // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast(Incr->getOperand(1)); int64_t IncValue; - if (IncValueVal == 0 || Incr->getOperand(0) != PN || + if (IncValueVal == nullptr || Incr->getOperand(0) != PN || !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) return; // Check Incr uses. One user is PN and the other user is an exit condition // used by the conditional terminator. - Value::use_iterator IncrUse = Incr->use_begin(); + Value::user_iterator IncrUse = Incr->user_begin(); Instruction *U1 = cast(*IncrUse++); - if (IncrUse == Incr->use_end()) return; + if (IncrUse == Incr->user_end()) return; Instruction *U2 = cast(*IncrUse++); - if (IncrUse != Incr->use_end()) return; + if (IncrUse != Incr->user_end()) return; // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't // only used by a branch, we can't transform it. FCmpInst *Compare = dyn_cast(U1); if (!Compare) Compare = dyn_cast(U2); - if (Compare == 0 || !Compare->hasOneUse() || - !isa(Compare->use_back())) + if (!Compare || !Compare->hasOneUse() || + !isa(Compare->user_back())) return; - BranchInst *TheBr = cast(Compare->use_back()); + BranchInst *TheBr = cast(Compare->user_back()); // We need to verify that the branch actually controls the iteration count // of the loop. If not, the new IV can overflow and no one will notice. 
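
Aside (not part of the patch): a minimal source-level sketch of the pattern HandleFloatingPointIV looks for, assuming a float induction variable whose start, step, and exit bound are all exactly representable as integers; the code below is illustrative only, not taken from LLVM.

// Sketch only: the kind of loop HandleFloatingPointIV rewrites.
#include <cstdio>

int main() {
  // Before: a floating-point IV with an integral start (0.0), an integral
  // fadd step (1.0), and an integral fcmp exit bound (100.0) whose only
  // users are the increment and the exit compare.
  for (float F = 0.0f; F != 100.0f; F += 1.0f)
    std::printf("%.1f\n", F);

  // After (conceptually): the pass replaces the float IV with an integer
  // IV and materializes the float value only where it is actually used.
  for (int I = 0; I != 100; ++I)
    std::printf("%.1f\n", static_cast<float>(I));

  return 0;
}
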
@@ -298,7 +324,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // transform it. ConstantFP *ExitValueVal = dyn_cast(Compare->getOperand(1)); int64_t ExitValue; - if (ExitValueVal == 0 || + if (ExitValueVal == nullptr || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; @@ -459,6 +485,33 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { SE->forgetLoop(L); } +namespace { +// Collect information about PHI nodes which can be transformed in +// RewriteLoopExitValues. +struct RewritePhi { + PHINode *PN; + unsigned Ith; // Ith incoming value. + Value *Val; // Exit value after expansion. + bool HighCost; // High Cost when expansion. + bool SafePhi; // LCSSASafePhiForRAUW. + + RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S) + : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {} +}; +} + +Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, + Loop *L, Instruction *InsertPt, + Type *ResultTy) { + // Before expanding S into an expensive LLVM expression, see if we can use an + // already existing value as the expansion for S. + if (Value *RetValue = Rewriter.findExistingExpansion(S, InsertPt, L)) + return RetValue; + + // We didn't find anything, fall back to using SCEVExpander. + return Rewriter.expandCodeFor(S, ResultTy, InsertPt); +} + //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. @@ -481,6 +534,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); + SmallVector RewritePhiSet; // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. @@ -494,6 +548,21 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { unsigned NumPreds = PN->getNumIncomingValues(); + // We would like to be able to RAUW single-incoming value PHI nodes. We + // have to be certain this is safe even when this is an LCSSA PHI node. + // While the computed exit value is no longer varying in *this* loop, the + // exit block may be an exit block for an outer containing loop as well, + // the exit value may be varying in the outer loop, and thus it may still + // require an LCSSA PHI node. The safe case is when this is + // single-predecessor PHI node (LCSSA) and the exit block containing it is + // part of the enclosing loop, or this is the outer most loop of the nest. + // In either case the exit value could (at most) be varying in the same + // loop body as the phi node itself. Thus if it is in turn used outside of + // an enclosing loop it will only be via a separate LCSSA node. + bool LCSSASafePhiForRAUW = + NumPreds == 1 && + (!L->getParentLoop() || L->getParentLoop() == LI->getLoopFor(ExitBB)); + // Iterate over all of the PHI nodes. BasicBlock::iterator BBI = ExitBB->begin(); while ((PN = dyn_cast(BBI++))) { @@ -532,7 +601,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. 
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); - if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue)) + if (!SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) continue; // Computing the value outside of the loop brings no benefit if : @@ -544,8 +614,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { unsigned NumHardInternalUses = 0; unsigned NumSoftExternalUses = 0; unsigned NumUses = 0; - for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end(); - IB!=IE && NumUses<=6 ; ++IB) { + for (auto IB = Inst->user_begin(), IE = Inst->user_end(); + IB != IE && NumUses <= 6; ++IB) { Instruction *UseInstr = cast(*IB); unsigned Opc = UseInstr->getOpcode(); NumUses++; @@ -557,9 +627,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Do not count the Phi as a use. LCSSA may have inserted // plenty of trivial ones. NumUses--; - for (Value::use_iterator PB=UseInstr->use_begin(), - PE=UseInstr->use_end(); - PB!=PE && NumUses<=6 ; ++PB, ++NumUses) { + for (auto PB = UseInstr->user_begin(), + PE = UseInstr->user_end(); + PB != PE && NumUses <= 6; ++PB, ++NumUses) { unsigned PhiOpc = cast(*PB)->getOpcode(); if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret) NumSoftExternalUses++; @@ -574,7 +644,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { continue; } - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); + bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); + Value *ExitVal = + ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType()); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); @@ -583,33 +655,43 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { DeadInsts.push_back(ExitVal); continue; } - Changed = true; - ++NumReplaced; - PN->setIncomingValue(i, ExitVal); + // Collect all the candidate PHINodes to be rewritten. + RewritePhiSet.push_back( + RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW)); + } + } + } - // If this instruction is dead now, delete it. Don't do it now to avoid - // invalidating iterators. - if (isInstructionTriviallyDead(Inst, TLI)) - DeadInsts.push_back(Inst); + bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet); - if (NumPreds == 1) { - // Completely replace a single-pred PHI. This is safe, because the - // NewVal won't be variant in the loop, so we don't need an LCSSA phi - // node anymore. - PN->replaceAllUsesWith(ExitVal); - PN->eraseFromParent(); - } - } - if (NumPreds != 1) { - // Clone the PHI and delete the original one. This lets IVUsers and - // any other maps purge the original user from their records. - PHINode *NewPN = cast(PN->clone()); - NewPN->takeName(PN); - NewPN->insertBefore(PN); - PN->replaceAllUsesWith(NewPN); - PN->eraseFromParent(); - } + // Transformation. + for (const RewritePhi &Phi : RewritePhiSet) { + PHINode *PN = Phi.PN; + Value *ExitVal = Phi.Val; + + // Only do the rewrite when the ExitValue can be expanded cheaply. + // If LoopCanBeDel is true, rewrite exit value aggressively. + if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { + DeadInsts.push_back(ExitVal); + continue; + } + + Changed = true; + ++NumReplaced; + Instruction *Inst = cast(PN->getIncomingValue(Phi.Ith)); + PN->setIncomingValue(Phi.Ith, ExitVal); + + // If this instruction is dead now, delete it. 
Don't do it now to avoid + // invalidating iterators. + if (isInstructionTriviallyDead(Inst, TLI)) + DeadInsts.push_back(Inst); + + // If we determined that this PHI is safe to replace even if an LCSSA + // PHI, do so. + if (Phi.SafePhi) { + PN->replaceAllUsesWith(ExitVal); + PN->eraseFromParent(); } } @@ -618,6 +700,65 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { Rewriter.clearInsertPoint(); } +/// CanLoopBeDeleted - Check whether it is possible to delete the loop after +/// rewriting exit value. If it is possible, ignore ReplaceExitValue and +/// do rewriting aggressively. +bool IndVarSimplify::CanLoopBeDeleted( + Loop *L, SmallVector &RewritePhiSet) { + + BasicBlock *Preheader = L->getLoopPreheader(); + // If there is no preheader, the loop will not be deleted. + if (!Preheader) + return false; + + // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. + // We obviate multiple ExitingBlocks case for simplicity. + // TODO: If we see testcase with multiple ExitingBlocks can be deleted + // after exit value rewriting, we can enhance the logic here. + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + SmallVector ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1) + return false; + + BasicBlock *ExitBlock = ExitBlocks[0]; + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast(BI)) { + Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); + + // If the Incoming value of P is found in RewritePhiSet, we know it + // could be rewritten to use a loop invariant value in transformation + // phase later. Skip it in the loop invariant check below. + bool found = false; + for (const RewritePhi &Phi : RewritePhiSet) { + unsigned i = Phi.Ith; + if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { + found = true; + break; + } + } + + Instruction *I; + if (!found && (I = dyn_cast(Incoming))) + if (!L->hasLoopInvariantOperands(I)) + return false; + + ++BI; + } + + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) { + for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE; + ++BI) { + if (BI->mayHaveSideEffects()) + return false; + } + } + + return true; +} + //===----------------------------------------------------------------------===// // IV Widening - Extend the width of an IV to cover its widest uses. //===----------------------------------------------------------------------===// @@ -629,40 +770,40 @@ namespace { struct WideIVInfo { PHINode *NarrowIV; Type *WidestNativeType; // Widest integer type created [sz]ext - bool IsSigned; // Was an sext user seen before a zext? + bool IsSigned; // Was a sext user seen before a zext? - WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {} - }; - - class WideIVVisitor : public IVVisitor { - ScalarEvolution *SE; - const DataLayout *TD; - - public: - WideIVInfo WI; - - WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV, - const DataLayout *TData) : - SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; } - - // Implement the interface used by simplifyUsersOfIV. - virtual void visitCast(CastInst *Cast); + WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr), + IsSigned(false) {} }; } /// visitCast - Update information about the induction variable that is /// extended by this sign or zero extend operation. This is used to determine /// the final width of the IV before actually widening it. 
-void WideIVVisitor::visitCast(CastInst *Cast) { +static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, + const TargetTransformInfo *TTI) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) return; Type *Ty = Cast->getType(); uint64_t Width = SE->getTypeSizeInBits(Ty); - if (TD && !TD->isLegalInteger(Width)) + if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) return; + // Cast is either an sext or zext up to this point. + // We should not widen an indvar if arithmetics on the wider indvar are more + // expensive than those on the narrower indvar. We check only the cost of ADD + // because at least an ADD is required to increment the induction variable. We + // could compute more comprehensively the cost of all instructions on the + // induction variable when necessary. + if (TTI && + TTI->getArithmeticInstrCost(Instruction::Add, Ty) > + TTI->getArithmeticInstrCost(Instruction::Add, + Cast->getOperand(0)->getType())) { + return; + } + if (!WI.WidestNativeType) { WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); WI.IsSigned = IsSigned; @@ -687,7 +828,7 @@ struct NarrowIVDefUse { Instruction *NarrowUse; Instruction *WideDef; - NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {} + NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {} NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} @@ -730,9 +871,9 @@ public: L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - WidePhi(0), - WideInc(0), - WideIncExpr(0), + WidePhi(nullptr), + WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); } @@ -749,8 +890,13 @@ protected: const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU); + const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const; + Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); + bool WidenLoopCompare(NarrowIVDefUse DU); + void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; } // anonymous namespace @@ -787,7 +933,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: - return 0; + return nullptr; case Instruction::Add: case Instruction::Mul: case Instruction::UDiv: @@ -825,21 +971,38 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { } } +const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + unsigned OpCode) const { + if (OpCode == Instruction::Add) + return SE->getAddExpr(LHS, RHS); + if (OpCode == Instruction::Sub) + return SE->getMinusSCEV(LHS, RHS); + if (OpCode == Instruction::Mul) + return SE->getMulExpr(LHS, RHS); + + llvm_unreachable("Unsupported opcode."); +} + /// No-wrap operations can transfer sign extension of their result to their /// operands. Generate the SCEV value for the widened operation without /// actually modifying the IR yet. If the expression after extending the /// operands is an AddRec for this loop, return it. const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { + // Handle the common case of add - if (DU.NarrowUse->getOpcode() != Instruction::Add) - return 0; + const unsigned OpCode = DU.NarrowUse->getOpcode(); + // Only Add/Sub/Mul instructions supported yet. 
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub && + OpCode != Instruction::Mul) + return nullptr; // One operand (NarrowDef) has already been extended to WideDef. Now determine // if extending the other will lead to a recurrence. - unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; + const unsigned ExtendOperIdx = + DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); - const SCEV *ExtendOperExpr = 0; + const SCEV *ExtendOperExpr = nullptr; const OverflowingBinaryOperator *OBO = cast(DU.NarrowUse); if (IsSigned && OBO->hasNoSignedWrap()) @@ -849,36 +1012,43 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { ExtendOperExpr = SE->getZeroExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); else - return 0; + return nullptr; - // When creating this AddExpr, don't apply the current operations NSW or NUW + // When creating this SCEV expr, don't apply the current operations NSW or NUW // flags. This instruction may be guarded by control flow that the no-wrap // behavior depends on. Non-control-equivalent instructions can be mapped to // the same SCEV expression, and it would be incorrect to transfer NSW/NUW // semantics to those operations. - const SCEVAddRecExpr *AddRec = dyn_cast( - SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr)); + const SCEV *lhs = SE->getSCEV(DU.WideDef); + const SCEV *rhs = ExtendOperExpr; + + // Let's swap operands to the initial order for the case of non-commutative + // operations, like SUB. See PR21014. + if (ExtendOperIdx == 0) + std::swap(lhs, rhs); + const SCEVAddRecExpr *AddRec = + dyn_cast(GetSCEVByOpCode(lhs, rhs, OpCode)); if (!AddRec || AddRec->getLoop() != L) - return 0; + return nullptr; return AddRec; } -/// GetWideRecurrence - Is this instruction potentially interesting from -/// IVUsers' perspective after widening it's type? In other words, can the +/// GetWideRecurrence - Is this instruction potentially interesting for further +/// simplification after widening it's type? In other words, can the /// extend be safely hoisted out of the loop with SCEV reducing the value to a /// recurrence on the same loop. If so, return the sign or zero extended /// recurrence. Otherwise return NULL. const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { if (!SE->isSCEVable(NarrowUse->getType())) - return 0; + return nullptr; const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= SE->getTypeSizeInBits(WideType)) { // NarrowUse implicitly widens its operand. e.g. a gep with a narrow // index. So don't follow this use. - return 0; + return nullptr; } const SCEV *WideExpr = IsSigned ? @@ -886,19 +1056,76 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { SE->getZeroExtendExpr(NarrowExpr, WideType); const SCEVAddRecExpr *AddRec = dyn_cast(WideExpr); if (!AddRec || AddRec->getLoop() != L) - return 0; + return nullptr; return AddRec; } +/// This IV user cannot be widen. Replace this use of the original narrow IV +/// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 
+static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { + DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef + << " for user " << *DU.NarrowUse << "\n"); + IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); +} + +/// If the narrow use is a compare instruction, then widen the compare +// (and possibly the other operand). The extend operation is hoisted into the +// loop preheader as far as possible. +bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) { + ICmpInst *Cmp = dyn_cast(DU.NarrowUse); + if (!Cmp) + return false; + + // Sign of IV user and compare must match. + if (IsSigned != CmpInst::isSigned(Cmp->getPredicate())) + return false; + + Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); + unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); + + // Widen the compare instruction. + IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); + + // Widen the other operand of the compare, if necessary. + if (CastWidth < IVWidth) { + Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp); + DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); + } + return true; +} + /// WidenIVUse - Determine whether an individual user of the narrow IV can be /// widened. If so, return the wide clone of the user. Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Stop traversing the def-use chain at inner-loop phis or post-loop phis. - if (isa(DU.NarrowUse) && - LI->getLoopFor(DU.NarrowUse->getParent()) != L) - return 0; - + if (PHINode *UsePhi = dyn_cast(DU.NarrowUse)) { + if (LI->getLoopFor(UsePhi->getParent()) != L) { + // For LCSSA phis, sink the truncate outside the loop. + // After SimplifyCFG most loop exit targets have a single predecessor. + // Otherwise fall back to a truncate within the loop. + if (UsePhi->getNumOperands() != 1) + truncateIVUse(DU, DT); + else { + PHINode *WidePhi = + PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", + UsePhi); + WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); + IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt()); + Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); + UsePhi->replaceAllUsesWith(Trunc); + DeadInsts.emplace_back(UsePhi); + DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi + << " to " << *WidePhi << "\n"); + } + return nullptr; + } + } // Our raison d'etre! Eliminate sign and zero extension. if (IsSigned ? isa(DU.NarrowUse) : isa(DU.NarrowUse)) { Value *NewDef = DU.WideDef; @@ -925,7 +1152,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { << " replaced by " << *DU.WideDef << "\n"); ++NumElimExt; DU.NarrowUse->replaceAllUsesWith(NewDef); - DeadInsts.push_back(DU.NarrowUse); + DeadInsts.emplace_back(DU.NarrowUse); } // Now that the extend is gone, we want to expose it's uses for potential // further simplification. We don't need to directly inform SimplifyIVUsers @@ -934,22 +1161,25 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // push the uses of WideDef here. // No further widening is needed. The deceased [sz]ext had done it for us. 
- return 0; + return nullptr; } // Does this user itself evaluate to a recurrence after widening? const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse); + if (!WideAddRec) + WideAddRec = GetExtendedOperandRecurrence(DU); + if (!WideAddRec) { - WideAddRec = GetExtendedOperandRecurrence(DU); - } - if (!WideAddRec) { + // If use is a loop condition, try to promote the condition instead of + // truncating the IV first. + if (WidenLoopCompare(DU)) + return nullptr; + // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); - Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); - DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); - return 0; + truncateIVUse(DU, DT); + return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to // insert a Trunc after a terminator if there happens to be a critical edge. @@ -958,14 +1188,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. - Instruction *WideUse = 0; + Instruction *WideUse = nullptr; if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) WideUse = WideInc; else { WideUse = CloneIVUser(DU); if (!WideUse) - return 0; + return nullptr; } // Evaluation of WideAddRec ensured that the narrow expression could be // extended outside the loop without overflow. This suggests that the wide use @@ -975,8 +1205,8 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { if (WideAddRec != SE->getSCEV(WideUse)) { DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); - DeadInsts.push_back(WideUse); - return 0; + DeadInsts.emplace_back(WideUse); + return nullptr; } // Returning WideUse pushes it on the worklist. @@ -986,15 +1216,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { /// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers. /// void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { - for (Value::use_iterator UI = NarrowDef->use_begin(), - UE = NarrowDef->use_end(); UI != UE; ++UI) { - Instruction *NarrowUse = cast(*UI); + for (User *U : NarrowDef->users()) { + Instruction *NarrowUser = cast(U); // Handle data flow merges and bizarre phi cycles. - if (!Widened.insert(NarrowUse)) + if (!Widened.insert(NarrowUser).second) continue; - NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef)); + NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef)); } } @@ -1012,7 +1241,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); if (!AddRec) - return NULL; + return nullptr; // Widen the induction variable expression. const SCEV *WideIVExpr = IsSigned ? @@ -1025,7 +1254,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Can the IV be extended outside the loop without overflow? AddRec = dyn_cast(WideIVExpr); if (!AddRec || AddRec->getLoop() != L) - return NULL; + return nullptr; // An AddRec must have loop-invariant operands. 
Since this AddRec is // materialized by a loop header phi, the expression cannot have any post-loop @@ -1073,15 +1302,43 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // WidenIVUse may have removed the def-use edge. if (DU.NarrowDef->use_empty()) - DeadInsts.push_back(DU.NarrowDef); + DeadInsts.emplace_back(DU.NarrowDef); } return WidePhi; } +//===----------------------------------------------------------------------===// +// Live IV Reduction - Minimize IVs live across the loop. +//===----------------------------------------------------------------------===// + + //===----------------------------------------------------------------------===// // Simplification of IV users based on SCEV evaluation. //===----------------------------------------------------------------------===// +namespace { + class IndVarSimplifyVisitor : public IVVisitor { + ScalarEvolution *SE; + const TargetTransformInfo *TTI; + PHINode *IVPhi; + + public: + WideIVInfo WI; + + IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, + const TargetTransformInfo *TTI, + const DominatorTree *DTree) + : SE(SCEV), TTI(TTI), IVPhi(IV) { + DT = DTree; + WI.NarrowIV = IVPhi; + if (ReduceLiveIVs) + setSplitOverflowIntrinsics(); + } + + // Implement the interface used by simplifyUsersOfIV. + void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } + }; +} /// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV /// users. Each successive simplification may push more users which may @@ -1113,12 +1370,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. - WideIVVisitor WIV(CurrIV, SE, TD); + IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); - Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV); + Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor); - if (WIV.WI.WidestNativeType) { - WideIVs.push_back(WIV.WI); + if (Visitor.WI.WidestNativeType) { + WideIVs.push_back(Visitor.WI); } } while(!LoopPhis.empty()); @@ -1136,55 +1393,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, // LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. //===----------------------------------------------------------------------===// -/// Check for expressions that ScalarEvolution generates to compute -/// BackedgeTakenInfo. If these expressions have not been reduced, then -/// expanding them may incur additional cost (albeit in the loop preheader). -static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, - SmallPtrSet &Processed, - ScalarEvolution *SE) { - if (!Processed.insert(S)) - return false; - - // If the backedge-taken count is a UDiv, it's very likely a UDiv that - // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a - // precise expression, rather than a UDiv from the user's code. If we can't - // find a UDiv in the code with some simple searching, assume the former and - // forego rewriting the loop. - if (isa(S)) { - ICmpInst *OrigCond = dyn_cast(BI->getCondition()); - if (!OrigCond) return true; - const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); - R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); - if (R != S) { - const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); - L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); - if (L != S) - return true; - } - } - - // Recurse past add expressions, which commonly occur in the - // BackedgeTakenCount. 
They may already exist in program code, and if not, - // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) { - if (isHighCostExpansion(*I, BI, Processed, SE)) - return true; - } - return false; - } - - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa(S) || isa(S)) - return true; - - // If we haven't recognized an expensive SCEV pattern, assume it's an - // expression produced by program code. - return false; -} - /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken /// count expression can be safely and cheaply expanded into an instruction /// sequence that can be used by LinearFunctionTestReplace. @@ -1198,7 +1406,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint). /// However, we don't yet have a strong motivation for converting loop tests /// into inequality tests. -static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, + SCEVExpander &Rewriter) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa(BackedgeTakenCount) || BackedgeTakenCount->isZero()) @@ -1208,12 +1417,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { return false; // Can't rewrite non-branch yet. - BranchInst *BI = dyn_cast(L->getExitingBlock()->getTerminator()); - if (!BI) + if (!isa(L->getExitingBlock()->getTerminator())) return false; - SmallPtrSet Processed; - if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE)) + if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L)) return false; return true; @@ -1224,7 +1431,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { Instruction *IncI = dyn_cast(IncV); if (!IncI) - return 0; + return nullptr; switch (IncI->getOpcode()) { case Instruction::Add: @@ -1235,17 +1442,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { if (IncI->getNumOperands() == 2) break; default: - return 0; + return nullptr; } PHINode *Phi = dyn_cast(IncI->getOperand(0)); if (Phi && Phi->getParent() == L->getHeader()) { if (isLoopInvariant(IncI->getOperand(1), L, DT)) return Phi; - return 0; + return nullptr; } if (IncI->getOpcode() == Instruction::GetElementPtr) - return 0; + return nullptr; // Allow add/sub to be commuted. Phi = dyn_cast(IncI->getOperand(1)); @@ -1253,7 +1460,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { if (isLoopInvariant(IncI->getOperand(0), L, DT)) return Phi; } - return 0; + return nullptr; } /// Return the compare guarding the loop latch, or NULL for unrecognized tests. @@ -1263,7 +1470,7 @@ static ICmpInst *getLoopTest(Loop *L) { BasicBlock *LatchBlock = L->getLoopLatch(); // Don't bother with LFTR if the loop is not properly simplified. if (!LatchBlock) - return 0; + return nullptr; BranchInst *BI = dyn_cast(L->getExitingBlock()->getTerminator()); assert(BI && "expected exit branch"); @@ -1313,7 +1520,7 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) { /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils /// down to checking that all operands are constant and listing instructions /// that may hide undef. 
-static bool hasConcreteDefImpl(Value *V, SmallPtrSet &Visited, +static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl &Visited, unsigned Depth) { if (isa(V)) return !isa(V); @@ -1333,7 +1540,7 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSet &Visited, // Optimistically handle other instructions. for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { - if (!Visited.insert(*OI)) + if (!Visited.insert(*OI).second) continue; if (!hasConcreteDefImpl(*OI, Visited, Depth+1)) return false; @@ -1358,15 +1565,11 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); Value *IncV = Phi->getIncomingValue(LatchIdx); - for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end(); - UI != UE; ++UI) { - if (*UI != Cond && *UI != IncV) return false; - } + for (User *U : Phi->users()) + if (U != Cond && U != IncV) return false; - for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end(); - UI != UE; ++UI) { - if (*UI != Cond && *UI != Phi) return false; - } + for (User *U : IncV->users()) + if (U != Cond && U != Phi) return false; return true; } @@ -1383,17 +1586,16 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. /// This is difficult in general for SCEV because of potential overflow. But we /// could at least handle constant BECounts. -static PHINode * -FindLoopCounter(Loop *L, const SCEV *BECount, - ScalarEvolution *SE, DominatorTree *DT, const DataLayout *TD) { +static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT) { uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); Value *Cond = cast(L->getExitingBlock()->getTerminator())->getCondition(); // Loop over all of the PHI nodes, looking for a simple counter. - PHINode *BestPhi = 0; - const SCEV *BestInit = 0; + PHINode *BestPhi = nullptr; + const SCEV *BestInit = nullptr; BasicBlock *LatchBlock = L->getLoopLatch(); assert(LatchBlock && "needsLFTR should guarantee a loop latch"); @@ -1414,7 +1616,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount, // AR may be wider than BECount. With eq/ne tests overflow is immaterial. // AR may not be a narrower type, or we may never exit. uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); - if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth))) + if (PhiWidth < BCWidth || + !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth)) continue; const SCEV *Step = dyn_cast(AR->getStepRecurrence(*SE)); @@ -1479,8 +1682,14 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) { + // IVOffset will be the new GEP offset that is interpreted by GEP as a + // signed value. IVCount on the other hand represents the loop trip count, + // which is an unsigned value. FindLoopCounter only allows induction + // variables that have a positive unit stride of one. This means we don't + // have to handle the case of negative offsets (yet) and just need to zero + // extend IVCount. Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); - const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy); + const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy); // Expand the code for the iteration count. 
assert(SE->isLoopInvariant(IVOffset, L) && @@ -1497,7 +1706,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, && "unit stride pointer IV must be i8*"); IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); - return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit"); + return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit"); } else { // In any other case, convert both IVInit and IVCount to integers before @@ -1506,11 +1715,12 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). // // Valid Cases: (1) both integers is most common; (2) both may be pointers - // for simple memset-style loops; (3) IVInit is an integer and IVCount is a - // pointer may occur when enable-iv-rewrite generates a canonical IV on top - // of case #2. + // for simple memset-style loops. + // + // IVInit integer and IVCount pointer would only occur if a canonical IV + // were generated on top of case #2, which is not expected. - const SCEV *IVLimit = 0; + const SCEV *IVLimit = nullptr; // For unit stride, IVCount = Start + BECount with 2's complement overflow. // For non-zero Start, compute IVCount here. if (AR->getStart()->isZero()) @@ -1550,7 +1760,7 @@ LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { - assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition"); // Initialize CmpIndVar and IVCount to their preincremented values. Value *CmpIndVar = IndVar; @@ -1689,13 +1899,12 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { // Determine if there is a use in or before the loop (direct or // otherwise). bool UsedInLoop = false; - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - User *U = *UI; - BasicBlock *UseBB = cast(U)->getParent(); - if (PHINode *P = dyn_cast(U)) { + for (Use &U : I->uses()) { + Instruction *User = cast(U.getUser()); + BasicBlock *UseBB = User->getParent(); + if (PHINode *P = dyn_cast(User)) { unsigned i = - PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); + PHINode::getIncomingValueNumForOperand(U.getOperandNo()); UseBB = P->getIncomingBlock(i); } if (UseBB == Preheader || L->contains(UseBB)) { @@ -1735,6 +1944,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { //===----------------------------------------------------------------------===// bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + // If LoopSimplify form is not available, stay out of trouble. Some notes: // - LSR currently only supports LoopSimplify-form loops. Indvars' // canonicalization can be a pessimization without LSR to "clean up" @@ -1746,11 +1958,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!L->isLoopSimplifyForm()) return false; - LI = &getAnalysis(); - SE = &getAnalysis(); - DT = &getAnalysis(); - TD = getAnalysisIfAvailable(); - TLI = getAnalysisIfAvailable(); + LI = &getAnalysis().getLoopInfo(); + SE = &getAnalysis().getSE(); + DT = &getAnalysis().getDomTree(); + auto *TLIP = getAnalysisIfAvailable(); + TLI = TLIP ? &TLIP->getTLI() : nullptr; + auto *TTIP = getAnalysisIfAvailable(); + TTI = TTIP ? 
&TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); DeadInsts.clear(); Changed = false; @@ -1762,7 +1977,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. - SCEVExpander Rewriter(*SE, "indvars"); + SCEVExpander Rewriter(*SE, DL, "indvars"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif @@ -1782,7 +1997,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // loop into any instructions outside of the loop that use the final values of // the current expressions. // - if (!isa(BackedgeTakenCount)) + if (ReplaceExitValue != NeverRepl && + !isa(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. @@ -1790,14 +2006,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { - PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD); + if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) { + PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. Instead any // pass that uses the SCEVExpander must do it. This does not work well for - // loop passes because SCEVExpander makes assumptions about all loops, while - // LoopPassManager only forces the current loop to be simplified. + // loop passes because SCEVExpander makes assumptions about all loops, + // while LoopPassManager only forces the current loop to be simplified. // // FIXME: SCEV expansion has no way to bail out, so the caller must // explicitly check any assumptions made by SCEV. Brittle. @@ -1816,7 +2032,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // which are now dead. while (!DeadInsts.empty()) if (Instruction *Inst = - dyn_cast_or_null(&*DeadInsts.pop_back_val())) + dyn_cast_or_null(DeadInsts.pop_back_val())) RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); // The Rewriter may not be used from this point on.
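
Closing aside (not part of the patch): a source-level sketch of the effect of LinearFunctionTestReplace on a simple counted loop, under the assumption of a non-overflowing unit-stride counter; the function names below are illustrative only.

#include <cstdint>

// Before LFTR (conceptually): the exit branch tests "I < N" directly.
int64_t sumBefore(const int32_t *A, int32_t N) {
  int64_t S = 0;
  for (int32_t I = 0; I < N; ++I)
    S += A[I];
  return S;
}

// After LFTR (conceptually): the backedge-taken count is expanded once
// outside the loop, and the exit branch becomes a simple inequality test
// against that limit, freeing the IV's other users for simplification.
int64_t sumAfter(const int32_t *A, int32_t N) {
  int64_t S = 0;
  const int32_t Limit = N > 0 ? N : 0; // trip count computed in the preheader
  for (int32_t I = 0; I != Limit; ++I)
    S += A[I];
  return S;
}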