X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopStrengthReduce.cpp;h=94c229a8e2440da07c098ec18102cd6a1bd81b9e;hp=85f1389fe31934d5a34515672b5b8262000a0323;hb=d9b0b025612992a0b724eeca8bdf10b1d7a5c355;hpb=64925c55c65f9345a69fb67db07aa62cfb723577 diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 85f1389fe31..94c229a8e24 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -77,11 +77,11 @@ #include using namespace llvm; -static cl::opt EnableNested( - "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops")); - -static cl::opt EnableRetry( - "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); +/// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for +/// bail out. This threshold is far beyond the number of users that LSR can +/// conceivably solve, so it should not affect generated code, but catches the +/// worst cases before LSR burns too much compile time and stack space. +static const unsigned MaxIVUsers = 200; // Temporary flag to cleanup congruent phis after LSR phi expansion. // It's currently disabled until we can determine whether it's truly useful or @@ -710,8 +710,9 @@ static bool isHighCostExpansion(const SCEV *S, Value *UVal = U->getValue(); for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end(); UI != UE; ++UI) { - Instruction *User = cast(*UI); - if (User->getOpcode() == Instruction::Mul + // If U is a constant, it may be used by a ConstantExpr. + Instruction *User = dyn_cast(*UI); + if (User && User->getOpcode() == Instruction::Mul && SE.isSCEVable(User->getType())) { return SE.getSCEV(User) == Mul; } @@ -824,36 +825,20 @@ void Cost::RateRegister(const SCEV *Reg, const Loop *L, ScalarEvolution &SE, DominatorTree &DT) { if (const SCEVAddRecExpr *AR = dyn_cast(Reg)) { - if (AR->getLoop() == L) - AddRecCost += 1; /// TODO: This should be a function of the stride. - // If this is an addrec for another loop, don't second-guess its addrec phi // nodes. LSR isn't currently smart enough to reason about more than one - // loop at a time. LSR has either already run on inner loops, will not run - // on other loops, and cannot be expected to change sibling loops. If the - // AddRec exists, consider it's register free and leave it alone. Otherwise, - // do not consider this formula at all. - else if (!EnableNested || L->contains(AR->getLoop()) || - (!AR->getLoop()->contains(L) && - DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { + // loop at a time. LSR has already run on inner loops, will not run on outer + // loops, and cannot be expected to change sibling loops. + if (AR->getLoop() != L) { + // If the AddRec exists, consider it's register free and leave it alone. if (isExistingPhi(AR, SE)) return; - // For !EnableNested, never rewrite IVs in other loops. - if (!EnableNested) { - Loose(); - return; - } - // If this isn't one of the addrecs that the loop already has, it - // would require a costly new phi and add. TODO: This isn't - // precisely modeled right now. - ++NumBaseAdds; - if (!Regs.count(AR->getStart())) { - RateRegister(AR->getStart(), Regs, L, SE, DT); - if (isLoser()) - return; - } + // Otherwise, do not consider this formula at all. + Loose(); + return; } + AddRecCost += 1; /// TODO: This should be a function of the stride. // Add the step value register, if it needs one. // TODO: The non-affine case isn't precisely modeled here. @@ -1303,10 +1288,19 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. if (AM.BaseOffs != 0) { - if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs); - return false; + if (!TLI) + return false; + // We have one of: + // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset + // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset + // Offs is the ICmp immediate. + int64_t Offs = AM.BaseOffs; + if (AM.Scale == 0) + Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN. + return TLI->isLegalICmpImmediate(Offs); } + // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg return true; case LSRUse::Basic: @@ -1318,7 +1312,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, return AM.Scale == 0 || AM.Scale == -1; } - return false; + llvm_unreachable("Invalid LSRUse Kind!"); } static bool isLegalUse(TargetLowering::AddrMode AM, @@ -1445,7 +1439,41 @@ struct IVInc { // IVChain - The list of IV increments in program order. // We typically add the head of a chain without finding subsequent links. -typedef SmallVector IVChain; +struct IVChain { + SmallVector Incs; + const SCEV *ExprBase; + + IVChain() : ExprBase(0) {} + + IVChain(const IVInc &Head, const SCEV *Base) + : Incs(1, Head), ExprBase(Base) {} + + typedef SmallVectorImpl::const_iterator const_iterator; + + // begin - return the first increment in the chain. + const_iterator begin() const { + assert(!Incs.empty()); + return llvm::next(Incs.begin()); + } + const_iterator end() const { + return Incs.end(); + } + + // hasIncs - Returns true if this chain contains any increments. + bool hasIncs() const { return Incs.size() >= 2; } + + // add - Add an IVInc to the end of this chain. + void add(const IVInc &X) { Incs.push_back(X); } + + // tailUserInst - Returns the last UserInst in the chain. + Instruction *tailUserInst() const { return Incs.back().UserInst; } + + // isProfitableIncrement - Returns true if IncExpr can be profitably added to + // this chain. + bool isProfitableIncrement(const SCEV *OperExpr, + const SCEV *IncExpr, + ScalarEvolution&); +}; /// ChainUsers - Helper for CollectChains to track multiple IV increment uses. /// Distinguish between FarUsers that definitely cross IV increments and @@ -1573,9 +1601,11 @@ class LSRInstance { BasicBlock::iterator HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl &Inputs) const; - BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, - const LSRFixup &LF, - const LSRUse &LU) const; + BasicBlock::iterator + AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU, + SCEVExpander &Rewriter) const; Value *Expand(const LSRFixup &LF, const Formula &F, @@ -2164,7 +2194,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, return &LU; // This is the formula where all the registers and symbols matched; // there aren't going to be any others. Since we declined it, we - // can skip the rest of the formulae and procede to the next LSRUse. + // can skip the rest of the formulae and proceed to the next LSRUse. break; } } @@ -2191,7 +2221,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { do { const SCEV *S = Worklist.pop_back_val(); if (const SCEVAddRecExpr *AR = dyn_cast(S)) { - if (EnableNested || AR->getLoop() == L) + if (AR->getLoop() == L) Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart()); } else if (const SCEVAddExpr *Add = dyn_cast(S)) { @@ -2323,41 +2353,23 @@ static const SCEV *getExprBase(const SCEV *S) { /// increment will be an offset relative to the same base. We allow such offsets /// to potentially be used as chain increment as long as it's not obviously /// expensive to expand using real instructions. -static const SCEV * -getProfitableChainIncrement(Value *NextIV, Value *PrevIV, - const IVChain &Chain, Loop *L, - ScalarEvolution &SE, const TargetLowering *TLI) { - // Prune the solution space aggressively by checking that both IV operands - // are expressions that operate on the same unscaled SCEVUnknown. This - // "base" will be canceled by the subsequent getMinusSCEV call. Checking first - // avoids creating extra SCEV expressions. - const SCEV *OperExpr = SE.getSCEV(NextIV); - const SCEV *PrevExpr = SE.getSCEV(PrevIV); - if (getExprBase(OperExpr) != getExprBase(PrevExpr) && !StressIVChain) - return 0; - - const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr); - if (!SE.isLoopInvariant(IncExpr, L)) - return 0; - - // We are not able to expand an increment unless it is loop invariant, - // however, the following checks are purely for profitability. +bool IVChain::isProfitableIncrement(const SCEV *OperExpr, + const SCEV *IncExpr, + ScalarEvolution &SE) { + // Aggressively form chains when -stress-ivchain. if (StressIVChain) - return IncExpr; + return true; // Do not replace a constant offset from IV head with a nonconstant IV // increment. if (!isa(IncExpr)) { - const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Chain[0].IVOperand)); + const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand)); if (isa(SE.getMinusSCEV(OperExpr, HeadExpr))) return 0; } SmallPtrSet Processed; - if (isHighCostExpansion(IncExpr, Processed, SE)) - return 0; - - return IncExpr; + return !isHighCostExpansion(IncExpr, Processed, SE); } /// Return true if the number of registers needed for the chain is estimated to @@ -2376,18 +2388,18 @@ isProfitableChain(IVChain &Chain, SmallPtrSet &Users, if (StressIVChain) return true; - if (Chain.size() <= 2) + if (!Chain.hasIncs()) return false; if (!Users.empty()) { - DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " users:\n"; + DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n"; for (SmallPtrSet::const_iterator I = Users.begin(), E = Users.end(); I != E; ++I) { dbgs() << " " << **I << "\n"; }); return false; } - assert(!Chain.empty() && "empty IV chains are not allowed"); + assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); // The chain itself may require a register, so intialize cost to 1. int cost = 1; @@ -2395,15 +2407,15 @@ isProfitableChain(IVChain &Chain, SmallPtrSet &Users, // A complete chain likely eliminates the need for keeping the original IV in // a register. LSR does not currently know how to form a complete chain unless // the header phi already exists. - if (isa(Chain.back().UserInst) - && SE.getSCEV(Chain.back().UserInst) == Chain[0].IncExpr) { + if (isa(Chain.tailUserInst()) + && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) { --cost; } const SCEV *LastIncExpr = 0; unsigned NumConstIncrements = 0; unsigned NumVarIncrements = 0; unsigned NumReusedIncrements = 0; - for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end(); + for (IVChain::const_iterator I = Chain.begin(), E = Chain.end(); I != E; ++I) { if (I->IncExpr->isZero()) @@ -2439,7 +2451,8 @@ isProfitableChain(IVChain &Chain, SmallPtrSet &Users, // the stride. cost -= NumReusedIncrements; - DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " Cost: " << cost << "\n"); + DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost + << "\n"); return cost < 0; } @@ -2450,25 +2463,39 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, SmallVectorImpl &ChainUsersVec) { // When IVs are used as types of varying widths, they are generally converted // to a wider type with some uses remaining narrow under a (free) trunc. - Value *NextIV = getWideOperand(IVOper); + Value *const NextIV = getWideOperand(IVOper); + const SCEV *const OperExpr = SE.getSCEV(NextIV); + const SCEV *const OperExprBase = getExprBase(OperExpr); // Visit all existing chains. Check if its IVOper can be computed as a // profitable loop invariant increment from the last link in the Chain. unsigned ChainIdx = 0, NChains = IVChainVec.size(); const SCEV *LastIncExpr = 0; for (; ChainIdx < NChains; ++ChainIdx) { - Value *PrevIV = getWideOperand(IVChainVec[ChainIdx].back().IVOperand); + IVChain &Chain = IVChainVec[ChainIdx]; + + // Prune the solution space aggressively by checking that both IV operands + // are expressions that operate on the same unscaled SCEVUnknown. This + // "base" will be canceled by the subsequent getMinusSCEV call. Checking + // first avoids creating extra SCEV expressions. + if (!StressIVChain && Chain.ExprBase != OperExprBase) + continue; + + Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand); if (!isCompatibleIVType(PrevIV, NextIV)) continue; - // A phi nodes terminates a chain. - if (isa(UserInst) - && isa(IVChainVec[ChainIdx].back().UserInst)) + // A phi node terminates a chain. + if (isa(UserInst) && isa(Chain.tailUserInst())) + continue; + + // The increment must be loop-invariant so it can be kept in a register. + const SCEV *PrevExpr = SE.getSCEV(PrevIV); + const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr); + if (!SE.isLoopInvariant(IncExpr, L)) continue; - if (const SCEV *IncExpr = - getProfitableChainIncrement(NextIV, PrevIV, IVChainVec[ChainIdx], - L, SE, TLI)) { + if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) { LastIncExpr = IncExpr; break; } @@ -2482,20 +2509,24 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, DEBUG(dbgs() << "IV Chain Limit\n"); return; } + LastIncExpr = OperExpr; + // IVUsers may have skipped over sign/zero extensions. We don't currently + // attempt to form chains involving extensions unless they can be hoisted + // into this loop's AddRec. + if (!isa(LastIncExpr)) + return; ++NChains; - IVChainVec.resize(NChains); + IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr), + OperExprBase)); ChainUsersVec.resize(NChains); - LastIncExpr = SE.getSCEV(NextIV); - assert(isa(LastIncExpr) && "expect recurrence at IV user"); - DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr - << "\n"); + DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst + << ") IV=" << *LastIncExpr << "\n"); + } else { + DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst + << ") IV+" << *LastIncExpr << "\n"); + // Add this IV user to the end of the chain. + IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); } - else - DEBUG(dbgs() << "IV Inc: (" << *UserInst << ") IV+" << *LastIncExpr - << "\n"); - - // Add this IV user to the end of the chain. - IVChainVec[ChainIdx].push_back(IVInc(UserInst, IVOper, LastIncExpr)); SmallPtrSet &NearUsers = ChainUsersVec[ChainIdx].NearUsers; // This chain's NearUsers become FarUsers. @@ -2513,13 +2544,14 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, for (Value::use_iterator UseIter = IVOper->use_begin(), UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { Instruction *OtherUse = dyn_cast(*UseIter); + if (!OtherUse || OtherUse == UserInst) + continue; if (SE.isSCEVable(OtherUse->getType()) && !isa(SE.getSCEV(OtherUse)) && IU.isIVUserOrOperand(OtherUse)) { continue; } - if (OtherUse && OtherUse != UserInst) - NearUsers.insert(OtherUse); + NearUsers.insert(OtherUse); } // Since this user is part of the chain, it's no longer considered a use @@ -2550,6 +2582,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, /// loop latch. This will discover chains on side paths, but requires /// maintaining multiple copies of the Chains state. void LSRInstance::CollectChains() { + DEBUG(dbgs() << "Collecting IV Chains.\n"); SmallVector ChainUsersVec; SmallVector LatchPath; @@ -2621,10 +2654,10 @@ void LSRInstance::CollectChains() { } void LSRInstance::FinalizeChain(IVChain &Chain) { - assert(!Chain.empty() && "empty IV chains are not allowed"); - DEBUG(dbgs() << "Final Chain: " << *Chain[0].UserInst << "\n"); + assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); + DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n"); - for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end(); + for (IVChain::const_iterator I = Chain.begin(), E = Chain.end(); I != E; ++I) { DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n"); User::op_iterator UseI = @@ -2658,7 +2691,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, SmallVectorImpl &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. - const IVInc &Head = Chain[0]; + const IVInc &Head = Chain.Incs[0]; User::op_iterator IVOpEnd = Head.UserInst->op_end(); User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); @@ -2690,7 +2723,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, Type *IVTy = IVSrc->getType(); Type *IntTy = SE.getEffectiveSCEVType(IVTy); const SCEV *LeftOverExpr = 0; - for (IVChain::const_iterator IncI = llvm::next(Chain.begin()), + for (IVChain::const_iterator IncI = Chain.begin(), IncE = Chain.end(); IncI != IncE; ++IncI) { Instruction *InsertPt = IncI->UserInst; @@ -2735,7 +2768,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, } // If LSR created a new, wider phi, we may also replace its postinc. We only // do this if we also found a wide value for the head of the chain. - if (isa(Chain.back().UserInst)) { + if (isa(Chain.tailUserInst())) { for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *Phi = dyn_cast(I); ++I) { if (!isCompatibleIVType(Phi, IVSrc)) @@ -3980,24 +4013,29 @@ void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, if (LU.Regs.count(*I)) ReqRegs.insert(*I); - bool AnySatisfiedReqRegs = false; SmallPtrSet NewRegs; Cost NewCost; -retry: for (SmallVectorImpl::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; // Ignore formulae which do not use any of the required registers. + bool SatisfiedReqReg = true; for (SmallSetVector::const_iterator J = ReqRegs.begin(), JE = ReqRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; if ((!F.ScaledReg || F.ScaledReg != Reg) && std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) == - F.BaseRegs.end()) - goto skip; + F.BaseRegs.end()) { + SatisfiedReqReg = false; + break; + } + } + if (!SatisfiedReqReg) { + // If none of the formulae satisfied the required registers, then we could + // clear ReqRegs and try again. Currently, we simply give up in this case. + continue; } - AnySatisfiedReqRegs = true; // Evaluate the cost of the current formula. If it's already worse than // the current best, prune the search at that point. @@ -4024,18 +4062,6 @@ retry: } Workspace.pop_back(); } - skip:; - } - - if (!EnableRetry && !AnySatisfiedReqRegs) - return; - - // If none of the formulae had all of the required registers, relax the - // constraint so that we don't exclude all formulae. - if (!AnySatisfiedReqRegs) { - assert(!ReqRegs.empty() && "Solver failed even without required registers"); - ReqRegs.clear(); - goto retry; } } @@ -4114,7 +4140,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, // Attempt to find an insert position in the middle of the block, // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && - (!BetterPos || DT.dominates(BetterPos, Inst))) + (!BetterPos || !DT.dominates(Inst, BetterPos))) BetterPos = llvm::next(BasicBlock::iterator(Inst)); } if (!AllDominate) @@ -4131,9 +4157,10 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, /// AdjustInsertPositionForExpand - Determine an input position which will be /// dominated by the operands and which will dominate the result. BasicBlock::iterator -LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, +LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, const LSRFixup &LF, - const LSRUse &LU) const { + const LSRUse &LU, + SCEVExpander &Rewriter) const { // Collect some instructions which must be dominated by the // expanding replacement. These must be dominated by any operands that // will be required in the expansion. @@ -4168,9 +4195,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, } } + assert(!isa(LowestIP) && !isa(LowestIP) + && !isa(LowestIP) && + "Insertion point must be a normal instruction"); + // Then, climb up the immediate dominator tree as far as we can go while // still being dominated by the input positions. - IP = HoistInsertPosition(IP, Inputs); + BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs); // Don't insert instructions before PHI nodes. while (isa(IP)) ++IP; @@ -4181,6 +4212,11 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, // Ignore debug intrinsics. while (isa(IP)) ++IP; + // Set IP below instructions recently inserted by SCEVExpander. This keeps the + // IP consistent across expansions and allows the previously inserted + // instructions to be reused by subsequent expansion. + while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP; + return IP; } @@ -4195,7 +4231,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Determine an input position which will be dominated by the operands and // which will dominate the result. - IP = AdjustInsertPositionForExpand(IP, LF, LU); + IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter); // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. @@ -4481,7 +4517,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl &Solution, // Mark phi nodes that terminate chains so the expander tries to reuse them. for (SmallVectorImpl::const_iterator ChainI = IVChainVec.begin(), ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) { - if (PHINode *PN = dyn_cast(ChainI->back().UserInst)) + if (PHINode *PN = dyn_cast(ChainI->tailUserInst())) Rewriter.setChainedPhi(PN); } @@ -4518,15 +4554,37 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) if (!L->isLoopSimplifyForm()) return; - // All outer loops must have preheaders, or SCEVExpander may not be able to - // materialize an AddRecExpr whose Start is an outer AddRecExpr. - for (const Loop *OuterLoop = L; (OuterLoop = OuterLoop->getParentLoop());) { - if (!OuterLoop->getLoopPreheader()) - return; - } // If there's no interesting work to be done, bail early. if (IU.empty()) return; + // If there's too much analysis to be done, bail early. We won't be able to + // model the problem anyway. + unsigned NumUsers = 0; + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + if (++NumUsers > MaxIVUsers) { + DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L + << "\n"); + return; + } + } + +#ifndef NDEBUG + // All dominating loops must have preheaders, or SCEVExpander may not be able + // to materialize an AddRecExpr whose Start is an outer AddRecExpr. + // + // IVUsers analysis should only create users that are dominated by simple loop + // headers. Since this loop should dominate all of its users, its user list + // should be empty if this loop itself is not within a simple loop nest. + for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader()); + Rung; Rung = Rung->getIDom()) { + BasicBlock *BB = Rung->getBlock(); + const Loop *DomLoop = LI.getLoopFor(BB); + if (DomLoop && DomLoop->getHeader() == BB) { + assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest"); + } + } +#endif // DEBUG + DEBUG(dbgs() << "\nLSR on loop "; WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); dbgs() << ":\n"); @@ -4539,7 +4597,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) if (IU.empty()) return; // Skip nested loops until we can model them better with formulae. - if (!EnableNested && !L->empty()) { + if (!L->empty()) { DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); return; }