+/// Estimate whether forming Chain needs fewer registers than rewriting its IV
+/// users individually. Returns true only for chains judged profitable.
+///
+/// Any entry in Users (IV users that would keep the IV live across the chain's
+/// increments) disqualifies the chain outright. Otherwise the chain's
+/// increments are classified as constant, variable, or reused-variable and
+/// folded into a small register-cost estimate.
+///
+/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
+/// effectively use postinc addressing modes, so a chain is only considered
+/// profitable if the increments can be computed in fewer registers when
+/// chained.
+///
+/// TODO: Consider IVInc free if it's already used in another chain.
+static bool
+isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
+                  ScalarEvolution &SE, const TargetTransformInfo &TTI) {
+  // Stress mode accepts every chain before any other check is made.
+  if (StressIVChain)
+    return true;
+
+  if (!Chain.hasIncs())
+    return false;
+
+  // Remaining users of the IV veto the chain.
+  if (!Users.empty()) {
+    DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
+          for (Instruction *Inst : Users) {
+            dbgs() << " " << *Inst << "\n";
+          });
+    return false;
+  }
+  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+
+  // A complete chain likely eliminates the need for keeping the original IV in
+  // a register. LSR does not currently know how to form a complete chain
+  // unless the header phi already exists.
+  auto *Tail = Chain.tailUserInst();
+  const bool IsCompleteChain =
+      isa<PHINode>(Tail) && SE.getSCEV(Tail) == Chain.Incs[0].IncExpr;
+
+  // Classify each increment of the chain.
+  const SCEV *PrevVarInc = nullptr;
+  unsigned ConstIncs = 0;
+  unsigned VarIncs = 0;
+  unsigned ReusedIncs = 0;
+  for (const IVInc &Inc : Chain) {
+    const SCEV *Step = Inc.IncExpr;
+
+    // A zero increment costs nothing and is not counted at all.
+    if (Step->isZero())
+      continue;
+
+    // Constant increments are neutral: we assume they can be folded into an
+    // addressing mode or an add's immediate operand.
+    if (isa<SCEVConstant>(Step)) {
+      ++ConstIncs;
+      continue;
+    }
+
+    // A variable increment equal to the previous variable one is a reuse.
+    if (Step == PrevVarInc)
+      ++ReusedIncs;
+    else
+      ++VarIncs;
+    PrevVarInc = Step;
+  }
+
+  // The chain itself may require a register, so initialize the cost to 1.
+  int RegCost = 1;
+  if (IsCompleteChain)
+    --RegCost;
+
+  // An IV chain with a single increment is handled by LSR's postinc
+  // uses. However, a chain with multiple increments requires keeping the IV's
+  // value live longer than it needs to be if chained.
+  if (ConstIncs > 1)
+    --RegCost;
+
+  // Materializing increment expressions in the preheader that didn't exist in
+  // the original code may cost a register. For example, sign-extended array
+  // indices can produce ridiculous increments like this:
+  // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+  RegCost += VarIncs;
+
+  // Reusing variable increments likely saves a register to hold the multiple
+  // of the stride.
+  RegCost -= ReusedIncs;
+
+  DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << RegCost
+               << "\n");
+
+  return RegCost < 0;
+}
+
+/// ChainInstruction - Add the (UserInst, IVOper) use to the first existing chain it can extend, or
+/// make it the head of a new chain; then refresh that chain's NearUsers/FarUsers in ChainUsersVec.
+void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec) {
+ // When IVs are used as types of varying widths, they are generally converted
+ // to a wider type with some uses remaining narrow under a (free) trunc.
+ Value *const NextIV = getWideOperand(IVOper);
+ const SCEV *const OperExpr = SE.getSCEV(NextIV);
+ const SCEV *const OperExprBase = getExprBase(OperExpr);
+
+ // Visit all existing chains. Check if this IVOper can be computed as a
+ // profitable loop-invariant increment from the last link in the Chain.
+ unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ const SCEV *LastIncExpr = nullptr;
+ for (; ChainIdx < NChains; ++ChainIdx) {
+ IVChain &Chain = IVChainVec[ChainIdx];
+
+ // Prune the solution space aggressively by checking that both IV operands
+ // are expressions that operate on the same unscaled SCEVUnknown. This
+ // "base" will be canceled by the subsequent getMinusSCEV call. Checking
+ // first avoids creating extra SCEV expressions.
+ if (!StressIVChain && Chain.ExprBase != OperExprBase)
+ continue;
+
+ Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
+ if (!isCompatibleIVType(PrevIV, NextIV))
+ continue;
+
+ // A phi node terminates a chain, so a phi user cannot extend a chain that already ends in a phi.
+ if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
+ continue;
+
+ // The increment must be loop-invariant so it can be kept in a register.
+ const SCEV *PrevExpr = SE.getSCEV(PrevIV);
+ const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
+ if (!SE.isLoopInvariant(IncExpr, L))
+ continue;
+
+ if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
+ LastIncExpr = IncExpr;
+ break;
+ }
+ }
+ // If we haven't found a chain, create a new one, unless we hit the max. Don't
+ // bother for phi nodes, because they must be last in the chain.
+ if (ChainIdx == NChains) {
+ if (isa<PHINode>(UserInst))
+ return;
+ if (NChains >= MaxChains && !StressIVChain) {
+ DEBUG(dbgs() << "IV Chain Limit\n");
+ return;
+ }
+ LastIncExpr = OperExpr;
+ // IVUsers may have skipped over sign/zero extensions. We don't currently
+ // attempt to form chains involving extensions unless they can be hoisted
+ // into this loop's AddRec.
+ if (!isa<SCEVAddRecExpr>(LastIncExpr))
+ return;
+ ++NChains;
+ IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
+ OperExprBase));
+ ChainUsersVec.resize(NChains);
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
+ << ") IV=" << *LastIncExpr << "\n");
+ } else {
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
+ << ") IV+" << *LastIncExpr << "\n");
+ // Add this IV user to the end of the chain.
+ IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
+ }
+ IVChain &Chain = IVChainVec[ChainIdx];
+
+ SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
+ // Once the chain advances by a nonzero increment, this chain's NearUsers become FarUsers.
+ if (!LastIncExpr->isZero()) {
+ ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
+ NearUsers.end());
+ NearUsers.clear();
+ }
+
+ // All other uses of IVOperand become near uses of the chain.
+ // We currently ignore intermediate values within SCEV expressions, assuming
+ // they will eventually be used by the current chain, or can be computed
+ // from one of the chain increments. To be more precise we could
+ // transitively follow its user and only add leaf IV users to the set.
+ for (User *U : IVOper->users()) {
+ Instruction *OtherUse = dyn_cast<Instruction>(U);
+ if (!OtherUse)
+ continue;
+ // Uses in the chain will no longer be uses if the chain is formed.
+ // Include the head of the chain in this iteration (not Chain.begin()).
+ IVChain::const_iterator IncIter = Chain.Incs.begin();
+ IVChain::const_iterator IncEnd = Chain.Incs.end();
+ for( ; IncIter != IncEnd; ++IncIter) {
+ if (IncIter->UserInst == OtherUse)
+ break;
+ }
+ if (IncIter != IncEnd)
+ continue;
+
+ if (SE.isSCEVable(OtherUse->getType())
+ && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
+ && IU.isIVUserOrOperand(OtherUse)) {
+ continue;
+ }
+ NearUsers.insert(OtherUse);
+ }
+
+ // Since this user is part of the chain, it's no longer considered a use
+ // of the chain.
+ ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
+}
+
+/// CollectChains - Populate the vector of Chains (IVChainVec), keeping only chains that isProfitableChain accepts.
+///
+/// This decreases ILP at the architecture level. Targets with ample registers,
+/// multiple memory ports, and no register renaming probably don't want
+/// this. However, such targets should probably disable LSR altogether.
+///
+/// The job of LSR is to make a reasonable choice of induction variables across
+/// the loop. Subsequent passes can easily "unchain" computation exposing more
+/// ILP *within the loop* if the target wants it.
+///
+/// Finding the best IV chain is potentially a scheduling problem. Since LSR
+/// will not reorder memory operations, it will recognize this as a chain, but
+/// will generate redundant IV increments. Ideally this would be corrected later
+/// by a smart scheduler:
+/// = A[i]
+/// = A[i+x]
+/// A[i] =
+/// A[i+x] =
+///
+/// TODO: Walk the entire domtree within this loop, not just the path to the
+/// loop latch. This will discover chains on side paths, but requires
+/// maintaining multiple copies of the Chains state.
+void LSRInstance::CollectChains() {
+ DEBUG(dbgs() << "Collecting IV Chains.\n");
+ SmallVector<ChainUsers, 8> ChainUsersVec;
+
+ SmallVector<BasicBlock *,8> LatchPath;
+ BasicBlock *LoopHeader = L->getHeader();
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
+ Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
+ LatchPath.push_back(Rung->getBlock());
+ }
+ LatchPath.push_back(LoopHeader);
+
+ // Walk the instruction stream from the loop header to the loop latch (LatchPath is latch-first, so reverse it).
+ for (SmallVectorImpl<BasicBlock *>::reverse_iterator
+ BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
+ BBIter != BBEnd; ++BBIter) {
+ for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
+ I != E; ++I) {
+ // Skip phis and instructions that weren't seen by IVUsers analysis.
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
+ continue;
+
+ // Ignore users that are part of a SCEV expression. This way we only
+ // consider leaf IV Users. This effectively rediscovers a portion of
+ // IVUsers analysis but in program order this time.
+ if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
+ continue;
+
+ // Remove this instruction from any NearUsers set it may be in.
+ for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ ChainIdx < NChains; ++ChainIdx) {
+ ChainUsersVec[ChainIdx].NearUsers.erase(I);
+ }
+ // Search for operands that can be chained; each distinct operand is offered to ChainInstruction once.
+ SmallPtrSet<Instruction*, 4> UniqueOperands;
+ User::op_iterator IVOpEnd = I->op_end();
+ User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
+ while (IVOpIter != IVOpEnd) {
+ Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
+ if (UniqueOperands.insert(IVOpInst).second)
+ ChainInstruction(I, IVOpInst, ChainUsersVec);
+ IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ } // Continue walking down the instructions.
+ } // Continue walking down the domtree.
+ // Visit phi backedges to determine if the chain can generate the IV postinc.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (!SE.isSCEVable(PN->getType()))
+ continue;
+
+ Instruction *IncV =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ if (IncV)
+ ChainInstruction(PN, IncV, ChainUsersVec);
+ }
+ // Remove any unprofitable chains, compacting the surviving chains to the front of IVChainVec.
+ unsigned ChainIdx = 0;
+ for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
+ UsersIdx < NChains; ++UsersIdx) {
+ if (!isProfitableChain(IVChainVec[UsersIdx],
+ ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
+ continue;
+ // Preserve the chain at UsersIdx by moving it down to slot ChainIdx.
+ if (ChainIdx != UsersIdx)
+ IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
+ FinalizeChain(IVChainVec[ChainIdx]);
+ ++ChainIdx;
+ }
+ IVChainVec.resize(ChainIdx);
+}
+
+/// FinalizeChain - Commit a chain that has been kept: for every increment in
+/// the chain, locate the operand slot of its user that holds the IV operand
+/// and record that slot in IVIncSet. CollectFixupsAndInitialFormulae skips
+/// operand slots found in IVIncSet.
+void LSRInstance::FinalizeChain(IVChain &Chain) {
+  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
+
+  for (const IVInc &Inc : Chain) {
+    DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
+
+    // Find the use (operand slot) through which the user reads the IV operand.
+    auto *Inst = Inc.UserInst;
+    User::op_iterator OpsEnd = Inst->op_end();
+    User::op_iterator UseI = std::find(Inst->op_begin(), OpsEnd, Inc.IVOperand);
+    assert(UseI != OpsEnd && "cannot find IV operand");
+    IVIncSet.insert(UseI);
+  }
+}
+
+/// Return true if the IVInc can be folded into an addressing mode: IncExpr
+/// must be a constant that fits in 64 signed bits, Operand must be used as an
+/// address by UserInst, and isAlwaysFoldable must accept the resulting offset
+/// for UserInst's access type with no base GV and no base register.
+static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
+                             Value *Operand, const TargetTransformInfo &TTI) {
+  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
+  if (!IncConst)
+    return false;
+  if (!isAddressUse(UserInst, Operand))
+    return false;
+
+  // Reject constants too wide for the int64_t offset read below.
+  if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
+    return false;
+
+  int64_t IncOffset = IncConst->getValue()->getSExtValue();
+  return isAlwaysFoldable(TTI, LSRUse::Address,
+                          getAccessType(UserInst), /*BaseGV=*/ nullptr,
+                          IncOffset, /*HasBaseReg=*/ false);
+}
+
+/// GenerateIVChain - Generate an add or subtract for each IVInc in a chain to
+/// materialize the IV user's operand from the previous IV user's operand. Replaced operands are queued in DeadInsts.
+void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) {
+ // Find the new IVOperand for the head of the chain. It may have been replaced
+ // by LSR.
+ const IVInc &Head = Chain.Incs[0];
+ User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
+ User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
+ IVOpEnd, L, SE);
+ Value *IVSrc = nullptr;
+ while (IVOpIter != IVOpEnd) {
+ IVSrc = getWideOperand(*IVOpIter);
+
+ // If this operand computes the expression that the chain needs, we may use
+ // it. (Check this after setting IVSrc which is used below.)
+ //
+ // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
+ // narrow for the chain, so we can no longer use it. We do allow using a
+ // wider phi, assuming that LSR checked for free truncation. In that case we
+ // should already have a truncate on this operand such that
+ // getSCEV(IVSrc) == IncExpr.
+ if (SE.getSCEV(*IVOpIter) == Head.IncExpr
+ || SE.getSCEV(IVSrc) == Head.IncExpr) {
+ break;
+ }
+ IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ if (IVOpIter == IVOpEnd) {
+ // Gracefully give up on this chain: no operand of the head still computes Head.IncExpr.
+ DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
+ return;
+ }
+
+ DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
+ Type *IVTy = IVSrc->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(IVTy);
+ const SCEV *LeftOverExpr = nullptr;
+ for (IVChain::const_iterator IncI = Chain.begin(),
+ IncE = Chain.end(); IncI != IncE; ++IncI) {
+
+ Instruction *InsertPt = IncI->UserInst;
+ if (isa<PHINode>(InsertPt))
+ InsertPt = L->getLoopLatch()->getTerminator();
+
+ // IVOper will replace the current IV User's operand. IVSrc is the IV
+ // value currently held in a register.
+ Value *IVOper = IVSrc;
+ if (!IncI->IncExpr->isZero()) {
+ // IncExpr was the result of subtraction of two narrow values, so must
+ // be signed. Accumulate it into LeftOverExpr, the increment not yet applied to IVSrc.
+ const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
+ LeftOverExpr = LeftOverExpr ?
+ SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
+ }
+ if (LeftOverExpr && !LeftOverExpr->isZero()) {
+ // Expand the accumulated IV increment at InsertPt and add it to IVSrc.
+ Rewriter.clearPostInc();
+ Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
+ const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
+ SE.getUnknown(IncV));
+ IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
+
+ // If an IV increment can't be folded, use it as the next IV value and reset the accumulated increment.
+ if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
+ TTI)) {
+ assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
+ IVSrc = IVOper;
+ LeftOverExpr = nullptr;
+ }
+ }
+ Type *OperTy = IncI->IVOperand->getType();
+ if (IVTy != OperTy) {
+ assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
+ "cannot extend a chained IV");
+ IRBuilder<> Builder(InsertPt);
+ IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
+ }
+ IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
+ DeadInsts.push_back(IncI->IVOperand);
+ }
+ // If LSR created a new, wider phi, we may also replace its postinc. We only
+ // do this if we also found a wide value for the head of the chain.
+ if (isa<PHINode>(Chain.tailUserInst())) {
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
+ if (!isCompatibleIVType(Phi, IVSrc))
+ continue;
+ Instruction *PostIncV = dyn_cast<Instruction>(
+ Phi->getIncomingValueForBlock(L->getLoopLatch()));
+ if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
+ continue;
+ Value *IVOper = IVSrc;
+ Type *PostIncTy = PostIncV->getType();
+ if (IVTy != PostIncTy) {
+ assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
+ IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
+ Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
+ IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
+ }
+ Phi->replaceUsesOfWith(PostIncV, IVOper);
+ DeadInsts.push_back(PostIncV);
+ }
+ }
+}
+
+void LSRInstance::CollectFixupsAndInitialFormulae() { // Record an LSRFixup for every IV use not claimed by a chain.
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ Instruction *UserInst = UI->getUser();
+ // Skip IV users that are part of profitable IV Chains (their operand uses are in IVIncSet).
+ User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
+ UI->getOperandValToReplace());
+ assert(UseI != UserInst->op_end() && "cannot find IV operand");
+ if (IVIncSet.count(UseI))
+ continue;
+
+ // Record the use as a new fixup.
+ LSRFixup &LF = getNewFixup();
+ LF.UserInst = UserInst;
+ LF.OperandValToReplace = UI->getOperandValToReplace();
+ LF.PostIncLoops = UI->getPostIncLoops();
+
+ LSRUse::KindType Kind = LSRUse::Basic;
+ Type *AccessTy = nullptr;
+ if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
+ Kind = LSRUse::Address;
+ AccessTy = getAccessType(LF.UserInst);
+ }
+
+ const SCEV *S = IU.getExpr(*UI);
+
+ // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
+ // (N - i == 0), and this allows (N - i) to be the expression that we work
+ // with rather than just N or i, so we can consider the register
+ // requirements for both N and i at the same time. Limiting this code to
+ // equality icmps is not a problem because all interesting loops use
+ // equality icmps, thanks to IndVarSimplify.
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
+ if (CI->isEquality()) {
+ // Swap the operands if needed to put the OperandValToReplace on the
+ // left, for consistency. This modifies the IR, hence Changed is set.
+ Value *NV = CI->getOperand(1);
+ if (NV == LF.OperandValToReplace) {
+ CI->setOperand(1, CI->getOperand(0));
+ CI->setOperand(0, NV);
+ NV = CI->getOperand(1);
+ Changed = true;
+ }
+
+ // x == y --> x - y == 0
+ const SCEV *N = SE.getSCEV(NV);
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
+ // S is normalized, so normalize N before folding it into S
+ // to keep the result normalized.
+ N = TransformForPostIncUse(Normalize, N, CI, nullptr,
+ LF.PostIncLoops, SE, DT);
+ Kind = LSRUse::ICmpZero;
+ S = SE.getMinusSCEV(N, S);
+ }
+
+ // -1 and the negations of all interesting strides (except the negation
+ // of -1) are now also interesting. Only the factors present before this loop are visited.
+ for (size_t i = 0, e = Factors.size(); i != e; ++i)
+ if (Factors[i] != -1)
+ Factors.insert(-(uint64_t)Factors[i]);
+ Factors.insert(-1);
+ }
+
+ // Set up the initial formula for this use; getUse yields the LSRUse index and the fixup's offset.
+ std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
+ LF.LUIdx = P.first;
+ LF.Offset = P.second;
+ LSRUse &LU = Uses[LF.LUIdx];
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
+
+ // If this is the first use of this LSRUse, give it a formula.
+ if (LU.Formulae.empty()) {
+ InsertInitialFormula(S, LU, LF.LUIdx);
+ CountRegisters(LU.Formulae.back(), LF.LUIdx);
+ }
+ }
+
+ DEBUG(print_fixups(dbgs()));
+}
+
+/// InsertInitialFormula - Insert a formula for the given expression into
+/// the given use, separating out loop-variant portions from loop-invariant
+/// and loop-computable portions.
+void
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ // Mark uses whose expressions cannot be expanded.
+ if (!isSafeToExpand(S, SE))
+ LU.RigidFormula = true;
+