#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
+/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
+/// bail out. This threshold is far beyond the number of users that LSR can
+/// conceivably solve, so it should not affect generated code, but catches the
+/// worst cases before LSR burns too much compile time and stack space.
+static const unsigned MaxIVUsers = 200;
+
+// Temporary flag to clean up congruent phis after LSR phi expansion.
+// It was originally disabled until we could determine whether it was truly
+// useful or not, but is now enabled by default because IV chain generation
+// depends on it. The flag should be removed after the v3.0 release.
+static cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::init(true),
+ cl::desc("Enable LSR phi elimination"));
+
+#ifndef NDEBUG
+// Stress test IV chain generation.
+static cl::opt<bool> StressIVChain(
+ "stress-ivchain", cl::Hidden, cl::init(false),
+ cl::desc("Stress test LSR IV chains"));
+#else
+static bool StressIVChain = false;
+#endif
+
namespace {
/// RegSortData - This class holds data which is used to order reuse candidates.
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
unsigned getNumRegs() const;
- const Type *getType() const;
+ Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
-const Type *Formula::getType() const {
+Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
AM.BaseGV ? AM.BaseGV->getType() :
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
/// isAddSExtable - Return true if the given add can be sign-extended
/// without changing its value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
- const Type *WideTy =
+ Type *WideTy =
IntegerType::get(SE.getContext(),
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
/// getAccessType - Return the type of the memory being accessed.
-static const Type *getAccessType(const Instruction *Inst) {
- const Type *AccessTy = Inst->getType();
+static Type *getAccessType(const Instruction *Inst) {
+ Type *AccessTy = Inst->getType();
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
AccessTy = SI->getOperand(0)->getType();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// All pointers have the same requirements, so canonicalize them to an
// arbitrary pointer type to minimize variation.
- if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy))
+ if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
PTy->getAddressSpace());
return AccessTy;
}
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
+/// Check if expanding this expression is likely to incur significant cost. This
+/// is tricky because SCEV doesn't track which expressions are actually computed
+/// by the current IR.
+///
+/// We currently allow expansion of IV increments that involve adds,
+/// multiplication by constants, and AddRecs from existing phis.
+///
+/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
+/// obvious multiple of the UDivExpr.
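+///
+/// For example, an AddRec such as {0,+,4} whose phi already exists is cheap,
+/// and (2 * %x) is a multiplication by a constant, but an expression like
+/// (%n /u %m) would force a real division into the preheader, so we treat it
+/// as high cost.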
+static bool isHighCostExpansion(const SCEV *S,
+ SmallPtrSet<const SCEV*, 8> &Processed,
+ ScalarEvolution &SE) {
+ // Zero/One operand expressions
+ switch (S->getSCEVType()) {
+ case scUnknown:
+ case scConstant:
+ return false;
+ case scTruncate:
+ return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
+ Processed, SE);
+ case scZeroExtend:
+ return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
+ Processed, SE);
+ case scSignExtend:
+ return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
+ Processed, SE);
+ }
+
+ if (!Processed.insert(S))
+ return false;
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansion(*I, Processed, SE))
+ return true;
+ }
+ return false;
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ if (Mul->getNumOperands() == 2) {
+ // Multiplication by a constant is ok
+ if (isa<SCEVConstant>(Mul->getOperand(0)))
+ return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
+
+ // If we have the value of one operand, check if an existing
+ // multiplication already generates this expression.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
+ Value *UVal = U->getValue();
+ for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
+ UI != UE; ++UI) {
+ // If U is a constant, it may be used by a ConstantExpr.
+ Instruction *User = dyn_cast<Instruction>(*UI);
+ if (User && User->getOpcode() == Instruction::Mul
+ && SE.isSCEVable(User->getType())) {
+ return SE.getSCEV(User) == Mul;
+ }
+ }
+ }
+ }
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (isExistingPhi(AR, SE))
+ return false;
+ }
+
+ // For now, consider any other type of expression (div/mul/min/max) high cost.
+ return true;
+}
+
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
void Loose();
+#ifndef NDEBUG
+ // Once any of the metrics loses, they must all remain losers.
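+ // Loose() sets every metric to ~0u; in practice no metric reaches ~0u on
+ // its own, so either the bitwise OR of all metrics is not all-ones (nothing
+ // has lost) or the bitwise AND is all-ones (everything has lost).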
+ bool isValid() {
+ return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
+ | ImmCost | SetupCost) != ~0u)
+ || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
+ & ImmCost & SetupCost) == ~0u);
+ }
+#endif
+
+ bool isLoser() {
+ assert(isValid() && "invalid cost");
+ return NumRegs == ~0u;
+ }
+
void RateFormula(const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
- if (AR->getLoop() == L)
- AddRecCost += 1; /// TODO: This should be a function of the stride.
-
- // If this is an addrec for a loop that's already been visited by LSR,
- // don't second-guess its addrec phi nodes. LSR isn't currently smart
- // enough to reason about more than one loop at a time. Consider these
- // registers free and leave them alone.
- else if (L->contains(AR->getLoop()) ||
- (!AR->getLoop()->contains(L) &&
- DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- return;
+ // If this is an addrec for another loop, don't second-guess its addrec phi
+ // nodes. LSR isn't currently smart enough to reason about more than one
+ // loop at a time. LSR has already run on inner loops, will not run on outer
+ // loops, and cannot be expected to change sibling loops.
+ if (AR->getLoop() != L) {
+ // If the AddRec already exists as a phi, consider its register free and
+ // leave it alone.
+ if (isExistingPhi(AR, SE))
+ return;
- // If this isn't one of the addrecs that the loop already has, it
- // would require a costly new phi and add. TODO: This isn't
- // precisely modeled right now.
- ++NumBaseAdds;
- if (!Regs.count(AR->getStart()))
- RateRegister(AR->getStart(), Regs, L, SE, DT);
+ // Otherwise, do not consider this formula at all.
+ Loose();
+ return;
}
+ AddRecCost += 1; /// TODO: This should be a function of the stride.
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
- if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
- if (!Regs.count(AR->getStart()))
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
+ if (!Regs.count(AR->getOperand(1))) {
RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ if (isLoser())
+ return;
+ }
+ }
}
++NumRegs;
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg))
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
+ if (LoserRegs && isLoser())
+ LoserRegs->insert(Reg);
+ }
}
void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+ if (isLoser())
+ return;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
Loose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+ if (isLoser())
+ return;
}
// Determine how many (unfolded) adds we'll need inside the loop.
else if (Offset != 0)
ImmCost += APInt(64, Offset, true).getMinSignedBits();
}
+ assert(isValid() && "invalid cost");
}
/// Loose - Set this cost to a losing value.
};
KindType Kind;
- const Type *AccessTy;
+ Type *AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
/// on the implicit truncation to truncate away bogus bits.
- const Type *WidestFixupType;
+ Type *WidestFixupType;
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
/// Regs - The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
- LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+ LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
Formulae.push_back(F);
// Record registers now being used by this use.
- if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
- assert(!Formulae.empty() && "LSRUse has no formulae left!");
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const TargetLowering::AddrMode &AM,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
switch (Kind) {
case LSRUse::Address:
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
- return false;
+ if (!TLI)
+ return false;
+ // We have one of:
+ // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
+ // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+ // Offs is the ICmp immediate.
+ int64_t Offs = AM.BaseOffs;
+ if (AM.Scale == 0)
+ Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
+ return TLI->isLegalICmpImmediate(Offs);
}
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
return true;
case LSRUse::Basic:
return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
case LSRUse::Special:
- // Only handle -1 scales, or no scale.
- return AM.Scale == 0 || AM.Scale == -1;
+ // Special case Basic to handle -1 scales.
+ return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
}
- return false;
+ llvm_unreachable("Invalid LSRUse Kind!");
}
static bool isLegalUse(TargetLowering::AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy,
+ LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
}
};
+/// IVInc - An individual increment in a Chain of IV increments. It relates an
+/// IV user to an expression that computes the IV it uses from the IV used by
+/// the previous link in the Chain.
+///
+/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
+/// original IVOperand. The head of the chain's IVOperand is only valid during
+/// chain collection, before LSR replaces IV users. During chain generation,
+/// IncExpr can be used to find the new IVOperand that computes the same
+/// expression.
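+///
+/// For example, in a chain over a pointer %p with a 16-byte stride, the head
+/// might be IVInc(load, %p, SCEV(%p)) and the next link
+/// IVInc(load, %p.next, +16), where %p.next advances %p by 16 bytes.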
+struct IVInc {
+ Instruction *UserInst;
+ Value *IVOperand;
+ const SCEV *IncExpr;
+
+ IVInc(Instruction *U, Value *O, const SCEV *E):
+ UserInst(U), IVOperand(O), IncExpr(E) {}
+};
+
+// IVChain - The list of IV increments in program order.
+// We typically add the head of a chain without finding subsequent links.
+struct IVChain {
+ SmallVector<IVInc,1> Incs;
+ const SCEV *ExprBase;
+
+ IVChain() : ExprBase(0) {}
+
+ IVChain(const IVInc &Head, const SCEV *Base)
+ : Incs(1, Head), ExprBase(Base) {}
+
+ typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
+
+ // begin - return the first increment in the chain.
+ const_iterator begin() const {
+ assert(!Incs.empty());
+ return llvm::next(Incs.begin());
+ }
+ const_iterator end() const {
+ return Incs.end();
+ }
+
+ // hasIncs - Returns true if this chain contains any increments; the head
+ // link itself is not counted as an increment.
+ bool hasIncs() const { return Incs.size() >= 2; }
+
+ // add - Add an IVInc to the end of this chain.
+ void add(const IVInc &X) { Incs.push_back(X); }
+
+ // tailUserInst - Returns the last UserInst in the chain.
+ Instruction *tailUserInst() const { return Incs.back().UserInst; }
+
+ // isProfitableIncrement - Returns true if IncExpr can be profitably added to
+ // this chain.
+ bool isProfitableIncrement(const SCEV *OperExpr,
+ const SCEV *IncExpr,
+ ScalarEvolution&);
+};
+
+/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
+/// Distinguish between FarUsers that definitely cross IV increments and
+/// NearUsers that may be used between IV increments.
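+///
+/// For instance, a use of the IV that executes after the IV has been
+/// incremented keeps the original value live across that increment (a
+/// FarUser), while a use between the chain head and its increment may be
+/// rewritable in terms of the chain (a NearUser).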
+struct ChainUsers {
+ SmallPtrSet<Instruction*, 4> FarUsers;
+ SmallPtrSet<Instruction*, 4> NearUsers;
+};
+
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
SmallSetVector<int64_t, 8> Factors;
/// Types - Interesting use types, to facilitate truncation reuse.
- SmallSetVector<const Type *, 4> Types;
+ SmallSetVector<Type *, 4> Types;
/// Fixups - The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
/// RegUses - Track which uses use which register candidates.
RegUseTracker RegUses;
+ // Limit the number of chains to avoid quadratic behavior. We don't expect to
+ // have more than a few IV increment chains in a loop. Missing a Chain falls
+ // back to normal LSR behavior for those uses.
+ static const unsigned MaxChains = 8;
+
+ /// IVChainVec - IV users can form a chain of IV increments.
+ SmallVector<IVChain, MaxChains> IVChainVec;
+
+ /// IVIncSet - IV users that belong to profitable IVChains.
+ SmallPtrSet<Use*, MaxChains> IVIncSet;
+
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
void OptimizeLoopTermCond();
+ void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec);
+ void FinalizeChain(IVChain &Chain);
+ void CollectChains();
+ void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts);
+
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy);
+ LSRUse::KindType Kind, Type *AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
- const Type *AccessTy);
+ Type *AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
-public:
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
- BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
- const LSRFixup &LF,
- const LSRUse &LU) const;
+ BasicBlock::iterator
+ AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU,
+ SCEVExpander &Rewriter) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
+public:
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- const Type *DestTy = NULL;
+ Type *DestTy = NULL;
+ bool IsSigned = false;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
- if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = false;
DestTy = UCast->getDestTy();
- else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ }
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = true;
DestTy = SCast->getDestTy();
+ }
if (!DestTy) continue;
if (TLI) {
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
- const Type *SrcTy = PH->getType();
+ Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
+ (double)Init->getSExtValue() :
+ (double)Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!TLI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
- const Type *AccessTy = getAccessType(UI->getUser());
+ Type *AccessTy = getAccessType(UI->getUser());
TargetLowering::AddrMode AM;
AM.Scale = C->getSExtValue();
if (TLI->isLegalAddressingMode(AM, AccessTy))
/// return true.
bool
LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, const Type *AccessTy) {
+ LSRUse::KindType Kind, Type *AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
- const Type *NewAccessTy = AccessTy;
+ Type *NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
/// Either reuse an existing use or create a new one, as needed.
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
- LSRUse::KindType Kind, const Type *AccessTy) {
+ LSRUse::KindType Kind, Type *AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
- // can skip the rest of the formulae and procede to the next LSRUse.
+ // can skip the rest of the formulae and proceed to the next LSRUse.
break;
}
}
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- Strides.insert(AR->getStepRecurrence(SE));
+ if (AR->getLoop() == L)
+ Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
DEBUG(print_factors_and_types(dbgs()));
}
+/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
+/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
+/// Instructions to IVStrideUses, we could partially skip this.
+static User::op_iterator
+findIVOperand(User::op_iterator OI, User::op_iterator OE,
+ Loop *L, ScalarEvolution &SE) {
+ for (; OI != OE; ++OI) {
+ if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
+ if (!SE.isSCEVable(Oper->getType()))
+ continue;
+
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
+ if (AR->getLoop() == L)
+ break;
+ }
+ }
+ }
+ return OI;
+}
+
+/// getWideOperand - IVChain logic must consistently peek through TruncInst
+/// operands, so wrap it in a convenient helper.
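+/// For example, given %t = trunc i64 %iv to i32, chain formation compares the
+/// wide value %iv rather than %t.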
+static Value *getWideOperand(Value *Oper) {
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
+ return Trunc->getOperand(0);
+ return Oper;
+}
+
+/// isCompatibleIVType - Return true if we allow an IV chain to include both
+/// types.
+static bool isCompatibleIVType(Value *LVal, Value *RVal) {
+ Type *LType = LVal->getType();
+ Type *RType = RVal->getType();
+ return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
+}
+
+/// getExprBase - Return an approximation of this SCEV expression's "base", or
+/// NULL for any constant. Returning the expression itself is
+/// conservative. Returning a deeper subexpression is more precise and valid as
+/// long as it isn't less complex than another subexpression. For expressions
+/// involving multiple unscaled values, we need to return the pointer-type
+/// SCEVUnknown. This avoids forming chains across objects, such as:
+/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
+///
+/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
+/// SCEVUnknown, we simply return the rightmost SCEV operand.
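+///
+/// For example, since SCEV sorts add operands with constants leftmost,
+/// getExprBase({(16 + %p),+,8}) follows the recurrence start (16 + %p) and
+/// returns its rightmost operand, %p.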
+static const SCEV *getExprBase(const SCEV *S) {
+ switch (S->getSCEVType()) {
+ default: // including scUnknown.
+ return S;
+ case scConstant:
+ return 0;
+ case scTruncate:
+ return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
+ case scZeroExtend:
+ return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
+ case scSignExtend:
+ return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
+ case scAddExpr: {
+ // Skip over scaled operands (scMulExpr) to follow add operands as long as
+ // there's nothing more complex.
+ // FIXME: not sure if we want to recognize negation.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
+ E(Add->op_begin()); I != E; ++I) {
+ const SCEV *SubExpr = *I;
+ if (SubExpr->getSCEVType() == scAddExpr)
+ return getExprBase(SubExpr);
+
+ if (SubExpr->getSCEVType() != scMulExpr)
+ return SubExpr;
+ }
+ return S; // all operands are scaled, be conservative.
+ }
+ case scAddRecExpr:
+ return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
+ }
+}
+
+/// Return true if the chain increment is profitable to expand into a loop
+/// invariant value, which may require its own register. A profitable chain
+/// increment will be an offset relative to the same base. We allow such an
+/// offset to be used as a chain increment as long as it is not obviously
+/// expensive to expand using real instructions.
+bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
+ const SCEV *IncExpr,
+ ScalarEvolution &SE) {
+ // Aggressively form chains when -stress-ivchain.
+ if (StressIVChain)
+ return true;
+
+ // Do not replace a constant offset from IV head with a nonconstant IV
+ // increment.
+ if (!isa<SCEVConstant>(IncExpr)) {
+ const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
+ if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
+ return false;
+ }
+
+ SmallPtrSet<const SCEV*, 8> Processed;
+ return !isHighCostExpansion(IncExpr, Processed, SE);
+}
+
+/// Return true if the number of registers needed for the chain is estimated to
+/// be less than the number required for the individual IV users. First prohibit
+/// any IV users that keep the IV live across increments (the Users set should
+/// be empty). Next count the number and type of increments in the chain.
+///
+/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
+/// effectively use postinc addressing modes. Only consider it profitable if the
+/// increments can be computed in fewer registers when chained.
+///
+/// TODO: Consider IVInc free if it's already used in other chains.
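+///
+/// As a rough sketch of the cost model below: a chain whose increments are
+/// {4, 4, %x, %x} starts at cost 1 for the chain register, pays 1 for the
+/// first variable increment %x, then earns 1 back for reusing %x and 1 back
+/// for having multiple constant increments, netting cost 0, which is not
+/// profitable since the cost must be negative.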
+static bool
+isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
+ ScalarEvolution &SE, const TargetLowering *TLI) {
+ if (StressIVChain)
+ return true;
+
+ if (!Chain.hasIncs())
+ return false;
+
+ if (!Users.empty()) {
+ DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
+ for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
+ E = Users.end(); I != E; ++I) {
+ dbgs() << " " << **I << "\n";
+ });
+ return false;
+ }
+ assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+
+ // The chain itself may require a register, so initialize cost to 1.
+ int cost = 1;
+
+ // A complete chain likely eliminates the need for keeping the original IV in
+ // a register. LSR does not currently know how to form a complete chain unless
+ // the header phi already exists.
+ if (isa<PHINode>(Chain.tailUserInst())
+ && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
+ --cost;
+ }
+ const SCEV *LastIncExpr = 0;
+ unsigned NumConstIncrements = 0;
+ unsigned NumVarIncrements = 0;
+ unsigned NumReusedIncrements = 0;
+ for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
+ I != E; ++I) {
+
+ if (I->IncExpr->isZero())
+ continue;
+
+ // Incrementing by zero or some constant is neutral. We assume constants can
+ // be folded into an addressing mode or an add's immediate operand.
+ if (isa<SCEVConstant>(I->IncExpr)) {
+ ++NumConstIncrements;
+ continue;
+ }
+
+ if (I->IncExpr == LastIncExpr)
+ ++NumReusedIncrements;
+ else
+ ++NumVarIncrements;
+
+ LastIncExpr = I->IncExpr;
+ }
+ // An IV chain with a single increment is handled by LSR's postinc
+ // uses. However, a chain with multiple increments requires keeping the IV's
+ // value live longer than it needs to be if chained.
+ if (NumConstIncrements > 1)
+ --cost;
+
+ // Materializing increment expressions in the preheader that didn't exist in
+ // the original code may cost a register. For example, sign-extended array
+ // indices can produce ridiculous increments like this:
+ // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+ cost += NumVarIncrements;
+
+ // Reusing variable increments likely saves a register to hold the multiple of
+ // the stride.
+ cost -= NumReusedIncrements;
+
+ DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
+ << "\n");
+
+ return cost < 0;
+}
+
+/// ChainInstruction - Add this IV user to an existing chain or make it the head
+/// of a new chain.
+void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec) {
+ // When IVs are used as types of varying widths, they are generally converted
+ // to a wider type with some uses remaining narrow under a (free) trunc.
+ Value *const NextIV = getWideOperand(IVOper);
+ const SCEV *const OperExpr = SE.getSCEV(NextIV);
+ const SCEV *const OperExprBase = getExprBase(OperExpr);
+
+ // Visit all existing chains. Check whether this user's IVOper can be
+ // computed as a profitable loop invariant increment from the last link in
+ // the Chain.
+ unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ const SCEV *LastIncExpr = 0;
+ for (; ChainIdx < NChains; ++ChainIdx) {
+ IVChain &Chain = IVChainVec[ChainIdx];
+
+ // Prune the solution space aggressively by checking that both IV operands
+ // are expressions that operate on the same unscaled SCEVUnknown. This
+ // "base" will be canceled by the subsequent getMinusSCEV call. Checking
+ // first avoids creating extra SCEV expressions.
+ if (!StressIVChain && Chain.ExprBase != OperExprBase)
+ continue;
+
+ Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
+ if (!isCompatibleIVType(PrevIV, NextIV))
+ continue;
+
+ // A phi node terminates a chain.
+ if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
+ continue;
+
+ // The increment must be loop-invariant so it can be kept in a register.
+ const SCEV *PrevExpr = SE.getSCEV(PrevIV);
+ const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
+ if (!SE.isLoopInvariant(IncExpr, L))
+ continue;
+
+ if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
+ LastIncExpr = IncExpr;
+ break;
+ }
+ }
+ // If we haven't found a chain, create a new one, unless we hit the max. Don't
+ // bother for phi nodes, because they must be last in the chain.
+ if (ChainIdx == NChains) {
+ if (isa<PHINode>(UserInst))
+ return;
+ if (NChains >= MaxChains && !StressIVChain) {
+ DEBUG(dbgs() << "IV Chain Limit\n");
+ return;
+ }
+ LastIncExpr = OperExpr;
+ // IVUsers may have skipped over sign/zero extensions. We don't currently
+ // attempt to form chains involving extensions unless they can be hoisted
+ // into this loop's AddRec.
+ if (!isa<SCEVAddRecExpr>(LastIncExpr))
+ return;
+ ++NChains;
+ IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
+ OperExprBase));
+ ChainUsersVec.resize(NChains);
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
+ << ") IV=" << *LastIncExpr << "\n");
+ } else {
+ DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
+ << ") IV+" << *LastIncExpr << "\n");
+ // Add this IV user to the end of the chain.
+ IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
+ }
+
+ SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
+ // This chain's NearUsers become FarUsers.
+ if (!LastIncExpr->isZero()) {
+ ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
+ NearUsers.end());
+ NearUsers.clear();
+ }
+
+ // All other uses of IVOperand become near uses of the chain.
+ // We currently ignore intermediate values within SCEV expressions, assuming
+ // they will eventually be used by the current chain, or can be computed
+ // from one of the chain increments. To be more precise we could
+ // transitively follow its users and only add leaf IV users to the set.
+ for (Value::use_iterator UseIter = IVOper->use_begin(),
+ UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
+ Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+ if (!OtherUse || OtherUse == UserInst)
+ continue;
+ if (SE.isSCEVable(OtherUse->getType())
+ && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
+ && IU.isIVUserOrOperand(OtherUse)) {
+ continue;
+ }
+ NearUsers.insert(OtherUse);
+ }
+
+ // Since this user is part of the chain, it's no longer considered a use
+ // of the chain.
+ ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
+}
+
+/// CollectChains - Populate the vector of Chains.
+///
+/// This decreases ILP at the architecture level. Targets with ample registers,
+/// multiple memory ports, and no register renaming probably don't want
+/// this. However, such targets should probably disable LSR altogether.
+///
+/// The job of LSR is to make a reasonable choice of induction variables across
+/// the loop. Subsequent passes can easily "unchain" computation exposing more
+/// ILP *within the loop* if the target wants it.
+///
+/// Finding the best IV chain is potentially a scheduling problem. Since LSR
+/// will not reorder memory operations, it will recognize this as a chain, but
+/// will generate redundant IV increments. Ideally this would be corrected later
+/// by a smart scheduler:
+/// = A[i]
+/// = A[i+x]
+/// A[i] =
+/// A[i+x] =
+///
+/// TODO: Walk the entire domtree within this loop, not just the path to the
+/// loop latch. This will discover chains on side paths, but requires
+/// maintaining multiple copies of the Chains state.
+void LSRInstance::CollectChains() {
+ DEBUG(dbgs() << "Collecting IV Chains.\n");
+ SmallVector<ChainUsers, 8> ChainUsersVec;
+
+ SmallVector<BasicBlock *,8> LatchPath;
+ BasicBlock *LoopHeader = L->getHeader();
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
+ Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
+ LatchPath.push_back(Rung->getBlock());
+ }
+ LatchPath.push_back(LoopHeader);
+
+ // Walk the instruction stream from the loop header to the loop latch.
+ for (SmallVectorImpl<BasicBlock *>::reverse_iterator
+ BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
+ BBIter != BBEnd; ++BBIter) {
+ for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
+ I != E; ++I) {
+ // Skip instructions that weren't seen by IVUsers analysis.
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
+ continue;
+
+ // Ignore users that are part of a SCEV expression. This way we only
+ // consider leaf IV Users. This effectively rediscovers a portion of
+ // IVUsers analysis but in program order this time.
+ if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
+ continue;
+
+ // Remove this instruction from any NearUsers set it may be in.
+ for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ ChainIdx < NChains; ++ChainIdx) {
+ ChainUsersVec[ChainIdx].NearUsers.erase(I);
+ }
+ // Search for operands that can be chained.
+ SmallPtrSet<Instruction*, 4> UniqueOperands;
+ User::op_iterator IVOpEnd = I->op_end();
+ User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
+ while (IVOpIter != IVOpEnd) {
+ Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
+ if (UniqueOperands.insert(IVOpInst))
+ ChainInstruction(I, IVOpInst, ChainUsersVec);
+ IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ } // Continue walking down the instructions.
+ } // Continue walking down the domtree.
+ // Visit phi backedges to determine if the chain can generate the IV postinc.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (!SE.isSCEVable(PN->getType()))
+ continue;
+
+ Instruction *IncV =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ if (IncV)
+ ChainInstruction(PN, IncV, ChainUsersVec);
+ }
+ // Remove any unprofitable chains.
+ unsigned ChainIdx = 0;
+ for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
+ UsersIdx < NChains; ++UsersIdx) {
+ if (!isProfitableChain(IVChainVec[UsersIdx],
+ ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ continue;
+ // Preserve the chain at UsersIdx.
+ if (ChainIdx != UsersIdx)
+ IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
+ FinalizeChain(IVChainVec[ChainIdx]);
+ ++ChainIdx;
+ }
+ IVChainVec.resize(ChainIdx);
+}
+
+void LSRInstance::FinalizeChain(IVChain &Chain) {
+ assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
+ DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
+
+ for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
+ User::op_iterator UseI =
+ std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
+ assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
+ IVIncSet.insert(UseI);
+ }
+}
+
+/// Return true if the IVInc can be folded into an addressing mode.
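+/// For example, a small constant increment feeding a load address can
+/// typically fold into a reg+imm addressing mode, whereas a variable
+/// increment or a non-address use cannot.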
+static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
+ Value *Operand, const TargetLowering *TLI) {
+ const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
+ if (!IncConst || !isAddressUse(UserInst, Operand))
+ return false;
+
+ if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
+ return false;
+
+ int64_t IncOffset = IncConst->getValue()->getSExtValue();
+ if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false,
+ LSRUse::Address, getAccessType(UserInst), TLI))
+ return false;
+
+ return true;
+}
+
+/// GenerateIVChain - Generate an add or subtract for each IVInc in a chain to
+/// materialize the IV user's operand from the previous IV user's operand.
+void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) {
+ // Find the new IVOperand for the head of the chain. It may have been replaced
+ // by LSR.
+ const IVInc &Head = Chain.Incs[0];
+ User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
+ IVOpEnd, L, SE);
+ Value *IVSrc = 0;
+ while (IVOpIter != IVOpEnd) {
+ IVSrc = getWideOperand(*IVOpIter);
+
+ // If this operand computes the expression that the chain needs, we may use
+ // it. (Check this after setting IVSrc which is used below.)
+ //
+ // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
+ // narrow for the chain, so we can no longer use it. We do allow using a
+ // wider phi, assuming LSR checked for free truncation. In that case we
+ // should already have a truncate on this operand such that
+ // getSCEV(IVSrc) == IncExpr.
+ if (SE.getSCEV(*IVOpIter) == Head.IncExpr
+ || SE.getSCEV(IVSrc) == Head.IncExpr) {
+ break;
+ }
+ IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ if (IVOpIter == IVOpEnd) {
+ // Gracefully give up on this chain.
+ DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
+ return;
+ }
+
+ DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
+ Type *IVTy = IVSrc->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(IVTy);
+ const SCEV *LeftOverExpr = 0;
+ for (IVChain::const_iterator IncI = Chain.begin(),
+ IncE = Chain.end(); IncI != IncE; ++IncI) {
+
+ Instruction *InsertPt = IncI->UserInst;
+ if (isa<PHINode>(InsertPt))
+ InsertPt = L->getLoopLatch()->getTerminator();
+
+ // IVOper will replace the current IV User's operand. IVSrc is the IV
+ // value currently held in a register.
+ Value *IVOper = IVSrc;
+ if (!IncI->IncExpr->isZero()) {
+ // IncExpr was the result of subtraction of two narrow values, so must
+ // be signed.
+ const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
+ LeftOverExpr = LeftOverExpr ?
+ SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
+ }
+ if (LeftOverExpr && !LeftOverExpr->isZero()) {
+ // Expand the IV increment.
+ Rewriter.clearPostInc();
+ Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
+ const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
+ SE.getUnknown(IncV));
+ IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
+
+ // If an IV increment can't be folded, use it as the next IV value.
+ if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
+ TLI)) {
+ assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
+ IVSrc = IVOper;
+ LeftOverExpr = 0;
+ }
+ }
+ Type *OperTy = IncI->IVOperand->getType();
+ if (IVTy != OperTy) {
+ assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
+ "cannot extend a chained IV");
+ IRBuilder<> Builder(InsertPt);
+ IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
+ }
+ IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
+ DeadInsts.push_back(IncI->IVOperand);
+ }
+ // If LSR created a new, wider phi, we may also replace its postinc. We only
+ // do this if we also found a wide value for the head of the chain.
+ if (isa<PHINode>(Chain.tailUserInst())) {
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
+ if (!isCompatibleIVType(Phi, IVSrc))
+ continue;
+ Instruction *PostIncV = dyn_cast<Instruction>(
+ Phi->getIncomingValueForBlock(L->getLoopLatch()));
+ if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
+ continue;
+ Value *IVOper = IVSrc;
+ Type *PostIncTy = PostIncV->getType();
+ if (IVTy != PostIncTy) {
+ assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
+ IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
+ Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
+ IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
+ }
+ Phi->replaceUsesOfWith(PostIncV, IVOper);
+ DeadInsts.push_back(PostIncV);
+ }
+ }
+}
+
void LSRInstance::CollectFixupsAndInitialFormulae() {
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ Instruction *UserInst = UI->getUser();
+ // Skip IV users that are part of profitable IV Chains.
+ User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
+ UI->getOperandValToReplace());
+ assert(UseI != UserInst->op_end() && "cannot find IV operand");
+ if (IVIncSet.count(UseI))
+ continue;
+
// Record the uses.
LSRFixup &LF = getNewFixup();
- LF.UserInst = UI->getUser();
+ LF.UserInst = UserInst;
LF.OperandValToReplace = UI->getOperandValToReplace();
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- const Type *AccessTy = 0;
+ Type *AccessTy = 0;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
-static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
- SmallVectorImpl<const SCEV *> &Ops,
- const Loop *L,
- ScalarEvolution &SE) {
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
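+///
+/// For example, given ((4 * %x) + %y + {0,+,8}<%L>), this pushes (4 * %x),
+/// %y, and {0,+,8}<%L> onto Ops and returns NULL because nothing remains.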
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE,
+ unsigned Depth = 0) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return S;
+
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- CollectSubexprs(*I, C, Ops, L, SE);
- return;
+ I != E; ++I) {
+ const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ }
+ return NULL;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
- if (!AR->getStart()->isZero()) {
- CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
- AR->getStepRecurrence(SE),
- AR->getLoop(),
- //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
- SCEV::FlagAnyWrap),
- C, Ops, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, L, SE);
- return;
+ if (AR->getStart()->isZero())
+ return S;
+
+ const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+ C, Ops, L, SE, Depth+1);
+ // Split the non-zero AddRec unless it is part of a nested recurrence that
+ // does not pertain to this loop.
+ if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ Remainder = NULL;
+ }
+ if (Remainder != AR->getStart()) {
+ if (!Remainder)
+ Remainder = SE.getConstant(AR->getType(), 0);
+ return SE.getAddRecExpr(Remainder,
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
- if (Mul->getNumOperands() == 2)
- if (const SCEVConstant *Op0 =
- dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
- CollectSubexprs(Mul->getOperand(1),
- C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, L, SE);
- return;
- }
+ if (Mul->getNumOperands() != 2)
+ return S;
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+ const SCEV *Remainder =
+ CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(SE.getMulExpr(C, Remainder));
+ return NULL;
+ }
}
-
- // Otherwise use the value itself, optionally with a scale applied.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ return S;
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
if (AddOps.size() == 1) continue;
if (LU.Kind != LSRUse::ICmpZero) return;
// Determine the integer type for the base formula.
- const Type *IntTy = Base.getType();
+ Type *IntTy = Base.getType();
if (!IntTy) return;
if (SE.getTypeSizeInBits(IntTy) > 64) return;
/// scaled-offset address modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Determine the integer type for the base formula.
- const Type *IntTy = Base.getType();
+ Type *IntTy = Base.getType();
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
if (Base.AM.BaseGV) return;
// Determine the integer type for the base formula.
- const Type *DstTy = Base.getType();
+ Type *DstTy = Base.getType();
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
- for (SmallSetVector<const Type *, 4>::const_iterator
+ for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
- const Type *SrcTy = *I;
+ Type *SrcTy = *I;
if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
- const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+ Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
- if (C->getValue()->getValue().isNegative() !=
+ if (C->getValue()->isNegative() !=
(NewF.AM.BaseOffs < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
.ule(abs64(NewF.AM.BaseOffs)))
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
- SmallVector<const SCEV *, 2> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
- if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
- Key.push_back(Reg);
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately, otherwise heuristics could choose them over others leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
- if (F.ScaledReg &&
- RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
- Key.push_back(F.ScaledReg);
- // Unstable sort by host order ok, because this is only used for
- // uniquifying.
- std::sort(Key.begin(), Key.end());
-
- std::pair<BestFormulaeTy::const_iterator, bool> P =
- BestFormulae.insert(std::make_pair(Key, FIdx));
- if (!P.second) {
+ else {
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
Formula &Best = LU.Formulae[P.first->second];
- Cost CostF;
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
- Regs.clear();
Cost CostBest;
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
+ }
#ifndef NDEBUG
- ChangedFormulae = true;
+ ChangedFormulae = true;
#endif
- LU.DeleteFormula(F);
- --FIdx;
- --NumForms;
- Any = true;
- continue;
- }
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
if (LU.Regs.count(*I))
ReqRegs.insert(*I);
- bool AnySatisfiedReqRegs = false;
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
-retry:
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
// Ignore formulae which do not use any of the required registers.
+ bool SatisfiedReqReg = true;
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if ((!F.ScaledReg || F.ScaledReg != Reg) &&
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
- F.BaseRegs.end())
- goto skip;
+ F.BaseRegs.end()) {
+ SatisfiedReqReg = false;
+ break;
+ }
+ }
+ if (!SatisfiedReqReg) {
+ // If none of the formulae satisfied the required registers, then we could
+ // clear ReqRegs and try again. Currently, we simply give up in this case.
+ continue;
}
- AnySatisfiedReqRegs = true;
// Evaluate the cost of the current formula. If it's already worse than
// the current best, prune the search at that point.
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
- dbgs() << ". Regs:";
+ dbgs() << ".\n Regs:";
for (SmallPtrSet<const SCEV *, 16>::const_iterator
I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
dbgs() << ' ' << **I;
}
Workspace.pop_back();
}
- skip:;
- }
-
- // If none of the formulae had all of the required registers, relax the
- // constraint so that we don't exclude all formulae.
- if (!AnySatisfiedReqRegs) {
- assert(!ReqRegs.empty() && "Solver failed even without required registers");
- ReqRegs.clear();
- goto retry;
}
}
// SolveRecurse does all the work.
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
+ if (Solution.empty()) {
+ DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
+ return;
+ }
// Ok, we've now made all our decisions.
DEBUG(dbgs() << "\n"
// Attempt to find an insert position in the middle of the block,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
- (!BetterPos || DT.dominates(BetterPos, Inst)))
+ (!BetterPos || !DT.dominates(Inst, BetterPos)))
BetterPos = llvm::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
/// AdjustInsertPositionForExpand - Determine an input position which will be
/// dominated by the operands and which will dominate the result.
BasicBlock::iterator
-LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
- const LSRUse &LU) const {
+ const LSRUse &LU,
+ SCEVExpander &Rewriter) const {
// Collect some instructions which must be dominated by the
// expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
}
}
+ assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+ && !isa<DbgInfoIntrinsic>(LowestIP) &&
+ "Insertion point must be a normal instruction");
+
// Then, climb up the immediate dominator tree as far as we can go while
// still being dominated by the input positions.
- IP = HoistInsertPosition(IP, Inputs);
+ BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
+ // Ignore landingpad instructions.
+ while (isa<LandingPadInst>(IP)) ++IP;
+
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+ // Set IP below instructions recently inserted by SCEVExpander. This keeps the
+ // IP consistent across expansions and allows the previously inserted
+ // instructions to be reused by subsequent expansion.
+ while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+
return IP;
}
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
- IP = AdjustInsertPositionForExpand(IP, LF, LU);
+ IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
Rewriter.setPostInc(LF.PostIncLoops);
// This is the type that the user actually needs.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
// This will be the type that we'll initially expand to.
- const Type *Ty = F.getType();
+ Type *Ty = F.getType();
if (!Ty)
// No type known; just expand directly to the ultimate type.
Ty = OpTy;
// Expand directly to the ultimate type if it's the right size.
Ty = OpTy;
// This is the type to do integer arithmetic in.
- const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+ Type *IntTy = SE.getEffectiveSCEVType(Ty);
// Build up a list of operands to add together to form the full base.
SmallVector<const SCEV *, 8> Ops;
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
- // Flush the operand list to suppress SCEVExpander hoisting.
- if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
- }
-
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.AM.Scale != 0) {
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
+
+ // Flush the operand list to suppress SCEVExpander hoisting address modes.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
SE.getConstant(ScaledS->getType(), F.AM.Scale));
Ops.push_back(ScaledS);
-
- // Flush the operand list to suppress SCEVExpander hoisting.
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
}
}
// Expand the GV portion.
if (F.AM.BaseGV) {
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+ }
- // Flush the operand list to suppress SCEVExpander hoisting.
+ // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+ // unfolded offsets. LSR assumes they both live next to their uses.
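+ // Wrapping the expanded sum in a SCEVUnknown makes it opaque to SCEV, so
+ // later expansion cannot re-associate or hoist the pieces away from IP.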
+ if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
// The other interesting way of "folding" with an ICmpZero is to use a
// negated immediate.
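+ // Negating through uint64_t keeps the negation well-defined even when
+ // Offset is INT64_MIN, where negating the signed value would overflow.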
if (!ICmpScaledV)
- ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+ ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
else {
Ops.push_back(SE.getUnknown(ICmpScaledV));
ICmpScaledV = ConstantInt::get(IntTy, Offset);
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(BB->getTerminator())) {
- Loop *PNLoop = LI.getLoopFor(PN->getParent());
- if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ BasicBlock *Parent = PN->getParent();
+ Loop *PNLoop = LI.getLoopFor(Parent);
+ if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
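+ // An edge into a landing pad cannot be split with SplitCriticalEdge;
+ // such predecessors go through SplitLandingPadPredecessors instead.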
+ BasicBlock *NewBB = 0;
+ if (!Parent->isLandingPad()) {
+ NewBB = SplitCriticalEdge(BB, Parent, P,
+ /*MergeIdenticalEdges=*/true,
+ /*DontDeleteUselessPhis=*/true);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
+ NewBB = NewBBs[0];
+ }
// If PN is outside of the loop and BB is in the loop, we want to
// move the block to be immediately before the PHI block, not
Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy)
FullV =
CastInst::Create(CastInst::getCastOpcode(FullV, false,
Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
- const Type *OpTy = LF.OperandValToReplace->getType();
+ Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE);
+ SCEVExpander Rewriter(SE, "lsr");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
Rewriter.disableCanonicalMode();
+ Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
+ // Mark phi nodes that terminate chains so the expander tries to reuse them.
+ for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
+ ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
+ if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
+ Rewriter.setChainedPhi(PN);
+ }
+
// Expand the new value definitions and update the users.
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
Changed = true;
}
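+ // Materialize each IV chain, letting users along the chain reuse the
+ // incremented IV instead of being expanded independently.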
+ for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
+ ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
+ GenerateIVChain(*ChainI, Rewriter, DeadInsts);
+ Changed = true;
+ }
// Clean up after ourselves. This must be done before deleting any
// instructions.
Rewriter.clear();
TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
- if (!L->isLoopSimplifyForm()) return;
+ if (!L->isLoopSimplifyForm())
+ return;
// If there's no interesting work to be done, bail early.
if (IU.empty()) return;
+ // If there's too much analysis to be done, bail early. We won't be able to
+ // model the problem anyway.
+ unsigned NumUsers = 0;
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ if (++NumUsers > MaxIVUsers) {
+ DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
+ << "\n");
+ return;
+ }
+ }
+
+#ifndef NDEBUG
+ // All dominating loops must have preheaders, or SCEVExpander may not be able
+ // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
+ //
+ // IVUsers analysis should only create users that are dominated by simple loop
+ // headers. Since this loop should dominate all of its users, its user list
+ // should be empty if this loop itself is not within a simple loop nest.
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *BB = Rung->getBlock();
+ const Loop *DomLoop = LI.getLoopFor(BB);
+ if (DomLoop && DomLoop->getHeader() == BB) {
+ assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
+ }
+ }
+#endif // NDEBUG
+
DEBUG(dbgs() << "\nLSR on loop ";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
dbgs() << ":\n");
OptimizeShadowIV();
OptimizeLoopTermCond();
+ // If loop preparation eliminates all interesting IV users, bail.
+ if (IU.empty()) return;
+
+ // Skip nested loops until we can model them better with formulae.
+ if (!L->empty()) {
+ DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
+ return;
+ }
+
// Start collecting data and preparing for the solver.
+ CollectChains();
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
+ assert(!Uses.empty() && "IVUsers reported at least one use");
DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
print_uses(dbgs()));
Types.clear();
RegUses.clear();
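+ // The solver may have given up; in that case leave the loop unchanged.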
+ if (Solution.empty())
+ return;
+
#ifndef NDEBUG
// Formulae should be legal.
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
OS << '*' << *I;
}
- for (SmallSetVector<const Type *, 4>::const_iterator
+ for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
// Run the main LSR transformation.
Changed |= LSRInstance(TLI, L, this).getChanged();
- // At this point, it is worth checking to see if any recurrence PHIs are also
- // dead, so that we can remove them as well.
+ // Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
-
+ if (EnablePhiElim) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
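+ // replaceCongruentIVs folds IV phis that compute the same value into one;
+ // instructions left dead by the folding are deleted below.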
+ unsigned numFolded =
+ Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ if (numFolded) {
+ Changed = true;
+ DeleteTriviallyDeadInstructions(DeadInsts);
+ DeleteDeadPHIs(L->getHeader());
+ }
+ }
return Changed;
}