//
// TODO: Handle multiple loops at a time.
//
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
-// instead of a GlobalValue?
+// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
+// of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/IVUsers.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
/// computing satisfying a use. It may include broken-out immediates and scaled
/// registers.
struct Formula {
- /// AM - This is used to represent complex addressing, as well as other kinds
- /// of interesting uses.
- TargetLowering::AddrMode AM;
+ /// Global base address used for complex addressing.
+ GlobalValue *BaseGV;
+
+ /// Base offset for complex addressing.
+ int64_t BaseOffset;
+
+ /// Whether any complex addressing has a base register.
+ bool HasBaseReg;
+
+ /// The scale of any complex addressing.
+ int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty, AM.HasBaseReg should be set to true.
- SmallVector<const SCEV *, 2> BaseRegs;
+ /// non-empty,
+ SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
- /// when AM.Scale is not zero.
+ /// when Scale is not zero.
const SCEV *ScaledReg;
/// UnfoldedOffset - An additional constant offset which added near the
/// live in an add immediate field rather than a register.
int64_t UnfoldedOffset;
- Formula() : ScaledReg(0), UnfoldedOffset(0) {}
+ Formula()
+ : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
+ UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
- AM.HasBaseReg = true;
+ HasBaseReg = true;
}
if (!Bad.empty()) {
const SCEV *Sum = SE.getAddExpr(Bad);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
- AM.HasBaseReg = true;
+ HasBaseReg = true;
}
}
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
- AM.BaseGV ? AM.BaseGV->getType() :
+ BaseGV ? BaseGV->getType() :
0;
}
void Formula::print(raw_ostream &OS) const {
bool First = true;
- if (AM.BaseGV) {
+ if (BaseGV) {
if (!First) OS << " + "; else First = false;
- WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
}
- if (AM.BaseOffs != 0) {
+ if (BaseOffset != 0) {
if (!First) OS << " + "; else First = false;
- OS << AM.BaseOffs;
+ OS << BaseOffset;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I) {
if (!First) OS << " + "; else First = false;
OS << "reg(" << **I << ')';
}
- if (AM.HasBaseReg && BaseRegs.empty()) {
+ if (HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: HasBaseReg**";
- } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+ } else if (!HasBaseReg && !BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: !HasBaseReg**";
}
- if (AM.Scale != 0) {
+ if (Scale != 0) {
if (!First) OS << " + "; else First = false;
- OS << AM.Scale << "*reg(";
+ OS << Scale << "*reg(";
if (ScaledReg)
OS << *ScaledReg;
else
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
+ Value *V = DeadInsts.pop_back_val();
+ Instruction *I = dyn_cast_or_null<Instruction>(V);
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
- int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
- if (F.AM.BaseGV)
+ int64_t Offset = (uint64_t)*I + F.BaseOffset;
+ if (F.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
OS << ", plus " << SetupCost << " setup cost";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Cost::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
OS << ", Offset=" << Offset;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
- static SmallVector<const SCEV *, 2> getEmptyKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getEmptyKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-1));
return V;
}
- static SmallVector<const SCEV *, 2> getTombstoneKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getTombstoneKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-2));
return V;
}
- static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
unsigned Result = 0;
for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
E = V.end(); I != E; ++I)
return Result;
}
- static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
- const SmallVector<const SCEV *, 2> &RHS) {
+ static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
+ const SmallVector<const SCEV *, 4> &RHS) {
return LHS == RHS;
}
};
/// the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
- DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+ DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
/// KindType - An enum for a kind of use, indicating what types of
/// HasFormula - Test whether this use as a formula which has the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
OS << ", widest fixup type: " << *WidestFixupType;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetLowering::AddrMode &AM,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
+ Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
- // If we have low-level target information, ask the target if it can
- // completely fold this address.
- if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+ return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
// Otherwise, just guess that reg+reg addressing is legal.
- return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+ //return ;
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
- if (AM.BaseGV)
+ if (BaseGV)
return false;
// ICmp only has two operands; don't allow more than two non-trivial parts.
- if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+ if (Scale != 0 && HasBaseReg && BaseOffset != 0)
return false;
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
// putting the scaled register in the other operand of the icmp.
- if (AM.Scale != 0 && AM.Scale != -1)
+ if (Scale != 0 && Scale != -1)
return false;
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
- if (AM.BaseOffs != 0) {
- if (!TLI)
- return false;
+ if (BaseOffset != 0) {
// We have one of:
- // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
- // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+ // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
+ // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
// Offs is the ICmp immediate.
- int64_t Offs = AM.BaseOffs;
- if (AM.Scale == 0)
- Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
- return TLI->isLegalICmpImmediate(Offs);
+ if (Scale == 0)
+ // The cast does the right thing with INT64_MIN.
+ BaseOffset = -(uint64_t)BaseOffset;
+ return TTI.isLegalICmpImmediate(BaseOffset);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
case LSRUse::Basic:
// Only handle single-register values.
- return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+ return !BaseGV && Scale == 0 && BaseOffset == 0;
case LSRUse::Special:
- // Only handle -1 scales, or no scale.
- return AM.Scale == 0 || AM.Scale == -1;
+ // Special case Basic to handle -1 scales.
+ return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(TargetLowering::AddrMode AM,
- int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
// Check for overflow.
- if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+ if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
return false;
- AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
- if (isLegalUse(AM, Kind, AccessTy, TLI)) {
- AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
- // Check for overflow.
- if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
- (MaxOffset > 0))
- return false;
- AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
- return isLegalUse(AM, Kind, AccessTy, TLI);
- }
- return false;
+ MinOffset = (uint64_t)BaseOffset + MinOffset;
+ if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
+ (MaxOffset > 0))
+ return false;
+ MaxOffset = (uint64_t)BaseOffset + MaxOffset;
+
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
+ Scale) &&
+ isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+}
+
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg, F.Scale);
}
-static bool isAlwaysFoldable(int64_t BaseOffs,
- GlobalValue *BaseGV,
- bool HasBaseReg,
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg) {
// Fast-path: zero is always foldable.
- if (BaseOffs == 0 && !BaseGV) return true;
+ if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- TargetLowering::AddrMode AM;
- AM.BaseOffs = BaseOffs;
- AM.BaseGV = BaseGV;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
// Canonicalize a scale of 1 to a base register if the formula doesn't
// already have a base register.
- if (!AM.HasBaseReg && AM.Scale == 1) {
- AM.Scale = 0;
- AM.HasBaseReg = true;
+ if (!HasBaseReg && Scale == 1) {
+ Scale = 0;
+ HasBaseReg = true;
}
- return isLegalUse(AM, Kind, AccessTy, TLI);
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
}
-static bool isAlwaysFoldable(const SCEV *S,
- int64_t MinOffset, int64_t MaxOffset,
- bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI,
- ScalarEvolution &SE) {
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ Type *AccessTy, const SCEV *S, bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- int64_t BaseOffs = ExtractImmediate(S, SE);
+ int64_t BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
- if (BaseOffs == 0 && !BaseGV) return true;
+ if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- TargetLowering::AddrMode AM;
- AM.BaseOffs = BaseOffs;
- AM.BaseGV = BaseGV;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
- const TargetLowering *const TLI;
+ const TargetTransformInfo &TTI;
Loop *const L;
bool Changed;
Pass *P);
public:
- LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+ LSRInstance(Loop *L, Pass *P);
bool getChanged() const { return Changed; }
}
if (!DestTy) continue;
- if (TLI) {
- // If target does not support DestTy natively then do not apply
- // this transformation.
- EVT DVT = TLI->getValueType(DestTy);
- if (!TLI->isTypeLegal(DVT)) continue;
- }
+ // If target does not support DestTy natively then do not apply
+ // this transformation.
+ if (!TTI.isTypeLegal(DestTy)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
- // Without TLI, assume that any stride might be valid, and so any
- // use might be shared.
- if (!TLI)
- goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
- TargetLowering::AddrMode AM;
- AM.Scale = C->getSExtValue();
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ int64_t Scale = C->getSExtValue();
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
- AM.Scale = -AM.Scale;
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ Scale = -Scale;
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
}
}
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
}
// as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
- F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale &&
+ F.BaseGV == OrigF.BaseGV &&
+ F.Scale == OrigF.Scale &&
F.UnfoldedOffset == OrigF.UnfoldedOffset) {
- if (F.AM.BaseOffs == 0)
+ if (F.BaseOffset == 0)
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
- ScalarEvolution &SE, const TargetLowering *TLI) {
+ ScalarEvolution &SE, const TargetTransformInfo &TTI) {
if (StressIVChain)
return true;
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
- Value *Operand, const TargetLowering *TLI) {
+ Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
return false;
int64_t IncOffset = IncConst->getValue()->getSExtValue();
- if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false,
- LSRUse::Address, getAccessType(UserInst), TLI))
+ if (!isAlwaysFoldable(TTI, LSRUse::Address,
+ getAccessType(UserInst), /*BaseGV=*/ 0,
+ IncOffset, /*HaseBaseReg=*/ false))
return false;
return true;
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- TLI)) {
+ TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
- F.AM.HasBaseReg = true;
+ F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
-static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
- SmallVectorImpl<const SCEV *> &Ops,
- const Loop *L,
- ScalarEvolution &SE) {
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE,
+ unsigned Depth = 0) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return S;
+
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- CollectSubexprs(*I, C, Ops, L, SE);
- return;
+ I != E; ++I) {
+ const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ }
+ return NULL;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
- if (!AR->getStart()->isZero()) {
- CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
- AR->getStepRecurrence(SE),
- AR->getLoop(),
- //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
- SCEV::FlagAnyWrap),
- C, Ops, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, L, SE);
- return;
+ if (AR->getStart()->isZero())
+ return S;
+
+ const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+ C, Ops, L, SE, Depth+1);
+ // Split the non-zero AddRec unless it is part of a nested recurrence that
+ // does not pertain to this loop.
+ if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ Remainder = NULL;
+ }
+ if (Remainder != AR->getStart()) {
+ if (!Remainder)
+ Remainder = SE.getConstant(AR->getType(), 0);
+ return SE.getAddRecExpr(Remainder,
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
- if (Mul->getNumOperands() == 2)
- if (const SCEVConstant *Op0 =
- dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
- CollectSubexprs(Mul->getOperand(1),
- C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, L, SE);
- return;
- }
+ if (Mul->getNumOperands() != 2)
+ return S;
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+ const SCEV *Remainder =
+ CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(SE.getMulExpr(C, Remainder));
+ return NULL;
+ }
}
-
- // Otherwise use the value itself, optionally with a scale applied.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ return S;
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
if (AddOps.size() == 1) continue;
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
- if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
- Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
continue;
// Collect all operands except *J.
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
- Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (TLI && InnerSumSC &&
+ if (InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
F.BaseRegs.erase(F.BaseRegs.begin() + i);
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
else
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
- if (Base.AM.BaseGV) return;
+ if (Base.BaseGV) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
if (G->isZero() || !GV)
continue;
Formula F = Base;
- F.AM.BaseGV = GV;
- if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
E = Worklist.end(); I != E; ++I) {
Formula F = Base;
- F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
- if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
- LU.Kind, LU.AccessTy, TLI)) {
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
if (G->isZero() || Imm == 0)
continue;
Formula F = Base;
- F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
- if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
// Don't do this if there is more than one offset.
if (LU.MinOffset != LU.MaxOffset) return;
- assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+ assert(!Base.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
int64_t Factor = *I;
// Check that the multiplication doesn't overflow.
- if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
+ if (Base.BaseOffset == INT64_MIN && Factor == -1)
continue;
- int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if (NewBaseOffs / Factor != Base.AM.BaseOffs)
+ int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
+ if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
// Check that multiplying with the use offset doesn't overflow.
continue;
Formula F = Base;
- F.AM.BaseOffs = NewBaseOffs;
+ F.BaseOffset = NewBaseOffset;
// Check that this scale is legal.
- if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
continue;
// Compensate for the use having MinOffset built into it.
- F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.AM.Scale != 0) return;
+ if (Base.Scale != 0) return;
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
- Base.AM.Scale = Factor;
- Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+ Base.Scale = Factor;
+ Base.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
- if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ Base)) {
// As a special-case, handle special out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
- isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LSRUse::Special, LU.AccessTy, TLI) &&
+ isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
+ LU.AccessTy, Base) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
// For an ICmpZero, negating a solitary base register won't lead to
// new solutions.
if (LU.Kind == LSRUse::ICmpZero &&
- !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+ !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
// For each addrec base reg, apply the scale, if possible.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
- // This requires TargetLowering to tell us which truncates are free.
- if (!TLI) return;
-
// Don't bother truncating symbolic values.
- if (Base.AM.BaseGV) return;
+ if (Base.BaseGV) return;
// Determine the integer type for the base formula.
Type *DstTy = Base.getType();
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
- if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
<< " , add offset " << Imm;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
const Formula &F = LU.Formulae[L];
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
- int64_t Offs = (uint64_t)F.AM.BaseOffs +
- Imm * (uint64_t)F.AM.Scale;
+ int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
// Don't create 50 + reg(-50).
if (F.referencesReg(SE.getSCEV(
- ConstantInt::get(IntTy, -(uint64_t)Offs))))
+ ConstantInt::get(IntTy, -(uint64_t)Offset))))
continue;
Formula NewF = F;
- NewF.AM.BaseOffs = Offs;
- if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ NewF.BaseOffset = Offset;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ NewF))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->isNegative() !=
- (NewF.AM.BaseOffs < 0) &&
- (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
- .ule(abs64(NewF.AM.BaseOffs)))
+ (NewF.BaseOffset < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
+ .ule(abs64(NewF.BaseOffset)))
continue;
// OK, looks good.
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
- NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
- if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
- if (!TLI ||
- !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, NewF)) {
+ if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
- if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
- abs64(NewF.AM.BaseOffs)) &&
+ if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
+ abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
- NewF.AM.BaseOffs).countTrailingZeros() >=
- CountTrailingZeros_64(NewF.AM.BaseOffs))
+ NewF.BaseOffset).countTrailingZeros() >=
+ CountTrailingZeros_64(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
- typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
dbgs() << "\n");
}
else {
- SmallVector<const SCEV *, 2> Key;
+ SmallVector<const SCEV *, 4> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
- NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+ NewF.BaseOffset += C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
- if (!F.AM.BaseGV) {
+ if (!F.BaseGV) {
Formula NewF = F;
- NewF.AM.BaseGV = GV;
+ NewF.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
+ if (F.BaseOffset != 0 && F.Scale == 0) {
if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
- if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
+ if (reconcileNewOffset(*LUThatHas, F.BaseOffset,
/*HasBaseReg=*/false,
LU.Kind, LU.AccessTy)) {
DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
LSRFixup &Fixup = *I;
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.AM.BaseOffs;
+ Fixup.Offset += F.BaseOffset;
// Add the new offset to LUThatHas' offset list.
if (LUThatHas->Offsets.back() != Fixup.Offset) {
LUThatHas->Offsets.push_back(Fixup.Offset);
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
Formula &F = LUThatHas->Formulae[i];
- if (!isLegalUse(F.AM,
- LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+ if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, F)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
- // Flush the operand list to suppress SCEVExpander hoisting.
- if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
- }
-
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
- if (F.AM.Scale != 0) {
+ if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
// If we're expanding for a post-inc user, make the post-inc adjustment.
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
- assert(F.AM.Scale == -1 &&
+ assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
+
+ // Flush the operand list to suppress SCEVExpander hoisting address modes.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.AM.Scale));
+ SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
+ }
+ }
- // Flush the operand list to suppress SCEVExpander hoisting.
+ // Expand the GV portion.
+ if (F.BaseGV) {
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
+ Ops.push_back(SE.getUnknown(F.BaseGV));
}
- // Expand the GV portion.
- if (F.AM.BaseGV) {
- Ops.push_back(SE.getUnknown(F.AM.BaseGV));
-
- // Flush the operand list to suppress SCEVExpander hoisting.
+ // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+ // unfolded offsets. LSR assumes they both live next to their uses.
+ if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
// Expand the immediate portion.
- int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+ int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
// The other interesting way of "folding" with an ICmpZero is to use a
if (LU.Kind == LSRUse::ICmpZero) {
ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
DeadInsts.push_back(CI->getOperand(1));
- assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+ assert(!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!");
- if (F.AM.Scale == -1) {
+ if (F.Scale == -1) {
if (ICmpScaledV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.AM.Scale == 0 &&
+ assert(F.Scale == 0 &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
NewBB = NewBBs[0];
}
-
- // If PN is outside of the loop and BB is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after BB.
- if (L->contains(BB) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- BB = NewBB;
- i = PN->getBasicBlockIndex(BB);
+ // If NewBB==NULL, then SplitCriticalEdge refused to split because all
+ // phi predecessors are identical. The simple thing to do is skip
+ // splitting in this case rather than complicate the API.
+ if (NewBB) {
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
}
}
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
- : IU(P->getAnalysis<IVUsers>()),
- SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTree>()),
- LI(P->getAnalysis<LoopInfo>()),
- TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
-
+LSRInstance::LSRInstance(Loop *L, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+ TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
+ IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
#ifndef NDEBUG
// Formulae should be legal.
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- const LSRUse &LU = *I;
- for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
- JE = LU.Formulae.end(); J != JE; ++J)
- assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI) &&
- "Illegal formula generated!");
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
+ I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end();
+ J != JE; ++J)
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ *J) && "Illegal formula generated!");
};
#endif
print_uses(OS);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
class LoopStrengthReduce : public LoopPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetLowering *const TLI;
-
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+ LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
"Loop Strength Reduction", false, false)
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
- return new LoopStrengthReduce(TLI);
+Pass *llvm::createLoopStrengthReducePass() {
+ return new LoopStrengthReduce();
}
-LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(ID), TLI(tli) {
- initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
- }
+LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
+ AU.addRequired<TargetTransformInfo>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(TLI, L, this).getChanged();
+ Changed |= LSRInstance(L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
- if (EnablePhiElim) {
+ if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
- unsigned numFolded = Rewriter.
- replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ unsigned numFolded =
+ Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
+ DeadInsts,
+ &getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);