#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
+ const TargetMachine *TM;
const TargetLowering *TLI;
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
- ProfileInfo *PFI;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
+ ValueMap<Value*, Value*> SunkAddrs;
/// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
/// be updated.
public:
static char ID; // Pass identification, replacement for typeid
- explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(ID), TLI(tli) {
+ explicit CodeGenPrepare(const TargetMachine *TM = 0)
+ : FunctionPass(ID), TM(TM), TLI(0) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F);
+ const char *getPassName() const { return "CodeGen Prepare"; }
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<ProfileInfo>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
}
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
- bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
- bool ConvertLoadToSwitch(LoadInst *LI);
};
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
-
-FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
- return new CodeGenPrepare(TLI);
+/// Registers CodeGenPrepare with \p Registry, after first initializing the
+/// TargetLibraryInfo pass, and returns the PassInfo that was created.
+static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) {
+  initializeTargetLibraryInfoPass(Registry);
+
+  // The pass can be constructed either with no arguments or from a
+  // TargetMachine, so both constructor thunks are handed to the PassInfo.
+  PassInfo::NormalCtor_t DefaultCtor =
+      PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>);
+  PassInfo::TargetMachineCtor_t TargetMachineCtor =
+      PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>);
+
+  PassInfo *Info =
+      new PassInfo("Optimize for code generation", "codegenprepare",
+                   &CodeGenPrepare::ID, DefaultCtor, false, false,
+                   TargetMachineCtor);
+  Registry.registerPass(*Info, true);
+  return Info;
+}
+
+void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce)
+}
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
+ return new CodeGenPrepare(TM);
}
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ if (TM) TLI = TM->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfo>();
- DT = getAnalysisIfAvailable<DominatorTree>();
- PFI = getAnalysisIfAvailable<ProfileInfo>();
- OptSize = F.getFnAttributes().hasOptimizeForSizeAttr();
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : 0;
+ OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (TLI && TLI->isSlowDivBypassed()) {
- const DenseMap<Type*, Type*> &BypassTypeMap = TLI->getBypassSlowDivTypes();
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+ const DenseMap<unsigned int, unsigned int> &BypassWidths =
+ TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
- EverMadeChange |= bypassSlowDivision(F, I, BypassTypeMap);
+ EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
}
// Eliminate blocks that contain only PHI nodes and an
WorkList.insert(*II);
}
- for (SmallPtrSet<BasicBlock*, 8>::iterator
- I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
- DeleteDeadBlock(*I);
+ // Delete the dead blocks and any of their dead successors.
+ MadeChange |= !WorkList.empty();
+ while (!WorkList.empty()) {
+ BasicBlock *BB = *WorkList.begin();
+ WorkList.erase(BB);
+ SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+
+ DeleteDeadBlock(BB);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
// Merge pairs of basic blocks with unconditional branches, connected by
// a single edge.
}
if (ModifiedDT && DT)
- DT->DT->recalculate(F);
+ DT->recalculate(F);
return EverMadeChange;
}
bool CodeGenPrepare::EliminateFallThrough(Function &F) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ) {
BasicBlock *BB = I++;
// If the destination block has a single pred, then this is a trivial
// edge, just collapse it.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
bool MadeChange = false;
// Note that this intentionally skips the entry block.
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ) {
BasicBlock *BB = I++;
// If this block doesn't end with an uncond branch, ignore it.
DT->changeImmediateDominator(DestBB, NewIDom);
DT->eraseNode(BB);
}
- if (PFI) {
- PFI->replaceAllUses(BB, DestBB);
- PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
- }
BB->eraseFromParent();
++NumBlocksElim;
// happens.
WeakVH IterHandle(CurInstIterator);
- replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
- // We'll need TargetData from here on out.
- const TargetData *TD = TLI ? TLI->getTargetData() : 0;
+ // We'll need DataLayout from here on out.
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
if (!TD) return false;
// Lower all default uses of _chk calls. This is very similar
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!TLI)
return false;
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI)
+ return false;
+
PHINode *PN = 0;
BitCastInst *BCI = 0;
Value *V = RI->getReturnValue();
return false;
}
- BasicBlock *BB = RI->getParent();
if (PN && PN->getParent() != BB)
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
// See llvm::isInTailCallPosition().
const Function *F = BB->getParent();
- Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (CallerRetAttr.hasZExtAttr() || CallerRetAttr.hasSExtAttr())
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
return false;
// Make sure there are no instructions between the PHI and return, or that the
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
- if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ // The two sides must come from different attribute sets: the call site's
+ // return attributes on the left, the enclosing function's (CallerAttrs) on
+ // the right. NoAlias is stripped from both before comparing.
+ AttributeSet CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
// Memory Optimization
//===----------------------------------------------------------------------===//
+namespace {
+
+/// ExtAddrMode - TargetLowering::AddrMode extended with the actual Value*'s
+/// that occupy the base-register and scaled-register slots.
+struct ExtAddrMode : public TargetLowering::AddrMode {
+  Value *BaseReg;
+  Value *ScaledReg;
+  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
+  void print(raw_ostream &OS) const;
+  void dump() const;
+
+  /// Two modes compare equal when both register values and the BaseGV,
+  /// BaseOffs, HasBaseReg and Scale fields all agree.
+  bool operator==(const ExtAddrMode& O) const {
+    if (BaseReg != O.BaseReg || ScaledReg != O.ScaledReg)
+      return false;
+    if (BaseGV != O.BaseGV || BaseOffs != O.BaseOffs)
+      return false;
+    return HasBaseReg == O.HasBaseReg && Scale == O.Scale;
+  }
+};
+
+#ifndef NDEBUG
+static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
+  // Streaming an ExtAddrMode simply forwards to its print() method.
+  AM.print(OS);
+  return OS;
+}
+#endif
+
+/// print - Write the addressing mode to \p OS as a bracketed list of its
+/// non-empty components (global, offset, base register, scaled register),
+/// separated by " + ".
+void ExtAddrMode::print(raw_ostream &OS) const {
+  // Text emitted in front of the next component: empty until the first
+  // component has been written, " + " afterwards.
+  const char *Separator = "";
+  OS << "[";
+
+  if (BaseGV) {
+    OS << Separator << "GV:";
+    BaseGV->printAsOperand(OS, /*PrintType=*/false);
+    Separator = " + ";
+  }
+
+  if (BaseOffs) {
+    OS << Separator << BaseOffs;
+    Separator = " + ";
+  }
+
+  if (BaseReg) {
+    OS << Separator << "Base:";
+    BaseReg->printAsOperand(OS, /*PrintType=*/false);
+    Separator = " + ";
+  }
+
+  // The scaled register is always the last component, so no separator
+  // update is needed after it.
+  if (Scale) {
+    OS << Separator << Scale << "*";
+    ScaledReg->printAsOperand(OS, /*PrintType=*/false);
+  }
+
+  OS << ']';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ExtAddrMode::dump() const {
+  // Print to the debug stream and terminate the line.
+  raw_ostream &Out = dbgs();
+  print(Out);
+  Out << '\n';
+}
+#endif
+
+
+/// \brief Matches the addressing modes that are legal for the target.
+///
+/// A matcher works on behalf of one memory instruction: it builds up an
+/// ExtAddrMode and records the instructions that were folded into it.
+class AddressingModeMatcher {
+  /// Instructions that have been folded into the addressing mode so far.
+  SmallVectorImpl<Instruction*> &AddrModeInsts;
+  const TargetLowering &TLI;
+
+  /// AccessTy/MemoryInst - The type of the access (e.g. double) and the
+  /// memory instruction whose address is being computed.
+  Type *AccessTy;
+  Instruction *MemoryInst;
+
+  /// AddrMode - The addressing mode being built up.  It is part of the
+  /// result of the matching routines.
+  ExtAddrMode &AddrMode;
+
+  /// IgnoreProfitability - When true, no profitability checks are made and
+  /// IsProfitableToFoldIntoAddressingMode always returns true.
+  bool IgnoreProfitability;
+
+  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
+                        const TargetLowering &T, Type *AT,
+                        Instruction *MI, ExtAddrMode &AM)
+    : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
+      IgnoreProfitability(false) {}
+public:
+
+  /// Match - Find the maximal addressing mode that a load/store of V can
+  /// fold, given an access type of AccessTy.  The instructions involved are
+  /// returned in AddrModeInsts.
+  static ExtAddrMode Match(Value *V, Type *AccessTy,
+                           Instruction *MemoryInst,
+                           SmallVectorImpl<Instruction*> &AddrModeInsts,
+                           const TargetLowering &TLI) {
+    ExtAddrMode Result;
+    AddressingModeMatcher Matcher(AddrModeInsts, TLI, AccessTy, MemoryInst,
+                                  Result);
+
+    bool Success = Matcher.MatchAddr(V, 0);
+    (void)Success; assert(Success && "Couldn't select *anything*?");
+    return Result;
+  }
+private:
+  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+  bool MatchAddr(Value *V, unsigned Depth);
+  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
+  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+                                            ExtAddrMode &AMBefore,
+                                            ExtAddrMode &AMAfter);
+  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+};
+
+/// MatchScaledValue - Try to fold ScaleReg*Scale into the addressing mode
+/// being built.  Returns true and updates AddrMode when the result is legal
+/// for the target; returns false otherwise.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+                                             unsigned Depth) {
+  // A scale of 1 is plain addition of ScaleReg, so match it as an address.
+  if (Scale == 1)
+    return MatchAddr(ScaleReg, Depth);
+
+  // A scale of 0 contributes nothing, so there is nothing to add.
+  if (Scale == 0)
+    return true;
+
+  // The scale field must either be free or already be scaling this same
+  // register, in which case the new scale can be added to it.
+  const bool ScaleFieldInUse = AddrMode.Scale != 0;
+  if (ScaleFieldInUse && AddrMode.ScaledReg != ScaleReg)
+    return false;
+
+  // Accumulate the scale, turning X*4+X*3 into X*7.  This could also do
+  // things like [A+B + A*7] -> [B+A*8].
+  ExtAddrMode Candidate = AddrMode;
+  Candidate.ScaledReg = ScaleReg;
+  Candidate.Scale += Scale;
+
+  // Give up if the target rejects the candidate; otherwise commit it.
+  if (!TLI.isLegalAddressingMode(Candidate, AccessTy))
+    return false;
+  AddrMode = Candidate;
+
+  // ScaleReg*Scale is now part of AddrMode.  If ScaleReg is really X+C (as an
+  // instruction, not a constant expr) we can try X*Scale + C*Scale instead.
+  Value *AddLHS = 0;
+  ConstantInt *AddRHS = 0;
+  if (!isa<Instruction>(ScaleReg) ||
+      !match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(AddRHS))))
+    return true;   // Not (x+c)*scale; keep what was committed above.
+
+  Candidate.ScaledReg = AddLHS;
+  Candidate.BaseOffs += AddRHS->getSExtValue()*Candidate.Scale;
+
+  // When the refined mode is legal too, commit it and remember that the add
+  // was folded.  Otherwise the mode committed above stands.
+  if (TLI.isLegalAddressingMode(Candidate, AccessTy)) {
+    AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+    AddrMode = Candidate;
+  }
+  return true;
+}
+
+/// MightBeFoldableInst - A cheap filter that returns true if an addressing
+/// computation involving I might be folded into a load/store that uses it.
+/// It does not have to be exact, but it must accept at least every
+/// instruction that MatchOperationAddr can handle.
+static bool MightBeFoldableInst(Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::BitCast: {
+    Type *DestTy = I->getType();
+    // Identity bitcasts are left alone.
+    if (DestTy == I->getOperand(0)->getType())
+      return false;
+    return DestTy->isPointerTy() || DestTy->isIntegerTy();
+  }
+  // PtrToInt is always a noop since the int type is known to be pointer
+  // sized, and the input of an IntToPtr is known to be intptr_t.
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::Add:
+  case Instruction::GetElementPtr:
+    return true;
+  // Only X*C and X << C can be handled.
+  case Instruction::Mul:
+  case Instruction::Shl:
+    return isa<ConstantInt>(I->getOperand(1));
+  default:
+    return false;
+  }
+}
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode.  If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+///
+/// \param AddrInst the instruction or constant expression to fold.
+/// \param Opcode   the opcode of \p AddrInst.
+/// \param Depth    current recursion depth; matching gives up at depth 5.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+                                               unsigned Depth) {
+  // Avoid exponential behavior on extremely deep expression trees.
+  if (Depth >= 5) return false;
+
+  switch (Opcode) {
+  case Instruction::PtrToInt:
+    // PtrToInt is always a noop, as we know that the int type is pointer sized.
+    return MatchAddr(AddrInst->getOperand(0), Depth);
+  case Instruction::IntToPtr:
+    // This inttoptr is a no-op if the integer type is pointer sized.
+    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+        TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
+      return MatchAddr(AddrInst->getOperand(0), Depth);
+    return false;
+  case Instruction::BitCast:
+    // BitCast is always a noop, and we can handle it as long as it is
+    // int->int or pointer->pointer (we don't want int<->fp or something).
+    if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+         AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+        // Don't touch identity bitcasts.  These were probably put here by LSR,
+        // and we don't want to mess around with them.  Assume it knows what it
+        // is doing.
+        AddrInst->getOperand(0)->getType() != AddrInst->getType())
+      return MatchAddr(AddrInst->getOperand(0), Depth);
+    return false;
+  case Instruction::Add: {
+    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+        MatchAddr(AddrInst->getOperand(0), Depth+1))
+      return true;
+
+    // Restore the old addr mode info.
+    AddrMode = BackupAddrMode;
+    AddrModeInsts.resize(OldSize);
+
+    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
+    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+        MatchAddr(AddrInst->getOperand(1), Depth+1))
+      return true;
+
+    // Otherwise we definitely can't merge the ADD in.
+    AddrMode = BackupAddrMode;
+    AddrModeInsts.resize(OldSize);
+    break;
+  }
+  //case Instruction::Or:
+  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+  //break;
+  case Instruction::Mul:
+  case Instruction::Shl: {
+    // Can only handle X*C and X << C.
+    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+    if (!RHS) return false;
+    int64_t Scale = RHS->getSExtValue();
+    if (Opcode == Instruction::Shl) {
+      // 1LL << Scale is undefined for a negative shift amount or for one
+      // that reaches the sign bit of int64_t.  Such a shift cannot yield a
+      // usable scale, so decline to fold it rather than compute garbage.
+      if (Scale < 0 || Scale >= 63)
+        return false;
+      Scale = 1LL << Scale;
+    }
+
+    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+  }
+  case Instruction::GetElementPtr: {
+    // Scan the GEP.  We can only handle it if it consists of constant offsets
+    // and at most one variable offset.
+    int VariableOperand = -1;
+    unsigned VariableScale = 0;
+
+    int64_t ConstantOffset = 0;
+    const DataLayout *TD = TLI.getDataLayout();
+    gep_type_iterator GTI = gep_type_begin(AddrInst);
+    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+        const StructLayout *SL = TD->getStructLayout(STy);
+        unsigned Idx =
+          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+        ConstantOffset += SL->getElementOffset(Idx);
+      } else {
+        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+          ConstantOffset += CI->getSExtValue()*TypeSize;
+        } else if (TypeSize) {  // Scales of zero don't do anything.
+          // We only allow one variable index at the moment.
+          if (VariableOperand != -1)
+            return false;
+
+          // Remember the variable index.
+          VariableOperand = i;
+          VariableScale = TypeSize;
+        }
+      }
+    }
+
+    // A common case is for the GEP to only do a constant offset.  In this case,
+    // just add it to the disp field and check validity.
+    if (VariableOperand == -1) {
+      AddrMode.BaseOffs += ConstantOffset;
+      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+        // Check to see if we can fold the base pointer in too.
+        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+          return true;
+      }
+      AddrMode.BaseOffs -= ConstantOffset;
+      return false;
+    }
+
+    // Save the valid addressing mode in case we can't match.
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+
+    // Fold in the constant offset; legality is checked when the base and the
+    // scaled operand are matched below.
+    AddrMode.BaseOffs += ConstantOffset;
+
+    // Match the base operand of the GEP.
+    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+      // If it couldn't be matched, just stuff the value in a register.
+      if (AddrMode.HasBaseReg) {
+        AddrMode = BackupAddrMode;
+        AddrModeInsts.resize(OldSize);
+        return false;
+      }
+      AddrMode.HasBaseReg = true;
+      AddrMode.BaseReg = AddrInst->getOperand(0);
+    }
+
+    // Match the remaining variable portion of the GEP.
+    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+                          Depth)) {
+      // If it couldn't be matched, try stuffing the base into a register
+      // instead of matching it, and retrying the match of the scale.
+      AddrMode = BackupAddrMode;
+      AddrModeInsts.resize(OldSize);
+      if (AddrMode.HasBaseReg)
+        return false;
+      AddrMode.HasBaseReg = true;
+      AddrMode.BaseReg = AddrInst->getOperand(0);
+      AddrMode.BaseOffs += ConstantOffset;
+      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+                            VariableScale, Depth)) {
+        // If even that didn't work, bail.
+        AddrMode = BackupAddrMode;
+        AddrModeInsts.resize(OldSize);
+        return false;
+      }
+    }
+
+    return true;
+  }
+  }
+  return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (AddrMode.BaseGV == 0) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseGV = 0;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worst case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = 0;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = 0;
+ }
+ // Couldn't match.
+ return false;
+}
+
+/// IsOperandAMemoryOperand - Return true if every use of OpVal by the given
+/// inline asm call is as an indirect memory operand, and false as soon as
+/// one use is found that is not.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+                                    const TargetLowering &TLI) {
+  TargetLowering::AsmOperandInfoVector Constraints =
+      TLI.ParseConstraints(ImmutableCallSite(CI));
+
+  for (unsigned Idx = 0, NumConstraints = Constraints.size();
+       Idx != NumConstraints; ++Idx) {
+    TargetLowering::AsmOperandInfo &Info = Constraints[Idx];
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(Info, SDValue());
+
+    // Operands that are not our value are of no interest.
+    if (Info.CallOperandVal != OpVal)
+      continue;
+
+    // Our value can only be folded when it is an indirect memory operand.
+    const bool IsIndirectMemory =
+        Info.ConstraintType == TargetLowering::C_Memory && Info.isIndirect;
+    if (!IsIndirectMemory)
+      return false;
+  }
+
+  return true;
+}
+
+/// FindAllMemoryUses - Recursively walk the uses of I until memory uses are
+/// reached, appending each memory instruction found (with the operand number
+/// of the use) to MemoryUses.  Returns true if an obviously non-foldable use
+/// is encountered, false otherwise.
+static bool FindAllMemoryUses(Instruction *I,
+                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+                              const TargetLowering &TLI) {
+  // An instruction that was already visited needs no further work.
+  if (!ConsideredInsts.insert(I))
+    return false;
+
+  // Bail out on instructions that obviously cannot be folded.
+  if (!MightBeFoldableInst(I))
+    return true;
+
+  // Classify every use, recursing into those that are not memory uses.
+  for (Value::use_iterator UseIt = I->use_begin(), UseEnd = I->use_end();
+       UseIt != UseEnd; ++UseIt) {
+    User *TheUser = *UseIt;
+
+    if (LoadInst *Load = dyn_cast<LoadInst>(TheUser)) {
+      MemoryUses.push_back(std::make_pair(Load, UseIt.getOperandNo()));
+    } else if (StoreInst *Store = dyn_cast<StoreInst>(TheUser)) {
+      unsigned OperandIdx = UseIt.getOperandNo();
+      // Operand 0 is the stored value: the address itself is being stored,
+      // not stored into.
+      if (OperandIdx == 0)
+        return true;
+      MemoryUses.push_back(std::make_pair(Store, OperandIdx));
+    } else if (CallInst *Call = dyn_cast<CallInst>(TheUser)) {
+      // Only inline asm calls that use I purely as a memory operand are
+      // acceptable; anything else is a non-foldable use.
+      InlineAsm *Asm = dyn_cast<InlineAsm>(Call->getCalledValue());
+      if (!Asm || !IsOperandAMemoryOperand(Call, Asm, I, TLI))
+        return true;
+    } else if (FindAllMemoryUses(cast<Instruction>(TheUser), MemoryUses,
+                                 ConsideredInsts, TLI)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into, in which case including it in the
+/// addressing mode costs nothing.  KnownLive1 and KnownLive2 are two values
+/// that are already known to be live at the instruction.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+                                                   Value *KnownLive2) {
+  // A null value, or one of the known-live values, is trivially live.
+  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+    return true;
+
+  // Anything that is neither an instruction nor an argument (e.g. a
+  // constant) is always live.
+  if (!(isa<Instruction>(Val) || isa<Argument>(Val)))
+    return true;
+
+  // A static alloca is treated as live: it is just a reference to the
+  // stack/frame pointer, which is live for the whole function.
+  AllocaInst *Alloca = dyn_cast<AllocaInst>(Val);
+  if (Alloca && Alloca->isStaticAlloca())
+    return true;
+
+  // Otherwise the value counts as live if it is already used in the memory
+  // instruction's block, since it is then at least live into that block.
+  BasicBlock *MemoryBB = MemoryInst->getParent();
+  return Val->isUsedInBasicBlock(MemoryBB);
+}
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaledReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = 0;
+ if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = 0;
+
+ // If folding this instruction (and its subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (BaseReg == 0 && ScaledReg == 0)
+ return true;
+
+ // If all uses of this instruction are ultimately load/store/inlineasm's,
+ // check to see if their addressing modes will include this instruction. If
+ // so, we can fold it into all uses, so it doesn't matter if it has multiple
+ // uses.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these uses and see if they could
+ // *actually* fold the instruction.
+ SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+ Instruction *User = MemoryUses[i].first;
+ unsigned OpNo = MemoryUses[i].second;
+
+ // Get the access type of this use. If the use isn't a pointer, we don't
+ // know what it accesses.
+ Value *Address = User->getOperand(OpNo);
+ if (!Address->getType()->isPointerTy())
+ return false;
+ Type *AddressAccessTy = Address->getType()->getPointerElementType();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ MemoryInst, Result);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.MatchAddr(Address, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+ I) == MatchedAddrModeInsts.end())
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
+
+} // end anonymous namespace
+
/// IsNonLocalValue - Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
- Type *IntPtrTy =
- TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
-
+ Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *Result = 0;
// Start with the base register. Do this first so that subsequent address
// start of the block.
CurInstIterator = BB->begin();
SunkAddrs.clear();
- } else {
- // This address is now available for reassignment, so erase the table
- // entry; we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
}
}
++NumMemoryInsts;
if (!DefIsLiveOut)
return false;
- // Make sure non of the uses are PHI nodes.
+ // Make sure none of the uses are PHI nodes.
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P)) {
+ if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+ TLInfo, DT)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
}
if (CmpInst *CI = dyn_cast<CmpInst>(I))
- return OptimizeCmpExpression(CI);
+ if (!TLI || !TLI->hasMultipleConditionRegisters())
+ return OptimizeCmpExpression(CI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- bool Changed = false;
if (TLI)
- Changed |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
- Changed |= ConvertLoadToSwitch(LI);
- return Changed;
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ return false;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (CallInst *CI = dyn_cast<CallInst>(I))
return OptimizeCallInst(CI);
- if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
- return DupRetToEnableTailCallOpts(RI);
-
if (SelectInst *SI = dyn_cast<SelectInst>(I))
return OptimizeSelectInst(SI);
while (CurInstIterator != BB.end())
MadeChange |= OptimizeInst(CurInstIterator++);
+ MadeChange |= DupRetToEnableTailCallOpts(&BB);
+
return MadeChange;
}
}
return MadeChange;
}
-
-static bool TargetSupportsJumpTables(const TargetLowering &TLI) { // file-local helper; queried by ConvertLoadToSwitch
- return TLI.supportJumpTables() && // the target must allow jump tables at all...
- (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || // ...and have BR_JT legal or custom-lowered,
- TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); // ...or, failing that, BRIND.
-}
-
-/// ConvertLoadToSwitch - Rewrite a load from a constant lookup table as a
-/// switch feeding a PHI. This undoes the switch-to-lookup table transformation
-/// in SimplifyCFG where that is unprofitable. Returns true if LI was replaced.
-bool CodeGenPrepare::ConvertLoadToSwitch(LoadInst *LI) {
- // This only applies to targets that don't support jump tables.
- if (!TLI || TargetSupportsJumpTables(*TLI))
- return false;
-
- // FIXME: In the future, it would be desirable to have enough target
- // information in SimplifyCFG, so we could decide at that stage whether to
- // transform the switch to a lookup table or not, and this
- // reverse-transformation could be removed.
-
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
- if (!GEP || !GEP->isInBounds() || GEP->getPointerAddressSpace()) // only inbounds GEPs in address space 0
- return false;
- if (GEP->getNumIndices() != 2) // expected shape: {0, TableIndex}
- return false;
- Value *FirstIndex = GEP->idx_begin()[0];
- ConstantInt *FirstIndexInt = dyn_cast<ConstantInt>(FirstIndex);
- if (!FirstIndexInt || !FirstIndexInt->isZero())
- return false;
-
- Value *TableIndex = GEP->idx_begin()[1];
- IntegerType *TableIndexTy = cast<IntegerType>(TableIndex->getType());
-
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
- if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) // the table contents must be known here
- return false;
-
- Constant *Arr = GV->getInitializer();
- uint64_t NumElements;
- if (ConstantArray *CA = dyn_cast<ConstantArray>(Arr))
- NumElements = CA->getType()->getNumElements();
- else if (ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(Arr))
- NumElements = CDA->getNumElements();
- else
- return false;
- if (NumElements < 2)
- return false;
-
- // Split the block at the load; everything from LI onwards moves to PostSwitchBB.
- BasicBlock *OriginalBB = LI->getParent();
- BasicBlock *PostSwitchBB = OriginalBB->splitBasicBlock(LI);
-
- // Replace OriginalBB's terminator with a switch on TableIndex (default: PostSwitchBB).
- IRBuilder<> Builder(OriginalBB->getTerminator());
- SwitchInst *Switch = Builder.CreateSwitch(TableIndex, PostSwitchBB,
- NumElements - 1); // upper bound on the number of explicit cases
- OriginalBB->getTerminator()->eraseFromParent(); // drops the branch left behind by splitBasicBlock
-
- // Count the frequency of each value; the most frequent one becomes the default.
- SmallDenseMap<Constant*, uint64_t> ValueFreq;
- for (uint64_t I = 0; I < NumElements; ++I)
- ++ValueFreq[Arr->getAggregateElement(I)];
- uint64_t MaxCount = 0;
- Constant *DefaultValue = NULL;
- for (SmallDenseMap<Constant*, uint64_t>::iterator I = ValueFreq.begin(),
- E = ValueFreq.end(); I != E; ++I) {
- if (I->second > MaxCount) { // strict '>': on a tie the entry iterated first wins
- MaxCount = I->second;
- DefaultValue = I->first;
- }
- }
- assert(DefaultValue && "No values in the array?");
-
- // Create the phi node in PostSwitchBB, which will replace the load.
- Builder.SetInsertPoint(PostSwitchBB->begin());
- PHINode *PHI = Builder.CreatePHI(LI->getType(), NumElements);
- PHI->addIncoming(DefaultValue, OriginalBB); // the switch's default edge enters directly from OriginalBB
-
- // Build one forwarding block per non-default element and add a switch case for it.
- for (uint64_t I = 0; I < NumElements; ++I) {
- Constant *C = Arr->getAggregateElement(I);
- if (C == DefaultValue) continue; // Already covered by the default case.
-
- BasicBlock *BB = BasicBlock::Create(PostSwitchBB->getContext(),
- "lookup.bb",
- PostSwitchBB->getParent(),
- PostSwitchBB);
- Switch->addCase(ConstantInt::get(TableIndexTy, I), BB);
- Builder.SetInsertPoint(BB);
- Builder.CreateBr(PostSwitchBB);
- PHI->addIncoming(C, BB);
- }
-
- // Remove the load; its users now read the PHI instead.
- LI->replaceAllUsesWith(PHI);
- LI->eraseFromParent();
-
- // Clean up the GEP, and the table global too when it is private, unnamed_addr and unused.
- if (GEP->use_empty())
- GEP->eraseFromParent();
- if (GV->hasUnnamedAddr() && GV->hasPrivateLinkage() && GV->use_empty())
- GV->eraseFromParent();
-
- CurInstIterator = Switch; // the old next-instruction position was split away; resume at the switch
- return true;
-}