#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/ValueMap.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
+ const TargetMachine *TM;
const TargetLowering *TLI;
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
- ProfileInfo *PFI;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
public:
static char ID; // Pass identification, replacement for typeid
- explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(ID), TLI(tli) {
+ explicit CodeGenPrepare(const TargetMachine *TM = 0)
+ : FunctionPass(ID), TM(TM), TLI(0) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F);
const char *getPassName() const { return "CodeGen Prepare"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<ProfileInfo>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
}
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
-
-FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
- return new CodeGenPrepare(TLI);
+static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) {
+ initializeTargetLibraryInfoPass(Registry);
+ PassInfo *PI = new PassInfo(
+ "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID,
+ PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false,
+ PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>));
+ Registry.registerPass(*PI, true);
+ return PI;
+}
+
+void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce)
+}
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
+ return new CodeGenPrepare(TM);
}
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ if (TM) TLI = TM->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfo>();
- DT = getAnalysisIfAvailable<DominatorTree>();
- PFI = getAnalysisIfAvailable<ProfileInfo>();
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : 0;
OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
DeleteDeadBlock(BB);
-
+
for (SmallVectorImpl<BasicBlock*>::iterator
II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
if (pred_begin(*II) == pred_end(*II))
}
if (ModifiedDT && DT)
- DT->DT->recalculate(F);
+ DT->recalculate(F);
return EverMadeChange;
}
bool CodeGenPrepare::EliminateFallThrough(Function &F) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ) {
BasicBlock *BB = I++;
// If the destination block has a single pred, then this is a trivial
// edge, just collapse it.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
bool MadeChange = false;
// Note that this intentionally skips the entry block.
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ) {
BasicBlock *BB = I++;
// If this block doesn't end with an uncond branch, ignore it.
DT->changeImmediateDominator(DestBB, NewIDom);
DT->eraseNode(BB);
}
- if (PFI) {
- PFI->replaceAllUses(BB, DestBB);
- PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
- }
BB->eraseFromParent();
++NumBlocksElim;
ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
void print(raw_ostream &OS) const;
void dump() const;
-
+
bool operator==(const ExtAddrMode& O) const {
return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
(BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
}
};
+#ifndef NDEBUG
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
AM.print(OS);
return OS;
}
+#endif
void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
if (BaseGV) {
OS << (NeedPlus ? " + " : "")
<< "GV:";
- WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ BaseGV->printAsOperand(OS, /*PrintType=*/false);
NeedPlus = true;
}
if (BaseReg) {
OS << (NeedPlus ? " + " : "")
<< "Base:";
- WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
+ BaseReg->printAsOperand(OS, /*PrintType=*/false);
NeedPlus = true;
}
if (Scale) {
OS << (NeedPlus ? " + " : "")
<< Scale << "*";
- WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
- NeedPlus = true;
+ ScaledReg->printAsOperand(OS, /*PrintType=*/false);
}
OS << ']';
/// the memory instruction that we're computing this address for.
Type *AccessTy;
Instruction *MemoryInst;
-
+
/// AddrMode - This is the addressing mode that we're building up. This is
/// part of the return value of this addressing mode matching stuff.
ExtAddrMode &AddrMode;
-
+
/// IgnoreProfitability - This is set to true when we should not do
/// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
/// always returns true.
bool IgnoreProfitability;
-
+
AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
const TargetLowering &T, Type *AT,
Instruction *MI, ExtAddrMode &AM)
IgnoreProfitability = false;
}
public:
-
+
/// Match - Find the maximal addressing mode that a load/store of V can fold,
/// give an access type of AccessTy. This returns a list of involved
/// instructions in AddrModeInsts.
const TargetLowering &TLI) {
ExtAddrMode Result;
- bool Success =
+ bool Success =
AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
MemoryInst, Result).MatchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
// mode. Just process that directly.
if (Scale == 1)
return MatchAddr(ScaleReg, Depth);
-
+
// If the scale is 0, it takes nothing to add this.
if (Scale == 0)
return true;
-
+
// If we already have a scale of this value, we can add to it, otherwise, we
// need an available scale field.
if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
// It was legal, so commit it.
AddrMode = TestAddrMode;
-
+
// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
// to see if ScaleReg is actually X+C. If so, we can turn this into adding
// X*Scale + C*Scale to addr mode.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
TestAddrMode.ScaledReg = AddLHS;
TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
-
+
// If this addressing mode is legal, commit it and remember that we folded
// this instruction.
if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
unsigned Depth) {
// Avoid exponential behavior on extremely deep expression trees.
if (Depth >= 5) return false;
-
+
switch (Opcode) {
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
case Instruction::IntToPtr:
// This inttoptr is a no-op if the integer type is pointer sized.
if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy())
+ TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::BitCast:
if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
MatchAddr(AddrInst->getOperand(0), Depth+1))
return true;
-
+
// Restore the old addr mode info.
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
-
+
// Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
MatchAddr(AddrInst->getOperand(1), Depth+1))
return true;
-
+
// Otherwise we definitely can't merge the ADD in.
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
int64_t Scale = RHS->getSExtValue();
if (Opcode == Instruction::Shl)
Scale = 1LL << Scale;
-
+
return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
case Instruction::GetElementPtr: {
// one variable offset.
int VariableOperand = -1;
unsigned VariableScale = 0;
-
+
int64_t ConstantOffset = 0;
const DataLayout *TD = TLI.getDataLayout();
gep_type_iterator GTI = gep_type_begin(AddrInst);
// We only allow one variable index at the moment.
if (VariableOperand != -1)
return false;
-
+
// Remember the variable index.
VariableOperand = i;
VariableScale = TypeSize;
}
}
}
-
+
// A common case is for the GEP to only do a constant offset. In this case,
// just add it to the disp field and check validity.
if (VariableOperand == -1) {
AddrModeInsts.push_back(I);
return true;
}
-
+
// It isn't profitable to do this, roll back.
//cerr << "NOT FOLDING: " << *I;
AddrMode = BackupAddrMode;
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
+
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, SDValue());
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I))
return false;
-
+
// If this is an obviously unfoldable instruction, bail out.
if (!MightBeFoldableInst(I))
return true;
MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
unsigned opNo = UI.getOperandNo();
if (opNo == 0) return true; // Storing addr, not into addr.
MemoryUses.push_back(std::make_pair(SI, opNo));
continue;
}
-
+
if (CallInst *CI = dyn_cast<CallInst>(U)) {
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
if (!IA) return true;
-
+
// If this is a memory operand, we're cool, otherwise bail out.
if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
return true;
continue;
}
-
+
if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
TLI))
return true;
// If Val is either of the known-live values, we know it is live!
if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
return true;
-
+
// All values other than instructions and arguments (e.g. constants) are live.
if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
-
+
// If Val is a constant sized alloca in the entry block, it is live, this is
// true because it is just a reference to the stack/frame pointer, which is
// live for the whole function.
if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
if (AI->isStaticAlloca())
return true;
-
+
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter) {
if (IgnoreProfitability) return true;
-
+
// AMBefore is the addressing mode before this instruction was folded into it,
// and AMAfter is the addressing mode after the instruction was folded. Get
// the set of registers referenced by AMAfter and subtract out those
// BaseReg and ScaleReg (global addresses are always available, as are any
// folded immediates).
Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
-
+
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
// lifetime wasn't extended by adding this instruction.
if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
return false; // Has a non-memory, non-foldable use!
-
+
// Now that we know that all uses of this instruction are part of a chain of
// computation involving only operations that could theoretically be folded
// into a memory use, loop over each of these uses and see if they could
for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
Instruction *User = MemoryUses[i].first;
unsigned OpNo = MemoryUses[i].second;
-
+
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
if (!Address->getType()->isPointerTy())
return false;
- Type *AddressAccessTy =
- cast<PointerType>(Address->getType())->getElementType();
-
+ Type *AddressAccessTy = Address->getType()->getPointerElementType();
+
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
// *actually* cover the shared instruction.
if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
I) == MatchedAddrModeInsts.end())
return false;
-
+
MatchedAddrModeInsts.clear();
}
-
+
return true;
}
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
- Type *IntPtrTy =
- TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
-
+ Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *Result = 0;
// Start with the base register. Do this first so that subsequent address
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P)) {
+ if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+ TLInfo, DT)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
}
if (CmpInst *CI = dyn_cast<CmpInst>(I))
- return OptimizeCmpExpression(CI);
+ if (!TLI || !TLI->hasMultipleConditionRegisters())
+ return OptimizeCmpExpression(CI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (TLI)