#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
+STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
+// FIXME: Remove this abomination once all of the tests pass without it!
+static cl::opt<bool> DisableDeleteDeadBlocks(
+ "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
+ cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
+
+static cl::opt<bool> DisableSelectToBranch(
+ "disable-cgp-select2branch", cl::Hidden, cl::init(false),
+ cl::desc("Disable select to branch conversion."));
+
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
/// be updated.
bool ModifiedDT;
+ /// OptSize - True if optimizing for size.
+ bool OptSize;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB);
bool OptimizeInst(Instruction *I);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
+ bool OptimizeSelectInst(SelectInst *SI);
bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ bool PlaceDbgValues(Function &F);
};
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
bool EverMadeChange = false;
ModifiedDT = false;
+ TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
+ OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+ // llvm.dbg.value is far away from the value then iSel may not be able
+ // handle it properly. iSel will drop llvm.dbg.value if it can not
+ // find a node corresponding to the value.
+ EverMadeChange |= PlaceDbgValues(F);
+
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
if (!DisableBranchOpts) {
MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- MadeChange |= ConstantFoldTerminator(BB);
+ SmallPtrSet<BasicBlock*, 8> WorkList;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+ MadeChange |= ConstantFoldTerminator(BB, true);
+ if (!MadeChange) continue;
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
+
+ if (!DisableDeleteDeadBlocks)
+ for (SmallPtrSet<BasicBlock*, 8>::iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
+ DeleteDeadBlock(*I);
if (MadeChange)
ModifiedDT = true;
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
- if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+ TargetLowering::TypePromoteInteger)
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
- if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+ TargetLowering::TypePromoteInteger)
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
// If, after promotion, these are the same types, this is a noop copy.
CastInst *&InsertedCast = InsertedCasts[UserBB];
if (!InsertedCast) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
CmpInst *&InsertedCmp = InsertedCmps[UserBB];
if (!InsertedCmp) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedCmp =
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
- const Type *ReturnTy = CI->getType();
+ Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
// Substituting this can cause recursive simplifications, which can
// happens.
WeakVH IterHandle(CurInstIterator);
- ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
- ModifiedDT ? 0 : DT);
+ replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
return true;
}
+ if (II && TLI) {
+ SmallVector<Value*, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty())
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ return true;
+ }
+
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
-
+
// We'll need TargetData from here on out.
const TargetData *TD = TLI ? TLI->getTargetData() : 0;
if (!TD) return false;
// It's not safe to eliminate the sign / zero extension of the return value.
// See llvm::isInTailCallPosition().
const Function *F = BB->getParent();
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
return false;
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+ Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
continue;
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- const Type *AccessTy) {
+ Type *AccessTy) {
Value *Repl = Addr;
// Try to collapse single-value PHI nodes. This is necessary to undo
worklist.pop_back();
// Break use-def graph loops.
- if (Visited.count(V)) {
+ if (!Visited.insert(V)) {
Consensus = 0;
break;
}
- Visited.insert(V);
-
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode =
- AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+ AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
NewAddrModeInsts, *TLI);
// This check is broken into two cases with very similar code to avoid using
// Insert this computation right after this user. Since our caller is
// scanning from the top of the BB to the bottom, reuse of the expr are
// guaranteed to happen later.
- BasicBlock::iterator InsertPt = MemoryInst;
+ IRBuilder<> Builder(MemoryInst);
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
+ SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
- const Type *IntPtrTy =
+ Type *IntPtrTy =
TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
Value *Result = 0;
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
if (V->getType()->isPointerTy())
- V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
if (V->getType() != IntPtrTy)
- V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
- "sunkaddr", InsertPt);
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
Result = V;
}
if (V->getType() == IntPtrTy) {
// done.
} else if (V->getType()->isPointerTy()) {
- V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
- V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
} else {
- V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
}
if (AddrMode.Scale != 1)
- V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
- AddrMode.Scale),
- "sunkaddr", InsertPt);
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
- Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
- InsertPt);
+ Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
if (Result == 0)
SunkAddr = Constant::getNullValue(Addr->getType());
else
- SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
+ SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
}
MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
if (Repl->use_empty()) {
+ // This can cause recursive deletion, which can invalidate our iterator.
+ // Use a WeakVH to hold onto it in case this happens.
+ WeakVH IterHandle(CurInstIterator);
+ BasicBlock *BB = CurInstIterator->getParent();
+
RecursivelyDeleteTriviallyDeadInstructions(Repl);
- // This address is now available for reassignment, so erase the table entry;
- // we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
+
+ if (IterHandle != CurInstIterator) {
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ } else {
+ // This address is now available for reassignment, so erase the table
+ // entry; we don't want to match some completely different instruction.
+ SunkAddrs[Addr] = 0;
+ }
}
++NumMemoryInsts;
return true;
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
if (!InsertedTrunc) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
}
return MadeChange;
}
+/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
+/// turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+ // FIXME: This should use the same heuristics as IfConversion to determine
+ // whether a select is better represented as a branch. This requires that
+ // branch probability metadata is preserved for the select, which is not the
+ // case currently.
+
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+
+ // If the branch is predicted right, an out of order CPU can avoid blocking on
+ // the compare. Emit cmovs on compares with a memory operand as branches to
+ // avoid stalls on the load from memory. If the compare has more than one use
+ // there's probably another cmov or setcc around so it's not worth emitting a
+ // branch.
+ if (!Cmp)
+ return false;
+
+ Value *CmpOp0 = Cmp->getOperand(0);
+ Value *CmpOp1 = Cmp->getOperand(1);
+
+ // We check that the memory operand has one use to avoid uses of the loaded
+ // value directly after the compare, making branches unprofitable.
+ return Cmp->hasOneUse() &&
+ ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+ (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+}
+
+
+bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+ // If we have a SelectInst that will likely profit from branch prediction,
+ // turn it into a branch.
+ if (DisableSelectToBranch || OptSize || !TLI ||
+ !TLI->isPredictableSelectExpensive())
+ return false;
+
+ if (!SI->getCondition()->getType()->isIntegerTy(1) ||
+ !isFormingBranchFromSelectProfitable(SI))
+ return false;
+
+ ModifiedDT = true;
+
+ // First, we split the block containing the select into 2 blocks.
+ BasicBlock *StartBlock = SI->getParent();
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
+ BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+
+ // Create a new block serving as the landing pad for the branch.
+ BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
+ NextBlock->getParent(), NextBlock);
+
+ // Move the unconditional branch from the block with the select in it into our
+ // landing pad block.
+ StartBlock->getTerminator()->eraseFromParent();
+ BranchInst::Create(NextBlock, SmallBlock);
+
+ // Insert the real conditional branch based on the original condition.
+ BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
+ PN->takeName(SI);
+ PN->addIncoming(SI->getTrueValue(), StartBlock);
+ PN->addIncoming(SI->getFalseValue(), SmallBlock);
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+
+ // Instruct OptimizeBlock to skip to the next block.
+ CurInstIterator = StartBlock->end();
+ ++NumSelectsExpanded;
+ return true;
+}
+
bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
return DupRetToEnableTailCallOpts(RI);
+ if (SelectInst *SI = dyn_cast<SelectInst>(I))
+ return OptimizeSelectInst(SI);
+
return false;
}
return MadeChange;
}
+
+// llvm.dbg.value is far away from the value then iSel may not be able
+// handle it properly. iSel will drop llvm.dbg.value if it can not
+// find a node corresponding to the value.
+bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+ bool MadeChange = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ Instruction *PrevNonDbgInst = NULL;
+ for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+ Instruction *Insn = BI; ++BI;
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ if (!DVI) {
+ PrevNonDbgInst = Insn;
+ continue;
+ }
+
+ Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+ if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}