X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTransforms%2FUtils%2FSimplifyCFG.cpp;h=fc84c4ab28ff3d8db3da3d7a511aebb8eb9f8616;hb=9e620952617b90992e50dcf9bca8078c535cbaef;hp=bf2cb49bdcde66c985f5614afa629ea56e60cb11;hpb=f0426601977c3e386d2d26c72a2cca691dc42072;p=oota-llvm.git diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index bf2cb49bdcd..fc84c4ab28f 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -13,31 +13,38 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Type.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/NoFolder.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include -#include #include +#include using namespace llvm; static cl::opt @@ -48,24 +55,47 @@ static cl::opt DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); +static cl::opt +SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), + cl::desc("Sink common instructions down to the end block")); + +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); +STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { + /// ValueEqualityComparisonCase - Represents a case of a switch. + struct ValueEqualityComparisonCase { + ConstantInt *Value; + BasicBlock *Dest; + + ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) + : Value(Value), Dest(Dest) {} + + bool operator<(ValueEqualityComparisonCase RHS) const { + // Comparing pointers is ok as we only rely on the order for uniquing. + return Value < RHS.Value; + } + + bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } + }; + class SimplifyCFGOpt { - const TargetData *const TD; + const TargetTransformInfo &TTI; + const DataLayout *const TD; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, - std::vector > &Cases); + std::vector &Cases); bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder); bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI, IRBuilder<> &Builder); - bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); - bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder); + bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); bool SimplifyIndirectBr(IndirectBrInst *IBI); @@ -73,7 +103,8 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD) + : TTI(TTI), TD(TD) {} bool run(BasicBlock *BB); }; } @@ -83,14 +114,14 @@ public: /// static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { if (SI1 == SI2) return false; // Can't merge with self! - + // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); SmallPtrSet SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) if (SI1Succs.count(*I)) for (BasicBlock::iterator BBI = (*I)->begin(); @@ -100,7 +131,48 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { PN->getIncomingValueForBlock(SI2BB)) return false; } - + + return true; +} + +/// isProfitableToFoldUnconditional - Return true if it is safe and profitable +/// to merge these two terminator instructions together, where SI1 is an +/// unconditional branch. PhiNodes will store all PHI nodes in common +/// successors. +/// +static bool isProfitableToFoldUnconditional(BranchInst *SI1, + BranchInst *SI2, + Instruction *Cond, + SmallVectorImpl &PhiNodes) { + if (SI1 == SI2) return false; // Can't merge with self! + assert(SI1->isUnconditional() && SI2->isConditional()); + + // We fold the unconditional branch if we can easily update all PHI nodes in + // common successors: + // 1> We have a constant incoming value for the conditional branch; + // 2> We have "Cond" as the incoming value for the unconditional branch; + // 3> SI2->getCondition() and Cond have same operands. + CmpInst *Ci2 = dyn_cast(SI2->getCondition()); + if (!Ci2) return false; + if (!(Cond->getOperand(0) == Ci2->getOperand(0) && + Cond->getOperand(1) == Ci2->getOperand(1)) && + !(Cond->getOperand(0) == Ci2->getOperand(1) && + Cond->getOperand(1) == Ci2->getOperand(0))) + return false; + + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + SmallPtrSet SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) + if (SI1Succs.count(*I)) + for (BasicBlock::iterator BBI = (*I)->begin(); + isa(BBI); ++BBI) { + PHINode *PN = cast(BBI); + if (PN->getIncomingValueForBlock(SI1BB) != Cond || + !isa(PN->getIncomingValueForBlock(SI2BB))) + return false; + PhiNodes.push_back(PN); + } return true; } @@ -111,7 +183,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { if (!isa(Succ->begin())) return; // Quick exit if nothing to do - + PHINode *PN; for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast(I)); ++I) @@ -163,7 +235,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // doesn't dominate BB. if (Pred2->getSinglePredecessor() == 0) return 0; - + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -193,7 +265,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // Otherwise, if this is a conditional branch, then we can use it! BranchInst *BI = dyn_cast(CommonPred->getTerminator()); if (BI == 0) return 0; - + assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { IfTrue = Pred1; @@ -205,6 +277,42 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, return BI->getCondition(); } +/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the +/// given instruction, which is assumed to be safe to speculate. 1 means +/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. +static unsigned ComputeSpeculationCost(const User *I) { + assert(isSafeToSpeculativelyExecute(I) && + "Instruction is not safe to speculatively execute!"); + switch (Operator::getOpcode(I)) { + default: + // In doubt, be conservative. + return UINT_MAX; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast(I)->hasAllConstantIndices()) + return UINT_MAX; + return 1; + case Instruction::Load: + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + return 1; // These are all cheap. + + case Instruction::Call: + case Instruction::Select: + return 2; + } +} + /// DominatesMergePoint - If we have a merge point of an "if condition" as /// accepted above, return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -250,7 +358,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. if (AggressiveInsts == 0) return false; - + // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -260,43 +368,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = 0; - - switch (I->getOpcode()) { - default: return false; // Cannot hoist this out safely. - case Instruction::Load: - // We have to check to make sure there are no instructions before the - // load in its basic block, as we are going to hoist the load out to its - // predecessor. - if (PBB->getFirstNonPHIOrDbg() != I) - return false; - Cost = 1; - break; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. - if (!cast(I)->hasAllConstantIndices()) - return false; - Cost = 1; - break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::ICmp: - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - Cost = 1; - break; // These are all cheap and non-trapping instructions. - - case Instruction::Select: - Cost = 2; - break; - } + unsigned Cost = ComputeSpeculationCost(I); if (Cost > CostRemaining) return false; @@ -315,7 +387,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { +static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { // Normal constant int. ConstantInt *CI = dyn_cast(V); if (CI || !TD || !isa(V) || !V->getType()->isPointerTy()) @@ -323,7 +395,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + IntegerType *PtrTy = cast(TD->getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa(V)) @@ -349,10 +421,10 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { /// Values vector. static Value * GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, - const TargetData *TD, bool isEQ, unsigned &UsedICmps) { + const DataLayout *TD, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast(V); if (I == 0) return 0; - + // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { @@ -361,23 +433,21 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, Vals.push_back(C); return I->getOperand(0); } - + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to // the set. ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); - + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) Span = Span.inverse(); - + // If there are a ton of values, we don't want to make a ginormous switch. - if (Span.getSetSize().ugt(8) || Span.isEmptySet() || - // We don't handle wrapped sets yet. - Span.isWrappedSet()) + if (Span.getSetSize().ugt(8) || Span.isEmptySet()) return 0; - + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; @@ -385,11 +455,11 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, } return 0; } - + // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) return 0; - + unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, @@ -410,12 +480,12 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, Extra = I->getOperand(1); return LHS; } - + Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; return 0; } - + // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. if (Extra == 0 || Extra == I->getOperand(0)) { @@ -427,12 +497,12 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - + return 0; } - + static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { - Instruction* Cond = 0; + Instruction *Cond = 0; if (SwitchInst *SI = dyn_cast(TI)) { Cond = dyn_cast(SI->getCondition()); } else if (BranchInst *BI = dyn_cast(TI)) { @@ -475,20 +545,22 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { /// decode all of the 'cases' that it represents and return the 'default' block. BasicBlock *SimplifyCFGOpt:: GetValueEqualityComparisonCases(TerminatorInst *TI, - std::vector > &Cases) { + std::vector + &Cases) { if (SwitchInst *SI = dyn_cast(TI)) { Cases.reserve(SI->getNumCases()); - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i))); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(ValueEqualityComparisonCase(i.getCaseValue(), + i.getCaseSuccessor())); return SI->getDefaultDest(); } BranchInst *BI = cast(TI); ICmpInst *ICI = cast(BI->getCondition()); - Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD), - BI->getSuccessor(ICI->getPredicate() == - ICmpInst::ICMP_NE))); + BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); + Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1), + TD), + Succ)); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); } @@ -496,20 +568,16 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, /// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries /// in the list that match the specified block. static void EliminateBlockCases(BasicBlock *BB, - std::vector > &Cases) { - for (unsigned i = 0, e = Cases.size(); i != e; ++i) - if (Cases[i].second == BB) { - Cases.erase(Cases.begin()+i); - --i; --e; - } + std::vector &Cases) { + Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); } /// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as /// well. static bool -ValuesOverlap(std::vector > &C1, - std::vector > &C2) { - std::vector > *V1 = &C1, *V2 = &C2; +ValuesOverlap(std::vector &C1, + std::vector &C2) { + std::vector *V1 = &C1, *V2 = &C2; // Make V1 be smaller than V2. if (V1->size() > V2->size()) @@ -518,9 +586,9 @@ ValuesOverlap(std::vector > &C1, if (V1->size() == 0) return false; if (V1->size() == 1) { // Just scan V2. - ConstantInt *TheVal = (*V1)[0].first; + ConstantInt *TheVal = (*V1)[0].Value; for (unsigned i = 0, e = V2->size(); i != e; ++i) - if (TheVal == (*V2)[i].first) + if (TheVal == (*V2)[i].Value) return true; } @@ -529,9 +597,9 @@ ValuesOverlap(std::vector > &C1, array_pod_sort(V2->begin(), V2->end()); unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); while (i1 != e1 && i2 != e2) { - if ((*V1)[i1].first == (*V2)[i2].first) + if ((*V1)[i1].Value == (*V2)[i2].Value) return true; - if ((*V1)[i1].first < (*V2)[i2].first) + if ((*V1)[i1].Value < (*V2)[i2].Value) ++i1; else ++i2; @@ -556,14 +624,17 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisVal && "This isn't a value comparison!!"); if (ThisVal != PredVal) return false; // Different predicates. + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Find out information about when control will move from Pred to TI's block. - std::vector > PredCases; + std::vector PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases); EliminateBlockCases(PredDef, PredCases); // Remove default from cases. // Find information about how control leaves this block. - std::vector > ThisCases; + std::vector ThisCases; BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. @@ -575,7 +646,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // can simplify TI. if (!ValuesOverlap(PredCases, ThisCases)) return false; - + if (isa(TI)) { // Okay, one of the successors of this condbr is dead. Convert it to a // uncond br. @@ -585,7 +656,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, (void) NI; // Remove PHI node entries for the dead edge. - ThisCases[0].second->removePredecessor(TI->getParent()); + ThisCases[0].Dest->removePredecessor(TI->getParent()); DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); @@ -593,35 +664,56 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, EraseTerminatorInstAndDCECond(TI); return true; } - + SwitchInst *SI = cast(TI); // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet DeadCases; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - DeadCases.insert(PredCases[i].first); + DeadCases.insert(PredCases[i].Value); DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); - for (unsigned i = SI->getNumCases()-1; i != 0; --i) - if (DeadCases.count(SI->getCaseValue(i))) { - SI->getSuccessor(i)->removePredecessor(TI->getParent()); + // Collect branch weights into a vector. + SmallVector Weights; + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases()); + if (HasWeight) + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt* CI = dyn_cast(MD->getOperand(MD_i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { + --i; + if (DeadCases.count(i.getCaseValue())) { + if (HasWeight) { + std::swap(Weights[i.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } + i.getCaseSuccessor()->removePredecessor(TI->getParent()); SI->removeCase(i); } + } + if (HasWeight && Weights.size() >= 2) + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(Weights)); DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } - + // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. ConstantInt *TIV = 0; BasicBlock *TIBB = TI->getParent(); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].second == TIBB) { + if (PredCases[i].Dest == TIBB) { if (TIV != 0) return false; // Cannot handle multiple values coming to this block. - TIV = PredCases[i].first; + TIV = PredCases[i].Value; } assert(TIV && "No edge from pred to succ?"); @@ -629,8 +721,8 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // BB. Find out which successor will unconditionally be branched to. BasicBlock *TheRealDest = 0; for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) - if (ThisCases[i].first == TIV) { - TheRealDest = ThisCases[i].second; + if (ThisCases[i].Value == TIV) { + TheRealDest = ThisCases[i].Dest; break; } @@ -668,8 +760,8 @@ namespace { } static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt**)P1; - const ConstantInt *RHS = *(const ConstantInt**)P2; + const ConstantInt *LHS = *(const ConstantInt*const*)P1; + const ConstantInt *RHS = *(const ConstantInt*const*)P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -677,6 +769,56 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) { return -1; } +static inline bool HasBranchWeights(const Instruction* I) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString* MDS = dyn_cast(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Get Weights of a given TerminatorInst, the default weight is at the front +/// of the vector. If TI is a conditional eq, we need to swap the branch-weight +/// metadata. +static void GetBranchWeights(TerminatorInst *TI, + SmallVectorImpl &Weights) { + MDNode* MD = TI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast(MD->getOperand(i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If TI is a conditional eq, the default case is the false case, + // and the corresponding branch-weight data is at index 2. We swap the + // default weight to be the first entry. + if (BranchInst* BI = dyn_cast(TI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast(BI->getCondition()); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(Weights.front(), Weights.back()); + } +} + +/// Sees if any of the weights are too big for a uint32_t, and halves all the +/// weights if any are. +static void FitWeights(MutableArrayRef Weights) { + bool Halve = false; + for (unsigned i = 0; i < Weights.size(); ++i) + if (Weights[i] > UINT_MAX) { + Halve = true; + break; + } + + if (! Halve) + return; + + for (unsigned i = 0; i < Weights.size(); ++i) + Weights[i] /= 2; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons @@ -698,10 +840,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, if (PCV == CV && SafeToMergeTerminators(TI, PTI)) { // Figure out which 'cases' to copy from SI to PSI. - std::vector > BBCases; + std::vector BBCases; BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); - std::vector > PredCases; + std::vector PredCases; BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); // Based on whether the default edge from PTI goes to BB or not, fill in @@ -709,16 +851,49 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // build. SmallVector NewSuccessors; + // Update the branch weight metadata along the way + SmallVector Weights; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistent here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + + SmallVector SuccWeights; + if (SuccHasWeights) { + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistent here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); + if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. std::set PTIHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].second != BB) - PTIHandled.insert(PredCases[i].first); + if (PredCases[i].Dest != BB) + PTIHandled.insert(PredCases[i].Value); else { // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i+1]; + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + PredCases.pop_back(); --i; --e; } @@ -729,21 +904,47 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredDefault = BBDefault; NewSuccessors.push_back(BBDefault); } + + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (!PTIHandled.count(BBCases[i].first) && - BBCases[i].second != BBDefault) { + if (!PTIHandled.count(BBCases[i].Value) && + BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].second); + NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights || PredHasWeights) { + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). + Weights.push_back(Weights[0] * SuccWeights[i+1]); + ValidTotalSuccWeight += SuccWeights[i+1]; + } } + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } } else { // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. std::set PTIHandled; + std::map WeightsForHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].second == BB) { - PTIHandled.insert(PredCases[i].first); + if (PredCases[i].Dest == BB) { + PTIHandled.insert(PredCases[i].Value); + + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i+1]; + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + std::swap(PredCases[i], PredCases.back()); PredCases.pop_back(); --i; --e; @@ -752,19 +953,23 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, now we know which constants were sent to BB from the // predecessor. Figure out where they will all go now. for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (PTIHandled.count(BBCases[i].first)) { + if (PTIHandled.count(BBCases[i].Value)) { // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].second); - PTIHandled.erase(BBCases[i].first);// This constant is taken care of + NewSuccessors.push_back(BBCases[i].Dest); + PTIHandled.erase(BBCases[i].Value);// This constant is taken care of } // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. - for (std::set::iterator I = + for (std::set::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { - PredCases.push_back(std::make_pair(*I, BBDefault)); + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[*I]); + PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); NewSuccessors.push_back(BBDefault); } } @@ -778,7 +983,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without TargetData"); + assert(TD && "Cannot switch on pointer without DataLayout"); CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), "magicptr"); } @@ -788,7 +993,18 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredCases.size()); NewSI->setDebugLoc(PTI->getDebugLoc()); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - NewSI->addCase(PredCases[i].first, PredCases[i].second); + NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(MDWeights)); + } EraseTerminatorInstAndDCECond(PTI); @@ -923,11 +1139,11 @@ HoistTerminator: Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; - + // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (SI == 0) SI = cast (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -947,11 +1163,218 @@ HoistTerminator: return true; } -/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 -/// and an BB2 and the only successor of BB1 is BB2, hoist simple code -/// (for now, restricted to a single instruction that's side effect free) from -/// the BB1 into the branch block to speculatively execute it. +/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd, +/// check whether BBEnd has only two predecessors and the other predecessor +/// ends with an unconditional branch. If it is true, sink any common code +/// in the two predecessors to BBEnd. +static bool SinkThenElseCodeToEnd(BranchInst *BI1) { + assert(BI1->isUnconditional()); + BasicBlock *BB1 = BI1->getParent(); + BasicBlock *BBEnd = BI1->getSuccessor(0); + + // Check that BBEnd has two predecessors and the other predecessor ends with + // an unconditional branch. + pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd); + BasicBlock *Pred0 = *PI++; + if (PI == PE) // Only one predecessor. + return false; + BasicBlock *Pred1 = *PI++; + if (PI != PE) // More than two predecessors. + return false; + BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0; + BranchInst *BI2 = dyn_cast(BB2->getTerminator()); + if (!BI2 || !BI2->isUnconditional()) + return false; + + // Gather the PHI nodes in BBEnd. + std::map > MapValueFromBB1ToBB2; + Instruction *FirstNonPhiInBBEnd = 0; + for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast(I)) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN); + } else { + FirstNonPhiInBBEnd = &*I; + break; + } + } + if (!FirstNonPhiInBBEnd) + return false; + + + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. We scan backward for obviously identical + // instructions in an identical order. + BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(), + RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(), + RE2 = BB2->getInstList().rend(); + // Skip debug info. + while (RI1 != RE1 && isa(&*RI1)) ++RI1; + if (RI1 == RE1) + return false; + while (RI2 != RE2 && isa(&*RI2)) ++RI2; + if (RI2 == RE2) + return false; + // Skip the unconditional branches. + ++RI1; + ++RI2; + + bool Changed = false; + while (RI1 != RE1 && RI2 != RE2) { + // Skip debug info. + while (RI1 != RE1 && isa(&*RI1)) ++RI1; + if (RI1 == RE1) + return Changed; + while (RI2 != RE2 && isa(&*RI2)) ++RI2; + if (RI2 == RE2) + return Changed; + + Instruction *I1 = &*RI1, *I2 = &*RI2; + // I1 and I2 should have a single use in the same PHI node, and they + // perform the same operation. + // Cannot move control-flow-involving, volatile loads, vaarg, etc. + if (isa(I1) || isa(I2) || + isa(I1) || isa(I2) || + isa(I1) || isa(I2) || + isa(I1) || isa(I2) || + I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || + I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || + !I1->hasOneUse() || !I2->hasOneUse() || + MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() || + MapValueFromBB1ToBB2[I1].first != I2) + return Changed; + + // Check whether we should swap the operands of ICmpInst. + ICmpInst *ICmp1 = dyn_cast(I1), *ICmp2 = dyn_cast(I2); + bool SwapOpnds = false; + if (ICmp1 && ICmp2 && + ICmp1->getOperand(0) != ICmp2->getOperand(0) && + ICmp1->getOperand(1) != ICmp2->getOperand(1) && + (ICmp1->getOperand(0) == ICmp2->getOperand(1) || + ICmp1->getOperand(1) == ICmp2->getOperand(0))) { + ICmp2->swapOperands(); + SwapOpnds = true; + } + if (!I1->isSameOperationAs(I2)) { + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + + // The operands should be either the same or they need to be generated + // with a PHI node after sinking. We only handle the case where there is + // a single pair of different operands. + Value *DifferentOp1 = 0, *DifferentOp2 = 0; + unsigned Op1Idx = 0; + for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { + if (I1->getOperand(I) == I2->getOperand(I)) + continue; + // Early exit if we have more-than one pair of different operands or + // the different operand is already in MapValueFromBB1ToBB2. + // Early exit if we need a PHI node to replace a constant. + if (DifferentOp1 || + MapValueFromBB1ToBB2.find(I1->getOperand(I)) != + MapValueFromBB1ToBB2.end() || + isa(I1->getOperand(I)) || + isa(I2->getOperand(I))) { + // If we can't sink the instructions, undo the swapping. + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + DifferentOp1 = I1->getOperand(I); + Op1Idx = I; + DifferentOp2 = I2->getOperand(I); + } + + // We insert the pair of different operands to MapValueFromBB1ToBB2 and + // remove (I1, I2) from MapValueFromBB1ToBB2. + if (DifferentOp1) { + PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2, + DifferentOp1->getName() + ".sink", + BBEnd->begin()); + MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN); + // I1 should use NewPN instead of DifferentOp1. + I1->setOperand(Op1Idx, NewPN); + NewPN->addIncoming(DifferentOp1, BB1); + NewPN->addIncoming(DifferentOp2, BB2); + DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); + } + PHINode *OldPN = MapValueFromBB1ToBB2[I1].second; + MapValueFromBB1ToBB2.erase(I1); + + DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";); + DEBUG(dbgs() << " " << *I2 << "\n";); + // We need to update RE1 and RE2 if we are going to sink the first + // instruction in the basic block down. + bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); + // Sink the instruction. + BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1); + if (!OldPN->use_empty()) + OldPN->replaceAllUsesWith(I1); + OldPN->eraseFromParent(); + + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); + I2->eraseFromParent(); + + if (UpdateRE1) + RE1 = BB1->getInstList().rend(); + if (UpdateRE2) + RE2 = BB2->getInstList().rend(); + FirstNonPhiInBBEnd = I1; + NumSinkCommons++; + Changed = true; + } + return Changed; +} + +/// \brief Speculate a conditional basic block flattening the CFG. +/// +/// Note that this is a very risky transform currently. Speculating +/// instructions like this is most often not desirable. Instead, there is an MI +/// pass which can do it with full awareness of the resource constraints. +/// However, some cases are "obvious" and we should do directly. An example of +/// this is speculating a single, reasonably cheap instruction. +/// +/// There is only one distinct advantage to flattening the CFG at the IR level: +/// it makes very common but simplistic optimizations such as are common in +/// instcombine and the DAG combiner more powerful by removing CFG edges and +/// modeling their effects with easier to reason about SSA value graphs. +/// +/// +/// An illustration of this transform is turning this IR: +/// \code +/// BB: +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// %sub = sub %x, %y +/// br label BB2 +/// EndBB: +/// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ] +/// ... +/// \endcode +/// +/// Into this IR: +/// \code +/// BB: +/// %cmp = icmp ult %x, %y +/// %sub = sub %x, %y +/// %cond = select i1 %cmp, 0, %sub +/// ... +/// \endcode +/// +/// \returns true if the conditional block is removed. static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { + // Be conservative for now. FP select instruction can often be expensive. + Value *BrCond = BI->getCondition(); + if (isa(BrCond)) + return false; + // Only speculatively execution a single instruction (not counting the // terminator) for now. Instruction *HInst = NULL; @@ -967,13 +1390,29 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { return false; HInst = I; } - if (!HInst) - return false; - // Be conservative for now. FP select instruction can often be expensive. - Value *BrCond = BI->getCondition(); - if (isa(BrCond)) - return false; + BasicBlock *BIParent = BI->getParent(); + + // Check the instruction to be hoisted, if there is one. + if (HInst) { + // Don't hoist the instruction if it's unsafe or expensive. + if (!isSafeToSpeculativelyExecute(HInst)) + return false; + if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold) + return false; + + // Do not hoist the instruction if any of its operands are defined but not + // used in this BB. The transformation will prevent the operand from + // being sunk into the use block. + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + i != e; ++i) { + Instruction *OpI = dyn_cast(*i); + if (OpI && OpI->getParent() == BIParent && + !OpI->mayHaveSideEffects() && + !OpI->isUsedInBasicBlock(BIParent)) + return false; + } + } // If BB1 is actually on the false edge of the conditional branch, remember // to swap the select operands later. @@ -983,130 +1422,78 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { Invert = true; } - // Turn - // BB: - // %t1 = icmp - // br i1 %t1, label %BB1, label %BB2 - // BB1: - // %t3 = add %t2, c - // br label BB2 - // BB2: - // => - // BB: - // %t1 = icmp - // %t4 = add %t2, c - // %t3 = select i1 %t1, %t2, %t3 - switch (HInst->getOpcode()) { - default: return false; // Not safe / profitable to hoist. - case Instruction::Add: - case Instruction::Sub: - // Not worth doing for vector ops. - if (HInst->getType()->isVectorTy()) - return false; - break; - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - // Don't mess with vector operations. - if (HInst->getType()->isVectorTy()) - return false; - break; // These are all cheap and non-trapping instructions. - } - - // If the instruction is obviously dead, don't try to predicate it. - if (HInst->use_empty()) { - HInst->eraseFromParent(); - return true; - } - - // Can we speculatively execute the instruction? And what is the value - // if the condition is false? Consider the phi uses, if the incoming value - // from the "if" block are all the same V, then V is the value of the - // select if the condition is false. - BasicBlock *BIParent = BI->getParent(); - SmallVector PHIUses; - Value *FalseV = NULL; - + // Collect interesting PHIs, and scan for hazards. + SmallSetVector, 4> PHIs; BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0); - for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end(); - UI != E; ++UI) { - // Ignore any user that is not a PHI node in BB2. These can only occur in - // unreachable blocks, because they would not be dominated by the instr. - PHINode *PN = dyn_cast(*UI); - if (!PN || PN->getParent() != BB2) - return false; - PHIUses.push_back(PN); - - Value *PHIV = PN->getIncomingValueForBlock(BIParent); - if (!FalseV) - FalseV = PHIV; - else if (FalseV != PHIV) - return false; // Inconsistent value when condition is false. - } - - assert(FalseV && "Must have at least one user, and it must be a PHI"); - - // Do not hoist the instruction if any of its operands are defined but not - // used in this BB. The transformation will prevent the operand from - // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); - i != e; ++i) { - Instruction *OpI = dyn_cast(*i); - if (OpI && OpI->getParent() == BIParent && - !OpI->isUsedInBasicBlock(BIParent)) - return false; - } + for (BasicBlock::iterator I = BB2->begin(); + PHINode *PN = dyn_cast(I); ++I) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BIParentV = PN->getIncomingValueForBlock(BIParent); - // If we get here, we can hoist the instruction. Try to place it - // before the icmp instruction preceding the conditional branch. - BasicBlock::iterator InsertPos = BI; - if (InsertPos != BIParent->begin()) - --InsertPos; - // Skip debug info between condition and branch. - while (InsertPos != BIParent->begin() && isa(InsertPos)) - --InsertPos; - if (InsertPos == BrCond && !isa(BrCond)) { - SmallPtrSet BB1Insns; - for(BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end(); - BB1I != BB1E; ++BB1I) - BB1Insns.insert(BB1I); - for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end(); - UI != UE; ++UI) { - Instruction *Use = cast(*UI); - if (!BB1Insns.count(Use)) continue; - - // If BrCond uses the instruction that place it just before - // branch instruction. - InsertPos = BI; - break; + // Skip PHIs which are trivial. + if (BB1V == BIParentV) + continue; + + // Check for safety. + if (ConstantExpr *CE = dyn_cast(BB1V)) { + // An unfolded ConstantExpr could end up getting expanded into + // Instructions. Don't speculate this and another instruction at + // the same time. + if (HInst) + return false; + if (!isSafeToSpeculativelyExecute(CE)) + return false; + if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) + return false; } - } else - InsertPos = BI; - BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst); - // Create a select whose true value is the speculatively executed value and - // false value is the previously determined FalseV. - IRBuilder Builder(BI); - SelectInst *SI; - if (Invert) - SI = cast - (Builder.CreateSelect(BrCond, FalseV, HInst, - FalseV->getName() + "." + HInst->getName())); - else - SI = cast - (Builder.CreateSelect(BrCond, HInst, FalseV, - HInst->getName() + "." + FalseV->getName())); + // Ok, we may insert a select for this PHI. + PHIs.insert(std::make_pair(BB1V, BIParentV)); + } + + // If there are no PHIs to process, bail early. This helps ensure idempotence + // as well. + if (PHIs.empty()) + return false; + + // If we get here, we can hoist the instruction and if-convert. + DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); + + // Hoist the instruction. + if (HInst) + BIParent->getInstList().splice(BI, BB1->getInstList(), HInst); - // Make the PHI node use the select for all incoming values for "then" and - // "if" blocks. - for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) { - PHINode *PN = PHIUses[i]; - for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j) - if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent) - PN->setIncomingValue(j, SI); + // Insert selects and rewrite the PHI operands. + IRBuilder Builder(BI); + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + Value *TrueV = PHIs[i].first; + Value *FalseV = PHIs[i].second; + + // Create a select whose true value is the speculatively executed value and + // false value is the previously determined FalseV. + SelectInst *SI; + if (Invert) + SI = cast + (Builder.CreateSelect(BrCond, FalseV, TrueV, + FalseV->getName() + "." + TrueV->getName())); + else + SI = cast + (Builder.CreateSelect(BrCond, TrueV, FalseV, + TrueV->getName() + "." + FalseV->getName())); + + // Make the PHI node use the select for all incoming values for "then" and + // "if" blocks. + for (BasicBlock::iterator I = BB2->begin(); + PHINode *PN = dyn_cast(I); ++I) { + unsigned BB1I = PN->getBasicBlockIndex(BB1); + unsigned BIParentI = PN->getBasicBlockIndex(BIParent); + Value *BB1V = PN->getIncomingValue(BB1I); + Value *BIParentV = PN->getIncomingValue(BIParentI); + if (TrueV == BB1V && FalseV == BIParentV) { + PN->setIncomingValue(BB1I, SI); + PN->setIncomingValue(BIParentI, SI); + } + } } ++NumSpeculations; @@ -1118,13 +1505,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { BranchInst *BI = cast(BB->getTerminator()); unsigned Size = 0; - + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (isa(BBI)) continue; if (Size > 10) return false; // Don't clone large BB's. ++Size; - + // We can only support instructions that do not define values that are // live outside of the current basic block. for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); @@ -1132,7 +1519,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { Instruction *U = cast(*UI); if (U->getParent() != BB || isa(U)) return false; } - + // Looks ok, continue checking. } @@ -1143,38 +1530,38 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used // outside of the block. if (!PN || PN->getParent() != BB || !PN->hasOneUse()) return false; - + // Degenerate case of a single entry PHI. if (PN->getNumIncomingValues() == 1) { FoldSingleEntryPHINodes(PN->getParent()); - return true; + return true; } // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast(PN->getIncomingValue(i)); if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; - + // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - + if (RealDest == BB) continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. if (isa(PredBB->getTerminator())) continue; - + // The dest block might have PHI nodes, other predecessors and other // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -1183,7 +1570,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); - + // Update PHI nodes. AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -1200,7 +1587,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // Clone the instruction. Instruction *N = BBI->clone(); if (BBI->hasName()) N->setName(BBI->getName()+".c"); - + // Update operands due to translation. for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { @@ -1208,7 +1595,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { if (PI != TranslateMap.end()) *i = PI->second; } - + // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, TD)) { TranslateMap[BBI] = V; @@ -1239,7 +1626,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { +static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -1253,7 +1640,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // Don't bother if the branch will be constant folded trivially. isa(IfCond)) return false; - + // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target @@ -1263,14 +1650,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { for (BasicBlock::iterator I = BB->begin(); isa(I); ++NumPhis, ++I) if (NumPhis > 2) return false; - + // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; - + for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); if (Value *V = SimplifyInstruction(PN, TD)) { @@ -1278,19 +1665,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { PN->eraseFromParent(); continue; } - + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, MaxCostVal0) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, MaxCostVal1)) return false; } - - // If we folded the the first phi, PN dangles at this point. Refresh it. If + + // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast(BB->begin()); if (PN == 0) return true; - + // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && @@ -1298,7 +1685,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { isa(PN->getIncomingValue(1)) || isa(IfCond))) return false; - + // If we all PHI nodes are promotable, check to make sure that all // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not @@ -1318,7 +1705,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + if (cast(IfBlock2->getTerminator())->isConditional()) { IfBlock2 = 0; } else { @@ -1331,15 +1718,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); - + // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. Instruction *InsertPt = DomBlock->getTerminator(); IRBuilder Builder(InsertPt); - + // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) @@ -1350,19 +1737,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { DomBlock->getInstList().splice(InsertPt, IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()); - + while (PHINode *PN = dyn_cast(BB->begin())) { // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - - SelectInst *NV = + + SelectInst *NV = cast(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); PN->replaceAllUsesWith(NV); NV->takeName(PN); PN->eraseFromParent(); } - + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. @@ -1376,14 +1763,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); ReturnInst *TrueRet = cast(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast(FalseSucc->getTerminator()); - + // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. @@ -1403,12 +1790,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, EraseTerminatorInstAndDCECond(BI); return true; } - + // Otherwise, figure out what the true and false return values are // so we can insert a new select instruction. Value *TrueValue = TrueRet->getReturnValue(); Value *FalseValue = FalseRet->getReturnValue(); - + // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null(TrueValue)) if (TVPN->getParent() == TrueSucc) @@ -1416,7 +1803,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (PHINode *FVPN = dyn_cast_or_null(FalseValue)) if (FVPN->getParent() == FalseSucc) FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); - + // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is // normally the case, but we could have a potentially-trapping @@ -1428,12 +1815,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (ConstantExpr *FCV = dyn_cast_or_null(FalseValue)) if (FCV->canTrap()) return false; - + // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - + // Insert select instructions where needed. Value *BrCond = BI->getCondition(); if (TrueValue) { @@ -1447,27 +1834,92 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, } } - Value *RI = !TrueValue ? + Value *RI = !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); (void) RI; - + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); - + EraseTerminatorInstAndDCECond(BI); return true; } +/// ExtractBranchMetadata - Given a conditional BranchInstruction, retrieve the +/// probabilities of the branch taking each edge. Fills in the two APInt +/// parameters and return true, or returns false if no or invalid metadata was +/// found. +static bool ExtractBranchMetadata(BranchInst *BI, + uint64_t &ProbTrue, uint64_t &ProbFalse) { + assert(BI->isConditional() && + "Looking for probabilities on unconditional branch?"); + MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof); + if (!ProfileData || ProfileData->getNumOperands() != 3) return false; + ConstantInt *CITrue = dyn_cast(ProfileData->getOperand(1)); + ConstantInt *CIFalse = dyn_cast(ProfileData->getOperand(2)); + if (!CITrue || !CIFalse) return false; + ProbTrue = CITrue->getValue().getZExtValue(); + ProbFalse = CIFalse->getValue().getZExtValue(); + return true; +} + +/// checkCSEInPredecessor - Return true if the given instruction is available +/// in its predecessor block. If yes, the instruction will be removed. +/// +static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { + if (!isa(Inst) && !isa(Inst)) + return false; + for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) { + Instruction *PBI = &*I; + // Check whether Inst and PBI generate the same value. + if (Inst->isIdenticalTo(PBI)) { + Inst->replaceAllUsesWith(PBI); + Inst->eraseFromParent(); + return true; + } + } + return false; +} + /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); - Instruction *Cond = dyn_cast(BI->getCondition()); + Instruction *Cond = 0; + if (BI->isConditional()) + Cond = dyn_cast(BI->getCondition()); + else { + // For unconditional branch, check for a simple CFG pattern, where + // BB has a single predecessor and BB's successor is also its predecessor's + // successor. If such pattern exisits, check for CSE between BB and its + // predecessor. + if (BasicBlock *PB = BB->getSinglePredecessor()) + if (BranchInst *PBI = dyn_cast(PB->getTerminator())) + if (PBI->isConditional() && + (BI->getSuccessor(0) == PBI->getSuccessor(0) || + BI->getSuccessor(0) == PBI->getSuccessor(1))) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ) { + Instruction *Curr = I++; + if (isa(Curr)) { + Cond = Curr; + break; + } + // Quit if we can't remove this instruction. + if (!checkCSEInPredecessor(Curr, PB)) + return false; + } + } + + if (Cond == 0) + return false; + } + if (Cond == 0 || (!isa(Cond) && !isa(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; @@ -1479,7 +1931,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ignore dbg intrinsics. while (isa(FrontIt)) ++FrontIt; - + // Allow a single instruction to be hoisted in addition to the compare // that feeds the branch. We later ensure that any values that _it_ uses // were also live in the predecessor, so that we don't unnecessarily create @@ -1490,7 +1942,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; - + // Ignore dbg intrinsics. while (isa(FrontIt)) ++FrontIt; } @@ -1498,13 +1950,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; - + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ingore dbg intrinsics. while (isa(CondIt)) ++CondIt; - + if (&*CondIt != BI) return false; @@ -1516,98 +1968,105 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (ConstantExpr *CE = dyn_cast(Cond->getOperand(1))) if (CE->canTrap()) return false; - + // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = BI->getSuccessor(1); + BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; if (TrueDest == BB || FalseDest == BB) return false; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast(PredBlock->getTerminator()); - + // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) + SmallVector PHIs; + if (PBI == 0 || PBI->isUnconditional() || + (BI->isConditional() && + !SafeToMergeTerminators(BI, PBI)) || + (!BI->isConditional() && + !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; - + // Determine if the two branches share a common destination. - Instruction::BinaryOps Opc; + Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; bool InvertPredCond = false; - - if (PBI->getSuccessor(0) == TrueDest) - Opc = Instruction::Or; - else if (PBI->getSuccessor(1) == FalseDest) - Opc = Instruction::And; - else if (PBI->getSuccessor(0) == FalseDest) - Opc = Instruction::And, InvertPredCond = true; - else if (PBI->getSuccessor(1) == TrueDest) - Opc = Instruction::Or, InvertPredCond = true; - else - continue; + + if (BI->isConditional()) { + if (PBI->getSuccessor(0) == TrueDest) + Opc = Instruction::Or; + else if (PBI->getSuccessor(1) == FalseDest) + Opc = Instruction::And; + else if (PBI->getSuccessor(0) == FalseDest) + Opc = Instruction::And, InvertPredCond = true; + else if (PBI->getSuccessor(1) == TrueDest) + Opc = Instruction::Or, InvertPredCond = true; + else + continue; + } else { + if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) + continue; + } // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values - // must already have been resolved, so we won't be inhibiting the + // must already have been resolved, so we won't be inhibiting the // out-of-order core by speculating them earlier. if (BonusInst) { // Collect the values used by the bonus inst SmallPtrSet UsedValues; for (Instruction::op_iterator OI = BonusInst->op_begin(), OE = BonusInst->op_end(); OI != OE; ++OI) { - Value* V = *OI; + Value *V = *OI; if (!isa(V)) UsedValues.insert(V); } SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); - + // Walk up to four levels back up the use-def chain of the predecessor's // terminator to see if all those values were used. The choice of four // levels is arbitrary, to provide a compile-time-cost bound. while (!Worklist.empty()) { std::pair Pair = Worklist.back(); Worklist.pop_back(); - + if (Pair.second >= 4) continue; UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - + if (Instruction *I = dyn_cast(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); - } + } } - + if (!UsedValues.empty()) return false; } DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); + IRBuilder<> Builder(PBI); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { Value *NewCond = PBI->getCondition(); - + if (NewCond->hasOneUse() && isa(NewCond)) { CmpInst *CI = cast(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { - NewCond = Builder.CreateNot(NewCond, + NewCond = Builder.CreateNot(NewCond, PBI->getCondition()->getName()+".not"); } - + PBI->setCondition(NewCond); - BasicBlock *OldTrue = PBI->getSuccessor(0); - BasicBlock *OldFalse = PBI->getSuccessor(1); - PBI->setSuccessor(0, OldFalse); - PBI->setSuccessor(1, OldTrue); + PBI->swapSuccessors(); } - + // If we have a bonus inst, clone it into the predecessor block. Instruction *NewBonus = 0; if (BonusInst) { @@ -1616,7 +2075,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); } - + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); @@ -1624,25 +2083,119 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); - - Instruction *NewCond = - cast(Builder.CreateBinOp(Opc, PBI->getCondition(), + + if (BI->isConditional()) { + Instruction *NewCond = + cast(Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); - PBI->setCondition(NewCond); - if (PBI->getSuccessor(0) == BB) { - AddPredecessorToBlock(TrueDest, PredBlock, BB); - PBI->setSuccessor(0, TrueDest); - } - if (PBI->getSuccessor(1) == BB) { - AddPredecessorToBlock(FalseDest, PredBlock, BB); - PBI->setSuccessor(1, FalseDest); + PBI->setCondition(NewCond); + + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + SmallVector NewWeights; + + if (PBI->getSuccessor(0) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * (SuccFalseWeight + + SuccTrueWeight) + PredTrueWeight * SuccFalseWeight); + } + AddPredecessorToBlock(TrueDest, PredBlock, BB); + PBI->setSuccessor(0, TrueDest); + } + if (PBI->getSuccessor(1) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + + SuccTrueWeight) + PredFalseWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } + AddPredecessorToBlock(FalseDest, PredBlock, BB); + PBI->setSuccessor(1, FalseDest); + } + if (NewWeights.size() == 2) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } else + PBI->setMetadata(LLVMContext::MD_prof, NULL); + } else { + // Update PHI nodes in the common successors. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + ConstantInt *PBI_C = cast( + PHIs[i]->getIncomingValueForBlock(PBI->getParent())); + assert(PBI_C->getType()->isIntegerTy(1)); + Instruction *MergedCond = 0; + if (PBI->getSuccessor(0) == TrueDest) { + // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) + // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) + // is false: !PBI_Cond and BI_Value + Instruction *NotCond = + cast(Builder.CreateNot(PBI->getCondition(), + "not.cond")); + MergedCond = + cast(Builder.CreateBinOp(Instruction::And, + NotCond, New, + "and.cond")); + if (PBI_C->isOne()) + MergedCond = + cast(Builder.CreateBinOp(Instruction::Or, + PBI->getCondition(), MergedCond, + "or.cond")); + } else { + // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) + // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) + // is false: PBI_Cond and BI_Value + MergedCond = + cast(Builder.CreateBinOp(Instruction::And, + PBI->getCondition(), New, + "and.cond")); + if (PBI_C->isOne()) { + Instruction *NotCond = + cast(Builder.CreateNot(PBI->getCondition(), + "not.cond")); + MergedCond = + cast(Builder.CreateBinOp(Instruction::Or, + NotCond, MergedCond, + "or.cond")); + } + } + // Update PHI Node. + PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()), + MergedCond); + } + // Change PBI from Conditional to Unconditional. + BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); + EraseTerminatorInstAndDCECond(PBI); + PBI = New_PBI; } + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. + // Copy any debug value intrinsics into the end of PredBlock. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa(*I)) I->clone()->insertBefore(PBI); - + return true; } return false; @@ -1657,7 +2210,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { BasicBlock *BB = BI->getParent(); // If this block ends with a branch instruction, and if there is a - // predecessor that ends on a branch of the same condition, make + // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { @@ -1666,11 +2219,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - + // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. @@ -1690,18 +2243,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), P); } else { NewPN->addIncoming(BI->getCondition(), P); } } - + BI->setCondition(NewPN); return true; } } - + // If this is a conditional branch in an empty block, and if any // predecessors is a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -1712,11 +2265,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - + if (ConstantExpr *CE = dyn_cast(BI->getCondition())) if (CE->canTrap()) return false; - + int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -1728,31 +2281,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBIOp = BIOp = 1; else return false; - + // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will // keep getting unwound. if (PBI->getSuccessor(PBIOp) == BB) return false; - - // Do not perform this transformation if it would require + + // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - + unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa(II); ++II, ++NumPhis) if (NumPhis > 2) // Disable this xform. return false; - + // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - - + + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -1767,13 +2320,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; - } - + } + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. - + // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); IRBuilder Builder(PBI); @@ -1786,16 +2339,43 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Merge the conditions. Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); - + // Modify PBI to branch on the new condition to the new dests. PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - + + // Update branch weight for PBI. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + if (PredHasWeights && SuccHasWeights) { + uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight; + uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to CommonDest should be PredCommon * SuccTotal + + // PredOther * SuccCommon. + // The weight to OtherDest should be PredOther * SuccOther. + SmallVector NewWeights; + NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon); + NewWeights.push_back(PredOther * SuccOther); + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } + // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); - + // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make @@ -1813,10 +2393,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PN->setIncomingValue(PBBIdx, NV); } } - + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); DEBUG(dbgs() << *PBI->getParent()->getParent()); - + // This basic block is probably dead. We know it has at least // one fewer predecessor. return true; @@ -1828,7 +2408,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Also makes sure not to introduce new successors by assuming that edges to // non-successor TrueBBs and FalseBBs aren't reachable. static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, - BasicBlock *TrueBB, BasicBlock *FalseBB){ + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight){ // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -1857,10 +2439,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // We were only looking for one successor, and it was present. // Create an unconditional branch to it. Builder.CreateBr(TrueBB); - else + else { // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. - Builder.CreateCondBr(Cond, TrueBB, FalseBB); + BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); + if (TrueWeight != FalseWeight) + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(OldTerm->getContext()). + createBranchWeights(TrueWeight, FalseWeight)); + } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this // terminator must be unreachable. @@ -1894,11 +2481,26 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { // Find the relevant condition and destinations. Value *Condition = Select->getCondition(); - BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal)); - BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal)); + BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); + BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); + + // Get weight for TrueBB and FalseBB. + uint32_t TrueWeight = 0, FalseWeight = 0; + SmallVector Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal). + getSuccessorIndex()]; + FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal). + getSuccessorIndex()]; + } + } // Perform the actual simplification. - return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, + TrueWeight, FalseWeight); } // SimplifyIndirectBrOnSelect - Replaces @@ -1918,7 +2520,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { BasicBlock *FalseBB = FBA->getBasicBlock(); // Perform the actual simplification. - return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB); + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, + 0, 0); } /// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp @@ -1935,12 +2538,12 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] -/// +/// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. -static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, - const TargetData *TD, - IRBuilder<> &Builder) { +static bool TryToSimplifyUncondBranchWithICmpInIt( + ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, + const DataLayout *TD) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -1949,17 +2552,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, Value *V = ICI->getOperand(0); ConstantInt *Cst = cast(ICI->getOperand(1)); - + // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); if (Pred == 0 || !isa(Pred->getTerminator())) return false; - + SwitchInst *SI = cast(Pred->getTerminator()); if (SI->getCondition() != V) return false; - + // If BB is reachable on a non-default case, then we simply know the value of // V in this block. Substitute it and constant fold the icmp instruction // away. @@ -1967,31 +2570,31 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ConstantInt *VVal = SI->findCaseDest(BB); assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - + if (Value *V = SimplifyInstruction(ICI, TD)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } - + // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. - if (SI->findCaseValue(Cst) != 0) { + if (SI->findCaseValue(Cst) != SI->case_default()) { Value *V; if (ICI->getPredicate() == ICmpInst::ICMP_EQ) V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); - + ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } - + // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); @@ -2017,8 +2620,23 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // the switch to the merge point on the compared value. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); + SmallVector Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Split weight for default case to case for "Cst". + Weights[0] = (Weights[0]+1) >> 1; + Weights.push_back(Weights[0]); + + SmallVector MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(MDWeights)); + } + } SI->addCase(Cst, NewBB); - + // NewBB branches to the phi block, add the uncond branch and the phi entry. Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); @@ -2030,12 +2648,12 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, +static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast(BI->getCondition()); if (Cond == 0) return false; - - + + // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. @@ -2044,7 +2662,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, bool TrueWhenEqual = true; Value *ExtraCase = 0; unsigned UsedICmps = 0; - + if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, UsedICmps); @@ -2053,7 +2671,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, UsedICmps); TrueWhenEqual = false; } - + // If we didn't have a multiply compared value, fail. if (CompVal == 0) return false; @@ -2065,21 +2683,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, // instruction can't handle, remove them now. array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - + // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just an cond branch. if (ExtraCase && Values.size() < 2) return false; - + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - + BasicBlock *BB = BI->getParent(); - + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); - + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. @@ -2093,13 +2714,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - + OldTI->eraseFromParent(); - + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); - + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase << "\nEXTRABB = " << *BB); BB = NewBB; @@ -2108,19 +2729,19 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without TargetData"); + assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, TD->getIntPtrType(CompVal->getContext()), "magicptr"); } - + // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); // Add all of the 'cases' to the switch instruction. for (unsigned i = 0, e = Values.size(); i != e; ++i) New->addCase(Values[i], EdgeBB); - + // We added edges from PI to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. @@ -2131,10 +2752,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, for (unsigned i = 0, e = Values.size()-1; i != e; ++i) PN->addIncoming(InVal, BB); } - + // Erase the old branch instruction. EraseTerminatorInstAndDCECond(BI); - + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } @@ -2188,7 +2809,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - + // Find predecessors that end with branches. SmallVector UncondBranchPreds; SmallVector CondBranchPreds; @@ -2202,7 +2823,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { CondBranchPreds.push_back(BI); } } - + // If we found some, do the transformation! if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { @@ -2211,21 +2832,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } - + // If we eliminated all predecessors of the block, delete the block now. if (pred_begin(BB) == pred_end(BB)) // We know there are no successors, so just nuke the block. BB->eraseFromParent(); - + return true; } - + // Check out all of the conditional branches going to this return // instruction. If any of them just select between returns, change the // branch itself into a select/return pair. while (!CondBranchPreds.empty()) { BranchInst *BI = CondBranchPreds.pop_back_val(); - + // Check to see if the non-BB successor is also a return block. if (isa(BI->getSuccessor(0)->getTerminator()) && isa(BI->getSuccessor(1)->getTerminator()) && @@ -2235,57 +2856,11 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { return false; } -bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) { - // Check to see if the first instruction in this block is just an unwind. - // If so, replace any invoke instructions which use this as an exception - // destination with call instructions. +bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); - if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; bool Changed = false; - SmallVector Preds(pred_begin(BB), pred_end(BB)); - while (!Preds.empty()) { - BasicBlock *Pred = Preds.back(); - InvokeInst *II = dyn_cast(Pred->getTerminator()); - if (II && II->getUnwindDest() == BB) { - // Insert a new branch instruction before the invoke, because this - // is now a fall through. - Builder.SetInsertPoint(II); - BranchInst *BI = Builder.CreateBr(II->getNormalDest()); - Pred->getInstList().remove(II); // Take out of symbol table - - // Insert the call now. - SmallVector Args(II->op_begin(), II->op_end()-3); - Builder.SetInsertPoint(BI); - CallInst *CI = Builder.CreateCall(II->getCalledValue(), - Args, II->getName()); - CI->setCallingConv(II->getCallingConv()); - CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the Call now does instead. - II->replaceAllUsesWith(CI); - delete II; - Changed = true; - } - - Preds.pop_back(); - } - - // If this block is now dead (and isn't the entry block), remove it. - if (pred_begin(BB) == pred_end(BB) && - BB != &BB->getParent()->getEntryBlock()) { - // We know there are no successors, so just nuke the block. - BB->eraseFromParent(); - return true; - } - - return Changed; -} -bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { - BasicBlock *BB = UI->getParent(); - - bool Changed = false; - // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI != BB->begin()) { @@ -2325,11 +2900,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BBI->eraseFromParent(); Changed = true; } - + // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. if (&BB->front() != UI) return Changed; - + SmallVector Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); @@ -2352,8 +2927,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } else if (SwitchInst *SI = dyn_cast(TI)) { - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - if (SI->getSuccessor(i) == BB) { + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseSuccessor() == BB) { BB->removePredecessor(SI->getParent()); SI->removeCase(i); --i; --e; @@ -2361,14 +2937,15 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } // If the default value is unreachable, figure out the most popular // destination and make it the default. - if (SI->getSuccessor(0) == BB) { + if (SI->getDefaultDest() == BB) { std::map > Popularity; - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { - std::pair& entry = - Popularity[SI->getSuccessor(i)]; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + std::pair &entry = + Popularity[i.getCaseSuccessor()]; if (entry.first == 0) { entry.first = 1; - entry.second = i; + entry.second = i.getCaseIndex(); } else { entry.first++; } @@ -2380,7 +2957,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *MaxBlock = 0; for (std::map >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || + if (I->second.first > MaxPop || (I->second.first == MaxPop && MaxIndex > I->second.second)) { MaxPop = I->second.first; MaxIndex = I->second.second; @@ -2390,17 +2967,18 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { if (MaxBlock) { // Make this the new default, allowing us to delete any explicit // edges to it. - SI->setSuccessor(0, MaxBlock); + SI->setDefaultDest(MaxBlock); Changed = true; - + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from // it. if (isa(MaxBlock->begin())) for (unsigned i = 0; i != MaxPop-1; ++i) MaxBlock->removePredecessor(SI->getParent()); - - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - if (SI->getSuccessor(i) == MaxBlock) { + + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseSuccessor() == MaxBlock) { SI->removeCase(i); --i; --e; } @@ -2412,7 +2990,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // place to note that the call does not throw though. BranchInst *BI = Builder.CreateBr(II->getNormalDest()); II->removeFromParent(); // Take out of symbol table - + // Insert the call now... SmallVector Args(II->op_begin(), II->op_end()-3); Builder.SetInsertPoint(BI); @@ -2427,7 +3005,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } - + // If this block is now dead, remove it. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { @@ -2442,17 +3020,19 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { /// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a /// integer range comparison into a sub, an icmp and a branch. static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { - assert(SI->getNumCases() > 2 && "Degenerate switch?"); + assert(SI->getNumCases() > 1 && "Degenerate switch?"); // Make sure all cases point to the same destination and gather the values. SmallVector Cases; - Cases.push_back(SI->getCaseValue(1)); - for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) { - if (SI->getSuccessor(I-1) != SI->getSuccessor(I)) + SwitchInst::CaseIt I = SI->case_begin(); + Cases.push_back(I.getCaseValue()); + SwitchInst::CaseIt PrevI = I++; + for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) { + if (PrevI.getCaseSuccessor() != I.getCaseSuccessor()) return false; - Cases.push_back(SI->getCaseValue(I)); + Cases.push_back(I.getCaseValue()); } - assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered"); + assert(Cases.size() == SI->getNumCases() && "Not all cases gathered"); // Sort the case values, then check if they form a range we can transform. array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); @@ -2462,18 +3042,38 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { } Constant *Offset = ConstantExpr::getNeg(Cases.back()); - Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1); + Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()); Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest()); + BranchInst *NewBI = Builder.CreateCondBr( + Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); + + // Update weight for the newly-created conditional branch. + SmallVector Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Combine all weights for the cases to be the true weight of NewBI. + // We assume that the sum of all weights for a Terminator can fit into 32 + // bits. + uint32_t NewTrueWeight = 0; + for (unsigned I = 1, E = Weights.size(); I != E; ++I) + NewTrueWeight += (uint32_t)Weights[I]; + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(NewTrueWeight, + (uint32_t)Weights[0])); + } + } // Prune obsolete incoming values off the successor's PHI nodes. - for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin(); + for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin(); isa(BBI); ++BBI) { - for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I) + for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I) cast(BBI)->removeIncomingValue(SI->getParent()); } SI->eraseFromParent(); @@ -2487,26 +3087,46 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); unsigned Bits = cast(Cond->getType())->getBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne); + ComputeMaskedBits(Cond, KnownZero, KnownOne); // Gather dead cases. SmallVector DeadCases; - for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) { - if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 || - (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) { - DeadCases.push_back(SI->getCaseValue(I)); + for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) { + if ((I.getCaseValue()->getValue() & KnownZero) != 0 || + (I.getCaseValue()->getValue() & KnownOne) != KnownOne) { + DeadCases.push_back(I.getCaseValue()); DEBUG(dbgs() << "SimplifyCFG: switch case '" - << SI->getCaseValue(I)->getValue() << "' is dead.\n"); + << I.getCaseValue() << "' is dead.\n"); } } + SmallVector Weights; + bool HasWeight = HasBranchWeights(SI); + if (HasWeight) { + GetBranchWeights(SI, Weights); + HasWeight = (Weights.size() == 1 + SI->getNumCases()); + } + // Remove dead cases from the switch. for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { - unsigned Case = SI->findCaseValue(DeadCases[I]); + SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]); + assert(Case != SI->case_default() && + "Case was not found. Probably mistake in DeadCases forming."); + if (HasWeight) { + std::swap(Weights[Case.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } + // Prune unused values from PHI nodes. - SI->getSuccessor(Case)->removePredecessor(SI->getParent()); + Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } + if (HasWeight) { + SmallVector MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(MDWeights)); + } return !DeadCases.empty(); } @@ -2553,9 +3173,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { typedef DenseMap > ForwardingNodesMap; ForwardingNodesMap ForwardingNodes; - for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case. - ConstantInt *CaseValue = SI->getCaseValue(I); - BasicBlock *CaseDest = SI->getSuccessor(I); + for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) { + ConstantInt *CaseValue = I.getCaseValue(); + BasicBlock *CaseDest = I.getCaseSuccessor(); int PhiIndex; PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest, @@ -2582,44 +3202,547 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { return Changed; } -bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { - // If this switch is too complex to want to look at, ignore it. - if (!isValueEqualityComparison(SI)) +/// ValidLookupTableConstant - Return true if the backend will be able to handle +/// initializing an array of constants like C. +static bool ValidLookupTableConstant(Constant *C) { + if (ConstantExpr *CE = dyn_cast(C)) + return CE->isGEPWithNoNotionalOverIndexing(); + + return isa(C) || + isa(C) || + isa(C) || + isa(C) || + isa(C); +} + +/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up +/// its constant value in ConstantPool, returning 0 if it's not there. +static Constant *LookupConstant(Value *V, + const SmallDenseMap& ConstantPool) { + if (Constant *C = dyn_cast(V)) + return C; + return ConstantPool.lookup(V); +} + +/// ConstantFold - Try to fold instruction I into a constant. This works for +/// simple instructions such as binary operations where both operands are +/// constant or can be replaced by constants from the ConstantPool. Returns the +/// resulting constant on success, 0 otherwise. +static Constant *ConstantFold(Instruction *I, + const SmallDenseMap& ConstantPool) { + if (BinaryOperator *BO = dyn_cast(I)) { + Constant *A = LookupConstant(BO->getOperand(0), ConstantPool); + if (!A) + return 0; + Constant *B = LookupConstant(BO->getOperand(1), ConstantPool); + if (!B) + return 0; + return ConstantExpr::get(BO->getOpcode(), A, B); + } + + if (CmpInst *Cmp = dyn_cast(I)) { + Constant *A = LookupConstant(I->getOperand(0), ConstantPool); + if (!A) + return 0; + Constant *B = LookupConstant(I->getOperand(1), ConstantPool); + if (!B) + return 0; + return ConstantExpr::getCompare(Cmp->getPredicate(), A, B); + } + + if (SelectInst *Select = dyn_cast(I)) { + Constant *A = LookupConstant(Select->getCondition(), ConstantPool); + if (!A) + return 0; + if (A->isAllOnesValue()) + return LookupConstant(Select->getTrueValue(), ConstantPool); + if (A->isNullValue()) + return LookupConstant(Select->getFalseValue(), ConstantPool); + return 0; + } + + if (CastInst *Cast = dyn_cast(I)) { + Constant *A = LookupConstant(I->getOperand(0), ConstantPool); + if (!A) + return 0; + return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy()); + } + + return 0; +} + +/// GetCaseResults - Try to determine the resulting constant values in phi nodes +/// at the common destination basic block, *CommonDest, for one of the case +/// destionations CaseDest corresponding to value CaseVal (0 for the default +/// case), of a switch instruction SI. +static bool GetCaseResults(SwitchInst *SI, + ConstantInt *CaseVal, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVector, 4> &Res) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty except for some side-effect free instructions through + // which we can constant-propagate the CaseVal, continue to its successor. + SmallDenseMap ConstantPool; + ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); + for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; + ++I) { + if (TerminatorInst *T = dyn_cast(I)) { + // If the terminator is a simple branch, continue to the next block. + if (T->getNumSuccessors() != 1) + return false; + Pred = CaseDest; + CaseDest = T->getSuccessor(0); + } else if (isa(I)) { + // Skip debug intrinsic. + continue; + } else if (Constant *C = ConstantFold(I, ConstantPool)) { + // Instruction is side-effect free and constant. + ConstantPool.insert(std::make_pair(I, C)); + } else { + break; + } + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx), + ConstantPool); + if (!ConstVal) + return false; + + // Note: If the constant comes from constant-propagating the case value + // through the CaseDest basic block, it will be safe to remove the + // instructions in that block. They cannot be used (except in the phi nodes + // we visit) outside CaseDest, because that block does not dominate its + // successor. If it did, we would not be in this phi node. + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return true; +} + +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. + class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector, 4>& Values, + Constant *DefaultValue, + const DataLayout *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const DataLayout *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector, 4>& Values, + Constant *DefaultValue, + const DataLayout *TD) + : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); + + // If all values in the table are equal, this is that value. + SingleValue = Values.begin()->second; + + // Build up the table contents. + SmallVector TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SingleValue) + SingleValue = 0; + } + + // Fill in any holes in the table with the default result. + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; + } + + if (DefaultValue != SingleValue) + SingleValue = 0; + } + + // If each element in the table contains the same value, we only need to store + // that single value. + if (SingleValue) { + Kind = SingleValueKind; + return; + } + + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + // Insert values into the bitmap. Undef values are set to zero. + if (!isa(TableContents[I - 1])) { + ConstantInt *Val = cast(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; + } + + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of caes, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const TargetTransformInfo &TTI, + const DataLayout *TD, + const SmallDenseMap& ResultTypes) { + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + + bool AllTablesFitInRegister = true; + bool HasIllegalType = false; + for (SmallDenseMap::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + Type *Ty = I->second; + + // Saturate this flag to true. + HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty); + + // Saturate this flag to false. + AllTablesFitInRegister = AllTablesFitInRegister && + SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty); + + // If both flags saturate, we're done. NOTE: This *only* works with + // saturating flags, and all flags have to saturate first due to the + // non-deterministic behavior of iterating over a dense map. + if (HasIllegalType && !AllTablesFitInRegister) + break; + } + + // If each table would fit in a register, we should build it anyway. + if (AllTablesFitInRegister) + return true; + + // Don't build a table that doesn't fit in-register if it has illegal types. + if (HasIllegalType) + return false; + + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + return SI->getNumCases() * 10 >= TableSize * 4; +} + +/// SwitchToLookupTable - If the switch is only used to initialize one or more +/// phi nodes in a common successor block with different constant values, +/// replace the switch with lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, + IRBuilder<> &Builder, + const TargetTransformInfo &TTI, + const DataLayout* TD) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + // Only build lookup table when we have a target that supports it. + if (!TTI.shouldBuildLookupTables()) + return false; + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore the switch if the number of cases is too small. + // This is similar to the check when building jump tables in + // SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Determine the best cut-off. + if (SI->getNumCases() < 4) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = 0; + typedef SmallVector, 4> ResultListTy; + SmallDenseMap ResultLists; + SmallDenseMap DefaultResults; + SmallDenseMap ResultTypes; + SmallVector PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, + Results)) + return false; + + // Append the result from this case to the list for each phi. + for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) { + if (!ResultLists.count(I->first)) + PHIs.push_back(I->first); + ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second)); + } + } + + // Get the resulting values for the default case. + SmallVector, 4> DefaultResultsList; + if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, + DefaultResultsList)) return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); + } + + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes)) + return false; + + // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); + BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Check whether the condition value is within the case range, and branch to + // the new BB. + Builder.SetInsertPoint(SI); + Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Populate the BB that does the lookups. + Builder.SetInsertPoint(LookupBB); + bool ReturnedEarly = false; + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); + + Value *Result = Table.BuildLookup(TableIndex, Builder); + + // If the result is used to return immediately from the function, we want to + // do that right here. + if (PHI->hasOneUse() && isa(*PHI->use_begin()) && + *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + break; + } + + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch. + for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == SI->getDefaultDest()) continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + ++NumLookupTables; + return true; +} + +bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); - // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this switch. - if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB) | true; + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; - Value *Cond = SI->getCondition(); - if (SelectInst *Select = dyn_cast(Cond)) - if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB) | true; + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB, TTI, TD) | true; - // If the block only contains the switch, see if we can fold the block - // away into any preds. - BasicBlock::iterator BBI = BB->begin(); - // Ignore dbg intrinsics. - while (isa(BBI)) - ++BBI; - if (SI == &*BBI) - if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB) | true; + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; + } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; // Remove unreachable cases. if (EliminateDeadSwitchCases(SI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; + + if (SwitchToLookupTable(SI, Builder, TTI, TD)) + return SimplifyCFG(BB, TTI, TD) | true; return false; } @@ -2627,7 +3750,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; - + // Eliminate redundant destinations. SmallPtrSet Succs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { @@ -2638,7 +3761,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { --i; --e; Changed = true; } - } + } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. @@ -2646,48 +3769,57 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { EraseTerminatorInstAndDCECond(IBI); return true; } - + if (IBI->getNumDestinations() == 1) { // If the indirectbr has one successor, change it to a direct branch. BranchInst::Create(IBI->getDestination(0), IBI); EraseTerminatorInstAndDCECond(IBI); return true; } - + if (SelectInst *SI = dyn_cast(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } return Changed; } bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); - + + if (SinkCommon && SinkThenElseCodeToEnd(BI)) + return true; + // If the Terminator is the only non-phi instruction, simplify the block. BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - + // If the only instruction in the block is a seteq/setne comparison // against a constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast(I)) if (ICI->isEquality() && isa(ICI->getOperand(1))) { for (++I; isa(I); ++I) ; - if (I->isTerminator() - && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) + if (I->isTerminator() && + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD)) return true; } - + + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and our successor, fold the comparison into the + // predecessor and use logical operations to update the incoming value + // for PHI nodes in common successor. + if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB, TTI, TD) | true; return false; } bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); - + // Conditional branch if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, @@ -2695,8 +3827,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB) | true; - + return SimplifyCFG(BB, TTI, TD) | true; + // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. BasicBlock::iterator I = BB->begin(); @@ -2705,21 +3837,27 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } else if (&*I == cast(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } } - + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. if (SimplifyBranchOnICmpChain(BI, TD, Builder)) return true; - + + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and one of our successors, fold the comparison into the + // predecessor and use logical operations to pick the right destination. + if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB, TTI, TD) | true; + // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we @@ -2727,7 +3865,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { if (HoistThenElseCodeToIf(BI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to successor #1. @@ -2735,7 +3873,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -2744,28 +3882,22 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } - + // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, TD)) - return SimplifyCFG(BB) | true; - - // If this basic block is ONLY a setcc and a branch, and if a predecessor - // branches to us and one of our successors, fold the setcc into the - // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB) | true; - + return SimplifyCFG(BB, TTI, TD) | true; + // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; return false; } @@ -2776,11 +3908,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { if (!C) return false; - if (!I->hasOneUse()) // Only look at single-use instructions, for compile time + if (I->use_empty()) return false; if (C->isNullValue()) { - Instruction *Use = I->use_back(); + // Only look at the first use, avoid hurting compile time with long uselists + User *Use = *I->use_begin(); // Now make sure that there are no instructions in between that can alter // control flow (eg. calls) @@ -2809,7 +3942,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { } /// If BB has an incoming value that will always trigger undefined behavior -/// (eg. null pointer derefence), remove the branch leading here. +/// (eg. null pointer dereference), remove the branch leading here. static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { for (BasicBlock::iterator i = BB->begin(); PHINode *PHI = dyn_cast(i); ++i) @@ -2867,7 +4000,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // if (MergeBlockIntoPredecessor(BB)) return true; - + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can @@ -2883,17 +4016,15 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { } else { if (SimplifyCondBranch(BI, Builder)) return true; } - } else if (ResumeInst *RI = dyn_cast(BB->getTerminator())) { - if (SimplifyResume(RI, Builder)) return true; } else if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { if (SimplifyReturn(RI, Builder)) return true; + } else if (ResumeInst *RI = dyn_cast(BB->getTerminator())) { + if (SimplifyResume(RI, Builder)) return true; } else if (SwitchInst *SI = dyn_cast(BB->getTerminator())) { if (SimplifySwitch(SI, Builder)) return true; } else if (UnreachableInst *UI = dyn_cast(BB->getTerminator())) { if (SimplifyUnreachable(UI)) return true; - } else if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { - if (SimplifyUnwind(UI, Builder)) return true; } else if (IndirectBrInst *IBI = dyn_cast(BB->getTerminator())) { if (SimplifyIndirectBr(IBI)) return true; @@ -2907,6 +4038,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// eliminates unreachable basic blocks, and does other "peephole" optimization /// of the CFG. It returns true if a modification was made. /// -bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) { - return SimplifyCFGOpt(TD).run(BB); +bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + const DataLayout *TD) { + return SimplifyCFGOpt(TTI, TD).run(BB); }