#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
#include "llvm/LLVMContext.h"
#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/NoFolder.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <set>
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
+static cl::opt<bool>
+SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+ cl::desc("Sink common instructions down to the end block"));
+
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
+STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
// Comparing pointers is ok as we only rely on the order for uniquing.
return Value < RHS.Value;
}
+
+ bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};
class SimplifyCFGOpt {
- const TargetData *const TD;
+ const DataLayout *const TD;
+ const TargetTransformInfo *const TTI;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
+ SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti)
+ : TD(td), TTI(tti) {}
bool run(BasicBlock *BB);
};
}
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
+static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = TD->getIntPtrType(V->getContext());
+ IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType()));
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
/// Values vector.
static Value *
GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
- const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+ const DataLayout *TD, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
- if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
- if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
+ if (TD && CV) {
+ PtrToIntInst *PTII = NULL;
+ if ((PTII = dyn_cast<PtrToIntInst>(CV)) &&
+ CV->getType() == TD->getIntPtrType(CV->getContext(),
+ PTII->getPointerAddressSpace()))
CV = PTII->getOperand(0);
+ }
return CV;
}
/// in the list that match the specified block.
static void EliminateBlockCases(BasicBlock *BB,
std::vector<ValueEqualityComparisonCase> &Cases) {
- for (unsigned i = 0, e = Cases.size(); i != e; ++i)
- if (Cases[i].Dest == BB) {
- Cases.erase(Cases.begin()+i);
- --i; --e;
- }
+ Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
}
/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
+ // Collect branch weights into a vector.
+ SmallVector<uint32_t, 8> Weights;
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
+ if (HasWeight)
+ for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
+ ++MD_i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
if (DeadCases.count(i.getCaseValue())) {
+ if (HasWeight) {
+ std::swap(Weights[i.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
i.getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
}
+ if (HasWeight && Weights.size() >= 2)
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(Weights));
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
static int ConstantIntSortPredicate(const void *P1, const void *P2) {
- const ConstantInt *LHS = *(const ConstantInt**)P1;
- const ConstantInt *RHS = *(const ConstantInt**)P2;
+ const ConstantInt *LHS = *(const ConstantInt*const*)P1;
+ const ConstantInt *RHS = *(const ConstantInt*const*)P2;
if (LHS->getValue().ult(RHS->getValue()))
return 1;
if (LHS->getValue() == RHS->getValue())
return false;
}
-/// Tries to get a branch weight for the given instruction, returns NULL if it
-/// can't. Pos starts at 0.
-static ConstantInt* GetWeight(Instruction* I, int Pos) {
- MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof);
- if (ProfMD && ProfMD->getOperand(0)) {
- if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) {
- if (MDS->getString().equals("branch_weights")) {
- assert(ProfMD->getNumOperands() >= 3);
- return dyn_cast<ConstantInt>(ProfMD->getOperand(1 + Pos));
- }
- }
- }
-
- return 0;
-}
-
-/// Scale the given weights based on the successor TI's metadata. Scaling is
-/// done by multiplying every weight by the sum of the successor's weights.
-static void ScaleWeights(Instruction* STI, MutableArrayRef<uint64_t> Weights) {
- // Sum the successor's weights
- assert(HasBranchWeights(STI));
- unsigned Scale = 0;
- MDNode* ProfMD = STI->getMetadata(LLVMContext::MD_prof);
- for (unsigned i = 1; i < ProfMD->getNumOperands(); ++i) {
- ConstantInt* CI = dyn_cast<ConstantInt>(ProfMD->getOperand(i));
+/// Get Weights of a given TerminatorInst, the default weight is at the front
+/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
+/// metadata.
+static void GetBranchWeights(TerminatorInst *TI,
+ SmallVectorImpl<uint64_t> &Weights) {
+ MDNode* MD = TI->getMetadata(LLVMContext::MD_prof);
+ assert(MD);
+ for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i));
assert(CI);
- Scale += CI->getValue().getZExtValue();
+ Weights.push_back(CI->getValue().getZExtValue());
}
- // Skip default, as it's replaced during the folding
- for (unsigned i = 1; i < Weights.size(); ++i) {
- Weights[i] *= Scale;
+ // If TI is a conditional eq, the default case is the false case,
+ // and the corresponding branch-weight data is at index 2. We swap the
+ // default weight to be the first entry.
+ if (BranchInst* BI = dyn_cast<BranchInst>(TI)) {
+ assert(Weights.size() == 2);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(Weights.front(), Weights.back());
}
}
// Update the branch weight metadata along the way
SmallVector<uint64_t, 8> Weights;
- uint64_t PredDefaultWeight = 0;
bool PredHasWeights = HasBranchWeights(PTI);
bool SuccHasWeights = HasBranchWeights(TI);
if (PredHasWeights) {
- MDNode* MD = PTI->getMetadata(LLVMContext::MD_prof);
- assert(MD);
- for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
- ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i));
- assert(CI);
- Weights.push_back(CI->getValue().getZExtValue());
- }
-
- // If the predecessor is a conditional eq, then swap the default weight
- // to be the first entry.
- if (BranchInst* BI = dyn_cast<BranchInst>(PTI)) {
- assert(Weights.size() == 2);
- ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
-
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
- std::swap(Weights.front(), Weights.back());
- }
- }
-
- PredDefaultWeight = Weights.front();
- } else if (SuccHasWeights) {
+ GetBranchWeights(PTI, Weights);
+ // branch-weight metadata is inconsistant here.
+ if (Weights.size() != 1 + PredCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (SuccHasWeights)
// If there are no predecessor weights but there are successor weights,
// populate Weights with 1, which will later be scaled to the sum of
// successor's weights
Weights.assign(1 + PredCases.size(), 1);
- PredDefaultWeight = 1;
- }
- uint64_t SuccDefaultWeight = 0;
+ SmallVector<uint64_t, 8> SuccWeights;
if (SuccHasWeights) {
- int Index = 0;
- if (BranchInst* BI = dyn_cast<BranchInst>(TI)) {
- ICmpInst* ICI = dyn_cast<ICmpInst>(BI->getCondition());
- assert(ICI);
-
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
- Index = 1;
- }
-
- SuccDefaultWeight = GetWeight(TI, Index)->getValue().getZExtValue();
- }
+ GetBranchWeights(TI, SuccWeights);
+ // branch-weight metadata is inconsistant here.
+ if (SuccWeights.size() != 1 + BBCases.size())
+ PredHasWeights = SuccHasWeights = false;
+ } else if (PredHasWeights)
+ SuccWeights.assign(1 + BBCases.size(), 1);
if (PredDefault == BB) {
// If this is the default destination from PTI, only the edges in TI
// The default destination is BB, we don't need explicit targets.
std::swap(PredCases[i], PredCases.back());
- if (PredHasWeights) {
+ if (PredHasWeights || SuccHasWeights) {
+ // Increase weight for the default case.
+ Weights[0] += Weights[i+1];
std::swap(Weights[i+1], Weights.back());
Weights.pop_back();
}
NewSuccessors.push_back(BBDefault);
}
- if (SuccHasWeights) {
- ScaleWeights(TI, Weights);
- Weights.front() *= SuccDefaultWeight;
- } else if (PredHasWeights) {
- Weights.front() /= (1 + BBCases.size());
- }
-
+ unsigned CasesFromPred = Weights.size();
+ uint64_t ValidTotalSuccWeight = 0;
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
if (!PTIHandled.count(BBCases[i].Value) &&
BBCases[i].Dest != BBDefault) {
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
- if (SuccHasWeights) {
- Weights.push_back(PredDefaultWeight *
- GetWeight(TI, i)->getValue().getZExtValue());
- } else if (PredHasWeights) {
- // Split the old default's weight amongst the children
- assert(PredDefaultWeight != 0);
- Weights.push_back(PredDefaultWeight / (1 + BBCases.size()));
+ if (SuccHasWeights || PredHasWeights) {
+ // The default weight is at index 0, so weight for the ith case
+ // should be at index i+1. Scale the cases from successor by
+ // PredDefaultWeight (Weights[0]).
+ Weights.push_back(Weights[0] * SuccWeights[i+1]);
+ ValidTotalSuccWeight += SuccWeights[i+1];
}
}
+ if (SuccHasWeights || PredHasWeights) {
+ ValidTotalSuccWeight += SuccWeights[0];
+ // Scale the cases from predecessor by ValidTotalSuccWeight.
+ for (unsigned i = 1; i < CasesFromPred; ++i)
+ Weights[i] *= ValidTotalSuccWeight;
+ // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
+ Weights[0] *= SuccWeights[0];
+ }
} else {
- // FIXME: preserve branch weight metadata, similarly to the 'then'
- // above. For now, drop it.
- PredHasWeights = false;
- SuccHasWeights = false;
-
// If this is not the default destination from PSI, only the edges
// in SI that occur in PSI with a destination of BB will be
// activated.
std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
+ std::map<ConstantInt*, uint64_t> WeightsForHandled;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest == BB) {
PTIHandled.insert(PredCases[i].Value);
+
+ if (PredHasWeights || SuccHasWeights) {
+ WeightsForHandled[PredCases[i].Value] = Weights[i+1];
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
std::swap(PredCases[i], PredCases.back());
PredCases.pop_back();
--i; --e;
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
if (PTIHandled.count(BBCases[i].Value)) {
// If this is one we are capable of getting...
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[BBCases[i].Value]);
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
PTIHandled.erase(BBCases[i].Value);// This constant is taken care of
for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
PTIHandled.begin(),
E = PTIHandled.end(); I != E; ++I) {
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[*I]);
PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
NewSuccessors.push_back(BBDefault);
}
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without TargetData");
- CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ assert(TD && "Cannot switch on pointer without DataLayout");
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
"magicptr");
}
return true;
}
+/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BB1 = BI1->getParent();
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+ // Check that BBEnd has two predecessors and the other predecessor ends with
+ // an unconditional branch.
+ pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
+ BasicBlock *Pred0 = *PI++;
+ if (PI == PE) // Only one predecessor.
+ return false;
+ BasicBlock *Pred1 = *PI++;
+ if (PI != PE) // More than two predecessors.
+ return false;
+ BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
+ BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
+ if (!BI2 || !BI2->isUnconditional())
+ return false;
+
+ // Gather the PHI nodes in BBEnd.
+ std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
+ Instruction *FirstNonPhiInBBEnd = 0;
+ for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
+ } else {
+ FirstNonPhiInBBEnd = &*I;
+ break;
+ }
+ }
+ if (!FirstNonPhiInBBEnd)
+ return false;
+
+
+ // This does very trivial matching, with limited scanning, to find identical
+ // instructions in the two blocks. We scan backward for obviously identical
+ // instructions in an identical order.
+ BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
+ RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(),
+ RE2 = BB2->getInstList().rend();
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return false;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return false;
+ // Skip the unconditional branches.
+ ++RI1;
+ ++RI2;
+
+ bool Changed = false;
+ while (RI1 != RE1 && RI2 != RE2) {
+ // Skip debug info.
+ while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+ if (RI1 == RE1)
+ return Changed;
+ while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+ if (RI2 == RE2)
+ return Changed;
+
+ Instruction *I1 = &*RI1, *I2 = &*RI2;
+ // I1 and I2 should have a single use in the same PHI node, and they
+ // perform the same operation.
+ // Cannot move control-flow-involving, volatile loads, vaarg, etc.
+ if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
+ isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
+ isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+ isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
+ I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
+ I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
+ !I1->hasOneUse() || !I2->hasOneUse() ||
+ MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() ||
+ MapValueFromBB1ToBB2[I1].first != I2)
+ return Changed;
+
+ // Check whether we should swap the operands of ICmpInst.
+ ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
+ bool SwapOpnds = false;
+ if (ICmp1 && ICmp2 &&
+ ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
+ ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
+ (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
+ ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
+ ICmp2->swapOperands();
+ SwapOpnds = true;
+ }
+ if (!I1->isSameOperationAs(I2)) {
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+
+ // The operands should be either the same or they need to be generated
+ // with a PHI node after sinking. We only handle the case where there is
+ // a single pair of different operands.
+ Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+ unsigned Op1Idx = 0;
+ for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
+ if (I1->getOperand(I) == I2->getOperand(I))
+ continue;
+ // Early exit if we have more-than one pair of different operands or
+ // the different operand is already in MapValueFromBB1ToBB2.
+ // Early exit if we need a PHI node to replace a constant.
+ if (DifferentOp1 ||
+ MapValueFromBB1ToBB2.find(I1->getOperand(I)) !=
+ MapValueFromBB1ToBB2.end() ||
+ isa<Constant>(I1->getOperand(I)) ||
+ isa<Constant>(I2->getOperand(I))) {
+ // If we can't sink the instructions, undo the swapping.
+ if (SwapOpnds)
+ ICmp2->swapOperands();
+ return Changed;
+ }
+ DifferentOp1 = I1->getOperand(I);
+ Op1Idx = I;
+ DifferentOp2 = I2->getOperand(I);
+ }
+
+ // We insert the pair of different operands to MapValueFromBB1ToBB2 and
+ // remove (I1, I2) from MapValueFromBB1ToBB2.
+ if (DifferentOp1) {
+ PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2,
+ DifferentOp1->getName() + ".sink",
+ BBEnd->begin());
+ MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN);
+ // I1 should use NewPN instead of DifferentOp1.
+ I1->setOperand(Op1Idx, NewPN);
+ NewPN->addIncoming(DifferentOp1, BB1);
+ NewPN->addIncoming(DifferentOp2, BB2);
+ DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+ }
+ PHINode *OldPN = MapValueFromBB1ToBB2[I1].second;
+ MapValueFromBB1ToBB2.erase(I1);
+
+ DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";);
+ DEBUG(dbgs() << " " << *I2 << "\n";);
+ // We need to update RE1 and RE2 if we are going to sink the first
+ // instruction in the basic block down.
+ bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
+ // Sink the instruction.
+ BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+ if (!OldPN->use_empty())
+ OldPN->replaceAllUsesWith(I1);
+ OldPN->eraseFromParent();
+
+ if (!I2->use_empty())
+ I2->replaceAllUsesWith(I1);
+ I1->intersectOptionalDataWith(I2);
+ I2->eraseFromParent();
+
+ if (UpdateRE1)
+ RE1 = BB1->getInstList().rend();
+ if (UpdateRE2)
+ RE2 = BB2->getInstList().rend();
+ FirstNonPhiInBBEnd = I1;
+ NumSinkCommons++;
+ Changed = true;
+ }
+ return Changed;
+}
+
/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
/// (for now, restricted to a single instruction that's side effect free) from
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
/// parameters and return true, or returns false if no or invalid metadata was
/// found.
static bool ExtractBranchMetadata(BranchInst *BI,
- APInt &ProbTrue, APInt &ProbFalse) {
+ uint64_t &ProbTrue, uint64_t &ProbFalse) {
assert(BI->isConditional() &&
"Looking for probabilities on unconditional branch?");
MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1));
ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2));
if (!CITrue || !CIFalse) return false;
- ProbTrue = CITrue->getValue();
- ProbFalse = CIFalse->getValue();
- assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 &&
- "Branch probability metadata must be 32-bit integers");
+ ProbTrue = CITrue->getValue().getZExtValue();
+ ProbFalse = CIFalse->getValue().getZExtValue();
return true;
}
-/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In
-/// the event of overflow, logically-shifts all four inputs right until the
-/// multiply fits.
-static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D,
- unsigned &BitsLost) {
- BitsLost = 0;
- bool Overflow = false;
- APInt Result = A.umul_ov(B, Overflow);
- if (Overflow) {
- APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A);
- do {
- B = B.lshr(1);
- ++BitsLost;
- } while (B.ugt(MaxB));
- A = A.lshr(BitsLost);
- C = C.lshr(BitsLost);
- D = D.lshr(BitsLost);
- Result = A * B;
- }
- return Result;
-}
-
/// checkCSEInPredecessor - Return true if the given instruction is available
/// in its predecessor block. If yes, the instruction will be removed.
///
continue;
// Determine if the two branches share a common destination.
- Instruction::BinaryOps Opc;
+ Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
bool InvertPredCond = false;
if (BI->isConditional()) {
New, "or.cond"));
PBI->setCondition(NewCond);
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ SmallVector<uint64_t, 8> NewWeights;
+
if (PBI->getSuccessor(0) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, BB, FalseDest
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * TotalWeight for BI +
+ // TrueWeight for PBI * FalseWeight for BI.
+ // We assume that total weights of a BranchInst can fit into 32 bits.
+ // Therefore, we will not have overflow using 64-bit arithmetic.
+ NewWeights.push_back(PredFalseWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredTrueWeight * SuccFalseWeight);
+ }
AddPredecessorToBlock(TrueDest, PredBlock, BB);
PBI->setSuccessor(0, TrueDest);
}
if (PBI->getSuccessor(1) == BB) {
+ if (PredHasWeights && SuccHasWeights) {
+ // PBI: br i1 %x, TrueDest, BB
+ // BI: br i1 %y, TrueDest, FalseDest
+ //TrueWeight is TrueWeight for PBI * TotalWeight for BI +
+ // FalseWeight for PBI * TrueWeight for BI.
+ NewWeights.push_back(PredTrueWeight * (SuccFalseWeight +
+ SuccTrueWeight) + PredFalseWeight * SuccTrueWeight);
+ //FalseWeight is FalseWeight for PBI * FalseWeight for BI.
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
+ }
AddPredecessorToBlock(FalseDest, PredBlock, BB);
PBI->setSuccessor(1, FalseDest);
}
+ if (NewWeights.size() == 2) {
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ } else
+ PBI->setMetadata(LLVMContext::MD_prof, NULL);
} else {
// Update PHI nodes in the common successors.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
// TODO: If BB is reachable from all paths through PredBlock, then we
// could replace PBI's branch probabilities with BI's.
- // Merge probability data into PredBlock's branch.
- APInt A, B, C, D;
- if (PBI->isConditional() && BI->isConditional() &&
- ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) {
- // Given IR which does:
- // bbA:
- // br i1 %x, label %bbB, label %bbC
- // bbB:
- // br i1 %y, label %bbD, label %bbC
- // Let's call the probability that we take the edge from %bbA to %bbB
- // 'a', from %bbA to %bbC, 'b', from %bbB to %bbD 'c' and from %bbB to
- // %bbC probability 'd'.
- //
- // We transform the IR into:
- // bbA:
- // br i1 %z, label %bbD, label %bbC
- // where the probability of going to %bbD is (a*c) and going to bbC is
- // (b+a*d).
- //
- // Probabilities aren't stored as ratios directly. Using branch weights,
- // we get:
- // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D.
-
- // In the event of overflow, we want to drop the LSB of the input
- // probabilities.
- unsigned BitsLost;
-
- // Ignore overflow result on ProbTrue.
- APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost);
-
- APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- }
-
- APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- Tmp1 = Tmp1.lshr(BitsLost*2);
- }
-
- APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost);
- if (BitsLost) {
- ProbTrue = ProbTrue.lshr(BitsLost*2);
- Tmp1 = Tmp1.lshr(BitsLost*2);
- Tmp2 = Tmp2.lshr(BitsLost*2);
- }
-
- bool Overflow1 = false, Overflow2 = false;
- APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1);
- APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2);
-
- if (Overflow1 || Overflow2) {
- ProbTrue = ProbTrue.lshr(1);
- Tmp1 = Tmp1.lshr(1);
- Tmp2 = Tmp2.lshr(1);
- Tmp3 = Tmp3.lshr(1);
- Tmp4 = Tmp2 + Tmp3;
- ProbFalse = Tmp4 + Tmp1;
- }
-
- // The sum of branch weights must fit in 32-bits.
- if (ProbTrue.isNegative() && ProbFalse.isNegative()) {
- ProbTrue = ProbTrue.lshr(1);
- ProbFalse = ProbFalse.lshr(1);
- }
-
- if (ProbTrue != ProbFalse) {
- // Normalize the result.
- APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse);
- ProbTrue = ProbTrue.udiv(GCD);
- ProbFalse = ProbFalse.udiv(GCD);
-
- MDBuilder MDB(BI->getContext());
- MDNode *N = MDB.createBranchWeights(ProbTrue.getZExtValue(),
- ProbFalse.getZExtValue());
- PBI->setMetadata(LLVMContext::MD_prof, N);
- } else {
- PBI->setMetadata(LLVMContext::MD_prof, NULL);
- }
- } else {
- PBI->setMetadata(LLVMContext::MD_prof, NULL);
- }
-
// Copy any debug value intrinsics into the end of PredBlock.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (isa<DbgInfoIntrinsic>(*I))
PBI->setSuccessor(0, CommonDest);
PBI->setSuccessor(1, OtherDest);
+ // Update branch weight for PBI.
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
+ bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
+ PredFalseWeight);
+ bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
+ SuccFalseWeight);
+ if (PredHasWeights && SuccHasWeights) {
+ uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
+ uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight;
+ uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
+ uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
+ // The weight to CommonDest should be PredCommon * SuccTotal +
+ // PredOther * SuccCommon.
+ // The weight to OtherDest should be PredOther * SuccOther.
+ SmallVector<uint64_t, 2> NewWeights;
+ NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon);
+ NewWeights.push_back(PredOther * SuccOther);
+ // Halve the weights if any of them cannot fit in an uint32_t
+ FitWeights(NewWeights);
+
+ SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end());
+ PBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
// OtherDest may have phi nodes. If so, add an entry from PBI's
// block that are identical to the entries for BI's block.
AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
- BasicBlock *TrueBB, BasicBlock *FalseBB){
+ BasicBlock *TrueBB, BasicBlock *FalseBB,
+ uint32_t TrueWeight,
+ uint32_t FalseWeight){
// Remove any superfluous successor edges from the CFG.
// First, figure out which successors to preserve.
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
// We were only looking for one successor, and it was present.
// Create an unconditional branch to it.
Builder.CreateBr(TrueBB);
- else
+ else {
// We found both of the successors we were looking for.
// Create a conditional branch sharing the condition of the select.
- Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
+ if (TrueWeight != FalseWeight)
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(OldTerm->getContext()).
+ createBranchWeights(TrueWeight, FalseWeight));
+ }
} else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
// Neither of the selected blocks were successors, so this
// terminator must be unreachable.
BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
+ // Get weight for TrueBB and FalseBB.
+ uint32_t TrueWeight = 0, FalseWeight = 0;
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal).
+ getSuccessorIndex()];
+ FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal).
+ getSuccessorIndex()];
+ }
+ }
+
// Perform the actual simplification.
- return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB,
+ TrueWeight, FalseWeight);
}
// SimplifyIndirectBrOnSelect - Replaces
BasicBlock *FalseBB = FBA->getBasicBlock();
// Perform the actual simplification.
- return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB);
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
+ 0, 0);
}
/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
- const TargetData *TD,
+ const DataLayout *TD,
IRBuilder<> &Builder) {
BasicBlock *BB = ICI->getParent();
// the switch to the merge point on the compared value.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
BB->getParent(), BB);
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Split weight for default case to case for "Cst".
+ Weights[0] = (Weights[0]+1) >> 1;
+ Weights.push_back(Weights[0]);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(MDWeights));
+ }
+ }
SI->addCase(Cst, NewBB);
// NewBB branches to the phi block, add the uncond branch and the phi entry.
/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
IRBuilder<> &Builder) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (Cond == 0) return false;
Builder.SetInsertPoint(BI);
// Convert pointer to int before we switch.
if (CompVal->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without TargetData");
+ assert(TD && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
- TD->getIntPtrType(CompVal->getContext()),
+ TD->getIntPtrType(CompVal->getType()),
"magicptr");
}
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- Builder.CreateCondBr(
+ BranchInst *NewBI = Builder.CreateCondBr(
Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
+ // Update weight for the newly-created conditional branch.
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeights = HasBranchWeights(SI);
+ if (HasWeights) {
+ GetBranchWeights(SI, Weights);
+ if (Weights.size() == 1 + SI->getNumCases()) {
+ // Combine all weights for the cases to be the true weight of NewBI.
+ // We assume that the sum of all weights for a Terminator can fit into 32
+ // bits.
+ uint32_t NewTrueWeight = 0;
+ for (unsigned I = 1, E = Weights.size(); I != E; ++I)
+ NewTrueWeight += (uint32_t)Weights[I];
+ NewBI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext()).
+ createBranchWeights(NewTrueWeight,
+ (uint32_t)Weights[0]));
+ }
+ }
+
// Prune obsolete incoming values off the successor's PHI nodes.
for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
isa<PHINode>(BBI); ++BBI) {
}
}
+ SmallVector<uint64_t, 8> Weights;
+ bool HasWeight = HasBranchWeights(SI);
+ if (HasWeight) {
+ GetBranchWeights(SI, Weights);
+ HasWeight = (Weights.size() == 1 + SI->getNumCases());
+ }
+
// Remove dead cases from the switch.
for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
assert(Case != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
+ if (HasWeight) {
+ std::swap(Weights[Case.getCaseIndex()+1], Weights.back());
+ Weights.pop_back();
+ }
+
// Prune unused values from PHI nodes.
Case.getCaseSuccessor()->removePredecessor(SI->getParent());
SI->removeCase(Case);
}
+ if (HasWeight) {
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getParent()->getContext()).
+ createBranchWeights(MDWeights));
+ }
return !DeadCases.empty();
}
return Changed;
}
-bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
- // If this switch is too complex to want to look at, ignore it.
- if (!isValueEqualityComparison(SI))
+/// ValidLookupTableConstant - Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+static bool ValidLookupTableConstant(Constant *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return CE->isGEPWithNoNotionalOverIndexing();
+
+ return isa<ConstantFP>(C) ||
+ isa<ConstantInt>(C) ||
+ isa<ConstantPointerNull>(C) ||
+ isa<GlobalValue>(C) ||
+ isa<UndefValue>(C);
+}
+
+/// ConstantFold - Try to fold instruction I into a constant. This works for
+/// simple instructions such as binary operations where both operands are
+/// constant or can be replaced by constants from the ConstantPool. Returns the
+/// resulting constant on success, NULL otherwise.
+static Constant* ConstantFold(Instruction *I,
+ const SmallDenseMap<Value*, Constant*>& ConstantPool) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ Constant *A = dyn_cast<Constant>(BO->getOperand(0));
+ if (!A) A = ConstantPool.lookup(BO->getOperand(0));
+ if (!A) return NULL;
+
+ Constant *B = dyn_cast<Constant>(BO->getOperand(1));
+ if (!B) B = ConstantPool.lookup(BO->getOperand(1));
+ if (!B) return NULL;
+
+ Constant *C = ConstantExpr::get(BO->getOpcode(), A, B);
+ return C;
+ }
+
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ Constant *A = dyn_cast<Constant>(I->getOperand(0));
+ if (!A) A = ConstantPool.lookup(I->getOperand(0));
+ if (!A) return NULL;
+
+ Constant *B = dyn_cast<Constant>(I->getOperand(1));
+ if (!B) B = ConstantPool.lookup(I->getOperand(1));
+ if (!B) return NULL;
+
+ Constant *C = ConstantExpr::getCompare(Cmp->getPredicate(), A, B);
+ return C;
+ }
+
+ if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+ Constant *A = dyn_cast<Constant>(Select->getCondition());
+ if (!A) A = ConstantPool.lookup(Select->getCondition());
+ if (!A) return NULL;
+
+ Value *Res;
+ if (A->isAllOnesValue()) Res = Select->getTrueValue();
+ else if (A->isNullValue()) Res = Select->getFalseValue();
+ else return NULL;
+
+ Constant *C = dyn_cast<Constant>(Res);
+ if (!C) C = ConstantPool.lookup(Res);
+ if (!C) return NULL;
+ return C;
+ }
+
+ if (CastInst *Cast = dyn_cast<CastInst>(I)) {
+ Constant *A = dyn_cast<Constant>(I->getOperand(0));
+ if (!A) A = ConstantPool.lookup(I->getOperand(0));
+ if (!A) return NULL;
+
+ Constant *C = ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy());
+ return C;
+ }
+
+ return NULL;
+}
+
+/// GetCaseResults - Try to determine the resulting constant values in phi nodes
+/// at the common destination basic block, *CommonDest, for one of the case
+/// destionations CaseDest corresponding to value CaseVal (NULL for the default
+/// case), of a switch instruction SI.
+static bool GetCaseResults(SwitchInst *SI,
+ ConstantInt *CaseVal,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty except for some side-effect free instructions through
+ // which we can constant-propagate the CaseVal, continue to its successor.
+ SmallDenseMap<Value*, Constant*> ConstantPool;
+ ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
+ for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
+ ++I) {
+ if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
+ // If the terminator is a simple branch, continue to the next block.
+ if (T->getNumSuccessors() != 1)
+ return false;
+ Pred = CaseDest;
+ CaseDest = T->getSuccessor(0);
+ } else if (isa<DbgInfoIntrinsic>(I)) {
+ // Skip debug intrinsic.
+ continue;
+ } else if (Constant *C = ConstantFold(I, ConstantPool)) {
+ // Instruction is side-effect free and constant.
+ ConstantPool.insert(std::make_pair(I, C));
+ } else {
+ break;
+ }
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ BasicBlock::iterator I = (*CommonDest)->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(Pred);
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx));
+ if (!ConstVal)
+ ConstVal = ConstantPool.lookup(PHI->getIncomingValue(Idx));
+ if (!ConstVal)
+ return false;
+
+ // Note: If the constant comes from constant-propagating the case value
+ // through the CaseDest basic block, it will be safe to remove the
+ // instructions in that block. They cannot be used (except in the phi nodes
+ // we visit) outside CaseDest, because that block does not dominate its
+ // successor. If it did, we would not be in this phi node.
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal))
+ return false;
+
+ Res.push_back(std::make_pair(PHI, ConstVal));
+ }
+
+ return true;
+}
+
+namespace {
+ /// SwitchLookupTable - This class represents a lookup table that can be used
+ /// to replace a switch.
+ class SwitchLookupTable {
+ public:
+ /// SwitchLookupTable - Create a lookup table to use as a switch replacement
+ /// with the contents of Values, using DefaultValue to fill any holes in the
+ /// table.
+ SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD);
+
+ /// BuildLookup - Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// WouldFitInRegister - Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType);
+
+ private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+ };
+}
+
+SwitchLookupTable::SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const DataLayout *TD) {
+ assert(Values.size() && "Can't build lookup table without values!");
+ assert(TableSize >= Values.size() && "Can't fit values in table!");
+
+ // If all values in the table are equal, this is that value.
+ SingleValue = Values.begin()->second;
+
+ // Build up the table contents.
+ SmallVector<Constant*, 64> TableContents(TableSize);
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Values[I].first;
+ Constant *CaseRes = Values[I].second;
+ assert(CaseRes->getType() == DefaultValue->getType());
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
+ .getLimitedValue();
+ TableContents[Idx] = CaseRes;
+
+ if (CaseRes != SingleValue)
+ SingleValue = NULL;
+ }
+
+ // Fill in any holes in the table with the default result.
+ if (Values.size() < TableSize) {
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ if (!TableContents[I])
+ TableContents[I] = DefaultValue;
+ }
+
+ if (DefaultValue != SingleValue)
+ SingleValue = NULL;
+ }
+
+ // If each element in the table contains the same value, we only need to store
+ // that single value.
+ if (SingleValue) {
+ Kind = SingleValueKind;
+ return;
+ }
+
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
+ IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ // Insert values into the bitmap. Undef values are set to zero.
+ if (!isa<UndefValue>(TableContents[I - 1])) {
+ ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
+ TableInt |= Val->getValue().zext(TableInt.getBitWidth());
+ }
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
+ }
+
+ // Store the table in an array.
+ ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
+ Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+ Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+ GlobalVariable::PrivateLinkage,
+ Initializer,
+ "switch.table");
+ Array->setUnnamedAddr(true);
+ Kind = ArrayKind;
+}
+
+Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+ switch (Kind) {
+ case SingleValueKind:
+ return SingleValue;
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(ShiftAmt,
+ ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
+ "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy,
+ "switch.masked");
+ }
+ case ArrayKind: {
+ Value *GEPIndices[] = { Builder.getInt32(0), Index };
+ Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
+ "switch.gep");
+ return Builder.CreateLoad(GEP, "switch.load");
+ }
+ }
+ llvm_unreachable("Unknown lookup table kind!");
+}
+
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
+ uint64_t TableSize,
+ const Type *ElementType) {
+ if (!TD)
+ return false;
+ const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
+ // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
+ if (TableSize >= UINT_MAX/IT->getBitWidth())
+ return false;
+ return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
+/// ShouldBuildLookupTable - Determine whether a lookup table should be built
+/// for this switch, based on the number of caes, size of the table and the
+/// types of the results.
+static bool ShouldBuildLookupTable(SwitchInst *SI,
+ uint64_t TableSize,
+ const DataLayout *TD,
+ const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+ // The table density should be at least 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+ if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
+ return false; // TableSize overflowed, or mul below might overflow.
+ if (SI->getNumCases() * 10 >= TableSize * 4)
+ return true;
+
+ // If each table would fit in a register, we should build it anyway.
+ for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
+ E = ResultTypes.end(); I != E; ++I) {
+ if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
+ return false;
+ }
+ return true;
+}
+
+/// SwitchToLookupTable - If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with different constant values,
+/// replace the switch with lookup tables.
+static bool SwitchToLookupTable(SwitchInst *SI,
+ IRBuilder<> &Builder,
+ const DataLayout* TD,
+ const TargetTransformInfo *TTI) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ if (TTI && !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
+ return false;
+
+ // FIXME: Handle unreachable cases.
+
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+ // Ignore the switch if the number of cases is too small.
+ // This is similar to the check when building jump tables in
+ // SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Determine the best cut-off.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // Figure out the corresponding result for each case value and phi node in the
+ // common destination, as well as the the min and max case values.
+ assert(SI->case_begin() != SI->case_end());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI.getCaseValue();
+ ConstantInt *MaxCaseVal = CI.getCaseValue();
+
+ BasicBlock *CommonDest = NULL;
+ typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
+ SmallDenseMap<PHINode*, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode*, Constant*> DefaultResults;
+ SmallDenseMap<PHINode*, Type*> ResultTypes;
+ SmallVector<PHINode*, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+ ResultsTy Results;
+ if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
+ Results))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
+ if (!ResultLists.count(I->first))
+ PHIs.push_back(I->first);
+ ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+ }
+ }
+
+ // Get the resulting values for the default case.
+ SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+ if (!GetCaseResults(SI, NULL, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList))
+ return false;
+ for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
+ PHINode *PHI = DefaultResultsList[I].first;
+ Constant *Result = DefaultResultsList[I].second;
+ DefaultResults[PHI] = Result;
+ ResultTypes[PHI] = Result->getType();
+ }
+
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
+ return false;
+
+ // Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
+ BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Check whether the condition value is within the case range, and branch to
+ // the new BB.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+ bool ReturnedEarly = false;
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
+ DefaultResults[PHI], TD);
+
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
+
+ // If the result is used to return immediately from the function, we want to
+ // do that right here.
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) &&
+ *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) {
+ Builder.CreateRet(Result);
+ ReturnedEarly = true;
+ break;
+ }
+
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ if (!ReturnedEarly)
+ Builder.CreateBr(CommonDest);
+
+ // Remove the switch.
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == SI->getDefaultDest()) continue;
+ Succ->removePredecessor(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ ++NumLookupTables;
+ return true;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB) | true;
+ if (isValueEqualityComparison(SI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
+ return SimplifyCFG(BB) | true;
- Value *Cond = SI->getCondition();
- if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
- if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB) | true;
+ Value *Cond = SI->getCondition();
+ if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
+ if (SimplifySwitchOnSelect(SI, Select))
+ return SimplifyCFG(BB) | true;
- // If the block only contains the switch, see if we can fold the block
- // away into any preds.
- BasicBlock::iterator BBI = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (SI == &*BBI)
- if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB) | true;
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI, Builder))
+ return SimplifyCFG(BB) | true;
+ }
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
+ if (SwitchToLookupTable(SI, Builder, TD, TTI))
+ return SimplifyCFG(BB) | true;
+
return false;
}
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
+ if (SinkCommon && SinkThenElseCodeToEnd(BI))
+ return true;
+
// If the Terminator is the only non-phi instruction, simplify the block.
BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
if (!C)
return false;
- if (!I->hasOneUse()) // Only look at single-use instructions, for compile time
+ if (I->use_empty())
return false;
if (C->isNullValue()) {
- Instruction *Use = I->use_back();
+ // Only look at the first use, avoid hurting compile time with long uselists
+ User *Use = *I->use_begin();
// Now make sure that there are no instructions in between that can alter
// control flow (eg. calls)
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made.
///
-bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
- return SimplifyCFGOpt(TD).run(BB);
+bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD,
+ const TargetTransformInfo *TTI) {
+ return SimplifyCFGOpt(TD, TTI).run(BB);
}