X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FUtils%2FSimplifyCFG.cpp;h=e43c9e2708df753b7708d47217914fd9f5f5cc12;hb=ae43cab6bab0e5bcdbe2971bf718712559625e39;hp=a63d31d5af5a100e4a8fa9f0832e581cf44e29c0;hpb=455151e4f7b6dc264183ecc892a2f9678ff5fe46;p=oota-llvm.git diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index a63d31d5af5..e43c9e2708d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -40,12 +41,14 @@ #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/NoFolder.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #include #include using namespace llvm; +using namespace PatternMatch; static cl::opt PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), @@ -59,6 +62,10 @@ static cl::opt SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); +static cl::opt HoistCondStores( + "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store precedes")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); @@ -84,7 +91,6 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout *const TD; - Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector &Cases); @@ -190,94 +196,7 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); } - -/// GetIfCondition - Given a basic block (BB) with two predecessors (and at -/// least one PHI node in it), check to see if the merge at this block is due -/// to an "if condition". If so, return the boolean condition that determines -/// which entry into BB will be taken. Also, return by references the block -/// that will be entered from if the condition is true, and the block that will -/// be entered if the condition is false. -/// -/// This does no checking to see if the true/false blocks have large or unsavory -/// instructions in them. -static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, - BasicBlock *&IfFalse) { - PHINode *SomePHI = cast(BB->begin()); - assert(SomePHI->getNumIncomingValues() == 2 && - "Function can only handle blocks with 2 predecessors!"); - BasicBlock *Pred1 = SomePHI->getIncomingBlock(0); - BasicBlock *Pred2 = SomePHI->getIncomingBlock(1); - - // We can only handle branches. Other control flow will be lowered to - // branches if possible anyway. - BranchInst *Pred1Br = dyn_cast(Pred1->getTerminator()); - BranchInst *Pred2Br = dyn_cast(Pred2->getTerminator()); - if (Pred1Br == 0 || Pred2Br == 0) - return 0; - - // Eliminate code duplication by ensuring that Pred1Br is conditional if - // either are. - if (Pred2Br->isConditional()) { - // If both branches are conditional, we don't have an "if statement". In - // reality, we could transform this case, but since the condition will be - // required anyway, we stand no chance of eliminating it, so the xform is - // probably not profitable. - if (Pred1Br->isConditional()) - return 0; - - std::swap(Pred1, Pred2); - std::swap(Pred1Br, Pred2Br); - } - - if (Pred1Br->isConditional()) { - // The only thing we have to watch out for here is to make sure that Pred2 - // doesn't have incoming edges from other blocks. If it does, the condition - // doesn't dominate BB. - if (Pred2->getSinglePredecessor() == 0) - return 0; - - // If we found a conditional branch predecessor, make sure that it branches - // to BB and Pred2Br. If it doesn't, this isn't an "if statement". - if (Pred1Br->getSuccessor(0) == BB && - Pred1Br->getSuccessor(1) == Pred2) { - IfTrue = Pred1; - IfFalse = Pred2; - } else if (Pred1Br->getSuccessor(0) == Pred2 && - Pred1Br->getSuccessor(1) == BB) { - IfTrue = Pred2; - IfFalse = Pred1; - } else { - // We know that one arm of the conditional goes to BB, so the other must - // go somewhere unrelated, and this must not be an "if statement". - return 0; - } - - return Pred1Br->getCondition(); - } - - // Ok, if we got here, both predecessors end with an unconditional branch to - // BB. Don't panic! If both blocks only have a single (identical) - // predecessor, and THAT is a conditional branch, then we're all ok! - BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) - return 0; - - // Otherwise, if this is a conditional branch, then we can use it! - BranchInst *BI = dyn_cast(CommonPred->getTerminator()); - if (BI == 0) return 0; - - assert(BI->isConditional() && "Two successors but not conditional?"); - if (BI->getSuccessor(0) == Pred1) { - IfTrue = Pred1; - IfFalse = Pred2; - } else { - IfTrue = Pred2; - IfFalse = Pred1; - } - return BI->getCondition(); -} - -/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the +/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the /// given instruction, which is assumed to be safe to speculate. 1 means /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. static unsigned ComputeSpeculationCost(const User *I) { @@ -428,7 +347,24 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { + Value *RHSVal; + ConstantInt *RHSC; + if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) { + // (x & ~2^x) == y --> x == y || x == y|2^x + // This undoes a transformation done by instcombine to fuse 2 compares. + if (match(ICI->getOperand(0), + m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) { + APInt Not = ~RHSC->getValue(); + if (Not.isPowerOf2()) { + Vals.push_back(C); + Vals.push_back( + ConstantInt::get(C->getContext(), C->getValue() | Not)); + UsedICmps++; + return RHSVal; + } + } + UsedICmps++; Vals.push_back(C); return I->getOperand(0); @@ -439,6 +375,13 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); + // Shift the range if the compare is fed by an add. This is the range + // compare idiom as emitted by instcombine. + bool hasAdd = + match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC))); + if (hasAdd) + Span = Span.subtract(RHSC->getValue()); + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) @@ -451,7 +394,7 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; - return I->getOperand(0); + return hasAdd ? RHSVal : I->getOperand(0); } return 0; } @@ -529,15 +472,17 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { } else if (BranchInst *BI = dyn_cast(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) if (ICmpInst *ICI = dyn_cast(BI->getCondition())) - if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || - ICI->getPredicate() == ICmpInst::ICMP_NE) && - GetConstantInt(ICI->getOperand(1), TD)) + if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD)) CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. - if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) - if (PtrToIntInst *PTII = dyn_cast(CV)) - CV = PTII->getOperand(0); + if (TD && CV) { + if (PtrToIntInst *PTII = dyn_cast(CV)) { + Value *Ptr = PTII->getPointerOperand(); + if (PTII->getType() == TD->getIntPtrType(Ptr->getType())) + CV = Ptr; + } + } return CV; } @@ -759,9 +704,10 @@ namespace { }; } -static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt*const*)P1; - const ConstantInt *RHS = *(const ConstantInt*const*)P2; +static int ConstantIntSortPredicate(ConstantInt *const *P1, + ConstantInt *const *P2) { + const ConstantInt *LHS = *P1; + const ConstantInt *RHS = *P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -984,7 +930,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), "magicptr"); } @@ -1079,9 +1025,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; - // If we get here, we can hoist at least one instruction. BasicBlock *BIParent = BI->getParent(); + bool Changed = false; do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -1096,6 +1042,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); I2->eraseFromParent(); + Changed = true; I1 = BB1_Itr++; I2 = BB2_Itr++; @@ -1115,7 +1062,23 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { HoistTerminator: // It may not be possible to hoist an invoke. if (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) - return true; + return Changed; + + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; + + if (isa(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + return Changed; + if (isa(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + return Changed; + } + } // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); @@ -1332,6 +1295,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return Changed; } +/// \brief Determine if we can hoist sink a sole store instruction out of a +/// conditional block. +/// +/// We are looking for code like the following: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // No other stores or function calls (we could be calling a memory +/// ... // function). +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// store i32 %add5, i32* %arrayidx2 +/// br label EndBB +/// EndBB: +/// ... +/// We are going to transform this into: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // +/// %cmp = icmp ult %x, %y +/// %add.add5 = select i1 %cmp, i32 %add, %add5 +/// store i32 %add.add5, i32* %arrayidx2 +/// ... +/// +/// \return The pointer to the value of the previous store if the store can be +/// hoisted into the predecessor block. 0 otherwise. +static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, + BasicBlock *StoreBB, BasicBlock *EndBB) { + StoreInst *StoreToHoist = dyn_cast(I); + if (!StoreToHoist) + return 0; + + // Volatile or atomic. + if (!StoreToHoist->isSimple()) + return 0; + + Value *StorePtr = StoreToHoist->getPointerOperand(); + + // Look for a store to the same pointer in BrBB. + unsigned MaxNumInstToLookAt = 10; + for (BasicBlock::reverse_iterator RI = BrBB->rbegin(), + RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) { + Instruction *CurI = &*RI; + + // Could be calling an instruction that effects memory like free(). + if (CurI->mayHaveSideEffects() && !isa(CurI)) + return 0; + + StoreInst *SI = dyn_cast(CurI); + // Found the previous store make sure it stores to the same location. + if (SI && SI->getPointerOperand() == StorePtr) + // Found the previous store, return its value operand. + return SI->getValueOperand(); + else if (SI) + return 0; // Unknown store. + } + + return 0; +} + /// \brief Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. Speculating @@ -1395,6 +1418,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap SinkCandidateUseCounts; unsigned SpeculationCost = 0; + Value *SpeculatedStoreValue = 0; + StoreInst *SpeculatedStore = 0; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = llvm::prior(ThenBB->end()); BBI != BBE; ++BBI) { @@ -1410,13 +1435,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && + (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, + EndBB)))) return false; - if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + if (!SpeculatedStoreValue && + ComputeSpeculationCost(I) > PHINodeFoldingThreshold) return false; + // Store the store speculation candidate. + if (SpeculatedStoreValue) + SpeculatedStore = cast(I); + // Do not hoist the instruction if any of its operands are defined but not - // used in this BB. The transformation will prevent the operand from + // used in BB. The transformation will prevent the operand from // being sunk into the use block. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { @@ -1448,18 +1481,23 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { Value *OrigV = PN->getIncomingValueForBlock(BB); Value *ThenV = PN->getIncomingValueForBlock(ThenBB); + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. // Skip PHIs which are trivial. if (ThenV == OrigV) continue; HaveRewritablePHIs = true; - ConstantExpr *CE = dyn_cast(ThenV); - if (!CE) + ConstantExpr *OrigCE = dyn_cast(OrigV); + ConstantExpr *ThenCE = dyn_cast(ThenV); + if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if (!isSafeToSpeculativelyExecute(CE)) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; - if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0; + if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) return false; // Account for the cost of an unfolded ConstantExpr which could end up @@ -1473,12 +1511,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // If there are no PHIs to process, bail early. This helps ensure idempotence // as well. - if (!HaveRewritablePHIs) + if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + // Insert a select of the value of the speculated store. + if (SpeculatedStoreValue) { + IRBuilder Builder(BI); + Value *TrueV = SpeculatedStore->getValueOperand(); + Value *FalseV = SpeculatedStoreValue; + if (Invert) + std::swap(TrueV, FalseV); + Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() + + "." + FalseV->getName()); + SpeculatedStore->setOperand(0, S); + } + // Hoist the instructions. BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), llvm::prior(ThenBB->end())); @@ -1512,6 +1562,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return true; } +/// \returns True if this block contains a CallInst with the NoDuplicate +/// attribute. +static bool HasNoDuplicateCall(const BasicBlock *BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + const CallInst *CI = dyn_cast(I); + if (!CI) + continue; + if (CI->cannotDuplicate()) + return true; + } + return false; +} + /// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch /// across this block. static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { @@ -1559,6 +1622,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + if (HasNoDuplicateCall(BB)) return false; + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -2025,14 +2090,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values // must already have been resolved, so we won't be inhibiting the - // out-of-order core by speculating them earlier. - if (BonusInst) { + // out-of-order core by speculating them earlier. We also allow + // instructions that are used by the terminator's condition because it + // exposes more merging opportunities. + bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() && + *BonusInst->use_begin() == Cond); + + if (BonusInst && !UsedByBranch) { // Collect the values used by the bonus inst SmallPtrSet UsedValues; for (Instruction::op_iterator OI = BonusInst->op_begin(), OE = BonusInst->op_end(); OI != OE; ++OI) { Value *V = *OI; - if (!isa(V)) + if (!isa(V) && !isa(V)) UsedValues.insert(V); } @@ -2743,7 +2813,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, if (CompVal->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, - TD->getIntPtrType(CompVal->getContext()), + TD->getIntPtrType(CompVal->getType()), "magicptr"); } @@ -2789,9 +2859,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Turn all invokes that unwind here into calls and delete the basic block. + bool InvokeRequiresTableEntry = false; + bool Changed = false; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { InvokeInst *II = cast((*PI++)->getTerminator()); + + if (II->hasFnAttr(Attribute::UWTable)) { + // Don't remove an `invoke' instruction if the ABI requires an entry into + // the table. + InvokeRequiresTableEntry = true; + continue; + } + SmallVector Args(II->op_begin(), II->op_end() - 3); + // Insert a call instruction before the invoke. CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); @@ -2811,11 +2892,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { // Finally, delete the invoke instruction! II->eraseFromParent(); + Changed = true; } - // The landingpad is now unreachable. Zap it. - BB->eraseFromParent(); - return true; + if (!InvokeRequiresTableEntry) + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + + return Changed; } bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { @@ -3059,7 +3143,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); - Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + Value *Cmp; + // If NumCases overflowed, then all possible values jump to the successor. + if (NumCases->isNullValue() && SI->getNumCases() != 0) + Cmp = ConstantInt::getTrue(SI->getContext()); + else + Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); BranchInst *NewBI = Builder.CreateCondBr( Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); @@ -3097,7 +3186,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// and use it to remove dead cases. static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); - unsigned Bits = cast(Cond->getType())->getBitWidth(); + unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); ComputeMaskedBits(Cond, KnownZero, KnownOne); @@ -3133,7 +3222,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } - if (HasWeight) { + if (HasWeight && Weights.size() >= 2) { SmallVector MDWeights(Weights.begin(), Weights.end()); SI->setMetadata(LLVMContext::MD_prof, MDBuilder(SI->getParent()->getContext()). @@ -3202,7 +3291,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), E = ForwardingNodes.end(); I != E; ++I) { PHINode *Phi = I->first; - SmallVector &Indexes = I->second; + SmallVectorImpl &Indexes = I->second; if (Indexes.size() < 2) continue; @@ -3240,28 +3329,10 @@ static Constant *LookupConstant(Value *V, /// simple instructions such as binary operations where both operands are /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. -static Constant *ConstantFold(Instruction *I, - const SmallDenseMap& ConstantPool) { - if (BinaryOperator *BO = dyn_cast(I)) { - Constant *A = LookupConstant(BO->getOperand(0), ConstantPool); - if (!A) - return 0; - Constant *B = LookupConstant(BO->getOperand(1), ConstantPool); - if (!B) - return 0; - return ConstantExpr::get(BO->getOpcode(), A, B); - } - - if (CmpInst *Cmp = dyn_cast(I)) { - Constant *A = LookupConstant(I->getOperand(0), ConstantPool); - if (!A) - return 0; - Constant *B = LookupConstant(I->getOperand(1), ConstantPool); - if (!B) - return 0; - return ConstantExpr::getCompare(Cmp->getPredicate(), A, B); - } - +static Constant * +ConstantFold(Instruction *I, + const SmallDenseMap &ConstantPool, + const DataLayout *DL) { if (SelectInst *Select = dyn_cast(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) @@ -3273,25 +3344,32 @@ static Constant *ConstantFold(Instruction *I, return 0; } - if (CastInst *Cast = dyn_cast(I)) { - Constant *A = LookupConstant(I->getOperand(0), ConstantPool); - if (!A) + SmallVector COps; + for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { + if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) + COps.push_back(A); + else return 0; - return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy()); } - return 0; + if (CmpInst *Cmp = dyn_cast(I)) + return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], + COps[1], DL); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); } /// GetCaseResults - Try to determine the resulting constant values in phi nodes /// at the common destination basic block, *CommonDest, for one of the case /// destionations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI. -static bool GetCaseResults(SwitchInst *SI, - ConstantInt *CaseVal, - BasicBlock *CaseDest, - BasicBlock **CommonDest, - SmallVector, 4> &Res) { +static bool +GetCaseResults(SwitchInst *SI, + ConstantInt *CaseVal, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVectorImpl > &Res, + const DataLayout *DL) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); @@ -3310,7 +3388,7 @@ static bool GetCaseResults(SwitchInst *SI, } else if (isa(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, ConstantPool)) { + } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { // Instruction is side-effect free and constant. ConstantPool.insert(std::make_pair(I, C)); } else { @@ -3350,7 +3428,7 @@ static bool GetCaseResults(SwitchInst *SI, Res.push_back(std::make_pair(PHI, ConstVal)); } - return true; + return Res.size() > 0; } namespace { @@ -3364,7 +3442,7 @@ namespace { SwitchLookupTable(Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVector, 4>& Values, + const SmallVectorImpl >& Values, Constant *DefaultValue, const DataLayout *TD); @@ -3411,7 +3489,7 @@ namespace { SwitchLookupTable::SwitchLookupTable(Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVector, 4>& Values, + const SmallVectorImpl >& Values, Constant *DefaultValue, const DataLayout *TD) : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { @@ -3421,12 +3499,14 @@ SwitchLookupTable::SwitchLookupTable(Module &M, // If all values in the table are equal, this is that value. SingleValue = Values.begin()->second; + Type *ValueType = Values.begin()->second->getType(); + // Build up the table contents. SmallVector TableContents(TableSize); for (size_t I = 0, E = Values.size(); I != E; ++I) { ConstantInt *CaseVal = Values[I].first; Constant *CaseRes = Values[I].second; - assert(CaseRes->getType() == DefaultValue->getType()); + assert(CaseRes->getType() == ValueType); uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) .getLimitedValue(); @@ -3438,6 +3518,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, // Fill in any holes in the table with the default result. if (Values.size() < TableSize) { + assert(DefaultValue && "Need a default value to fill the lookup table holes."); + assert(DefaultValue->getType() == ValueType); for (uint64_t I = 0; I < TableSize; ++I) { if (!TableContents[I]) TableContents[I] = DefaultValue; @@ -3455,8 +3537,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } // If the type is integer and the table fits in a register, build a bitmap. - if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { - IntegerType *IT = cast(DefaultValue->getType()); + if (WouldFitInRegister(TD, TableSize, ValueType)) { + IntegerType *IT = cast(ValueType); APInt TableInt(TableSize * IT->getBitWidth(), 0); for (uint64_t I = TableSize; I > 0; --I) { TableInt <<= IT->getBitWidth(); @@ -3474,7 +3556,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } // Store the table in an array. - ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); + ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, @@ -3538,7 +3620,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, } /// ShouldBuildLookupTable - Determine whether a lookup table should be built -/// for this switch, based on the number of caes, size of the table and the +/// for this switch, based on the number of cases, size of the table and the /// types of the results. static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, @@ -3602,11 +3684,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases is too small. - // This is similar to the check when building jump tables in - // SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Determine the best cut-off. - if (SI->getNumCases() < 4) + // Ignore switches with less than three cases. Lookup tables will not make them + // faster, so we don't analyze them. + if (SI->getNumCases() < 3) return false; // Figure out the corresponding result for each case value and phi node in the @@ -3634,7 +3714,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, typedef SmallVector, 4> ResultsTy; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, - Results)) + Results, TD)) return false; // Append the result from this case to the list for each phi. @@ -3645,20 +3725,29 @@ static bool SwitchToLookupTable(SwitchInst *SI, } } - // Get the resulting values for the default case. + // Keep track of the result types. + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + ResultTypes[PHI] = ResultLists[PHI][0].second->getType(); + } + + uint64_t NumResults = ResultLists[PHIs[0]].size(); + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + bool TableHasHoles = (NumResults < TableSize); + + // If the table has holes, we need a constant result for the default case. SmallVector, 4> DefaultResultsList; - if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList)) + if (TableHasHoles && !GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, + DefaultResultsList, TD)) return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; Constant *Result = DefaultResultsList[I].second; DefaultResults[PHI] = Result; - ResultTypes[PHI] = Result->getType(); } - APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - uint64_t TableSize = RangeSpread.getLimitedValue() + 1; if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes)) return false; @@ -3669,14 +3758,32 @@ static bool SwitchToLookupTable(SwitchInst *SI, CommonDest->getParent(), CommonDest); - // Check whether the condition value is within the case range, and branch to - // the new BB. + // Compute the table index value. Builder.SetInsertPoint(SI); Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); - Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( - MinCaseVal->getType(), TableSize)); - Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Compute the maximum table size representable by the integer type we are + // switching upon. + unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize; + assert(MaxTableSize >= TableSize && + "It is impossible for a switch to have more entries than the max " + "representable value of its input integer type's size."); + + // If we have a fully covered lookup table, unconditionally branch to the + // lookup table BB. Otherwise, check if the condition value is within the case + // range. If it is so, branch to the new BB. Otherwise branch to SI's default + // destination. + const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; + if (GeneratingCoveredLookupTable) { + Builder.CreateBr(LookupBB); + SI->getDefaultDest()->removePredecessor(SI->getParent()); + } else { + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + } // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); @@ -3705,9 +3812,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, Builder.CreateBr(CommonDest); // Remove the switch. - for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); - if (Succ == SI->getDefaultDest()) continue; + + if (Succ == SI->getDefaultDest()) + continue; Succ->removePredecessor(SI->getParent()); } SI->eraseFromParent(); @@ -3944,11 +4053,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { // Load from null is undefined. if (LoadInst *LI = dyn_cast(Use)) - return LI->getPointerAddressSpace() == 0; + if (!LI->isVolatile()) + return LI->getPointerAddressSpace() == 0; // Store to null is undefined. if (StoreInst *SI = dyn_cast(Use)) - return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + if (!SI->isVolatile()) + return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; } return false; }