Consistent use of the noduplicate attribute.

[oota-llvm.git] / lib / Transforms / Scalar / JumpThreading.cpp
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp

index 6b8430f02830a45b27cf0355d1f0f7900d594c40..067deb7d78d2a824b6c7a863a0ba314d34ade06f 100644 (file)
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -13,28 +13,29 @@
  
  #define DEBUG_TYPE "jump-threading"
  #include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
  #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
  using namespace llvm;
  
  STATISTIC(NumThreads, "Number of jumps threaded");
@@ -75,7 +76,7 @@ namespace {
    /// revectored to the false side of the second if.
    ///
    class JumpThreading : public FunctionPass {
-    TargetData *TD;
+    const DataLayout *DL;
      TargetLibraryInfo *TLI;
      LazyValueInfo *LVI;
  #ifdef NDEBUG
@@ -104,9 +105,9 @@ namespace {
        initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
      }
  
-    bool runOnFunction(Function &F);
+    bool runOnFunction(Function &F) override;
  
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<LazyValueInfo>();
        AU.addPreserved<LazyValueInfo>();
        AU.addRequired<TargetLibraryInfo>();
@@ -129,6 +130,7 @@ namespace {
      bool ProcessBranchOnXOR(BinaryOperator *BO);
  
      bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+    bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
    };
  }
  
@@ -146,8 +148,12 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
  /// runOnFunction - Top level algorithm.
  ///
  bool JumpThreading::runOnFunction(Function &F) {
+  if (skipOptnoneFunction(F))
+    return false;
+
    DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
-  TD = getAnalysisIfAvailable<TargetData>();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
    TLI = &getAnalysis<TargetLibraryInfo>();
    LVI = &getAnalysis<LazyValueInfo>();
  
@@ -216,19 +222,24 @@ bool JumpThreading::runOnFunction(Function &F) {
  }
  
  /// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+/// thread across it. Stop scanning the block when passing the threshold.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
+                                             unsigned Threshold) {
    /// Ignore PHI nodes, these will be flattened when duplication happens.
    BasicBlock::const_iterator I = BB->getFirstNonPHI();
  
    // FIXME: THREADING will delete values that are just used to compute the
    // branch, so they shouldn't count against the duplication cost.
  
-
    // Sum up the cost of each instruction until we get to the terminator.  Don't
    // include the terminator because the copy won't include it.
    unsigned Size = 0;
    for (; !isa<TerminatorInst>(I); ++I) {
+
+    // Stop scanning the block if we've reached the threshold.
+    if (Size > Threshold)
+      return Size;
+
      // Debugger intrinsics don't incur code size.
      if (isa<DbgInfoIntrinsic>(I)) continue;
  
@@ -244,7 +255,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
      // as having cost of 2 total, and if they are a vector intrinsic, we model
      // them as having cost 1.
      if (const CallInst *CI = dyn_cast<CallInst>(I)) {
-      if (!isa<IntrinsicInst>(CI))
+      if (CI->cannotDuplicate())
+        // Blocks with NoDuplicate are modelled as having infinite cost, so they
+        // are never duplicated.
+        return ~0U;
+      else if (!isa<IntrinsicInst>(CI))
          Size += 3;
        else if (!CI->getType()->isVectorTy())
          Size += 1;
@@ -479,7 +494,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
          Value *LHS = PN->getIncomingValue(i);
          Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
  
-        Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+        Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
          if (Res == 0) {
            if (!isa<Constant>(RHS))
              continue;
@@ -681,7 +696,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
    // Run constant folding to see if we can reduce the condition to a simple
    // constant.
    if (Instruction *I = dyn_cast<Instruction>(Condition)) {
-    Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
+    Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI);
      if (SimpleVal) {
        I->replaceAllUsesWith(SimpleVal);
        I->eraseFromParent();
@@ -766,7 +781,11 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
            return true;
          }
        }
+
      }
+
+    if (CondBr && CondConst && TryToUnfoldSelect(CondCmp, BB))
+      return true;
    }
  
    // Check for some cases that are worth simplifying.  Right now we want to look
@@ -812,7 +831,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
    return false;
  }
  
-
  /// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
  /// load instruction, eliminate it by replacing it with a PHI node.  This is an
  /// important optimization that encourages jump threading, and needs to be run
@@ -827,6 +845,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
    if (LoadBB->getSinglePredecessor())
      return false;
  
+  // If the load is defined in a landing pad, it can't be partially redundant,
+  // because the edges between the invoke and the landing pad cannot have other
+  // instructions between them.
+  if (LoadBB->isLandingPad())
+    return false;
+
    Value *LoadedPtr = LI->getOperand(0);
  
    // If the loaded operand is defined in the LoadBB, it can't be available.
@@ -861,7 +885,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
  
    // If all of the loads and stores that feed the value have the same TBAA tag,
    // then we can propagate it onto any newly inserted loads.
-  MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); 
+  MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
  
    SmallPtrSet<BasicBlock*, 8> PredsScanned;
    typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
@@ -887,7 +911,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
        OneUnavailablePred = PredBB;
        continue;
      }
-    
+
      // If tbaa tags disagree or are not present, forget about them.
      if (TBAATag != ThisTBAATag) TBAATag = 0;
  
@@ -951,7 +975,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
      NewVal->setDebugLoc(LI->getDebugLoc());
      if (TBAATag)
        NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-    
+
      AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
    }
  
@@ -1337,7 +1361,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
      return false;
    }
  
-  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold);
    if (JumpThreadCost > Threshold) {
      DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
            << "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -1411,16 +1435,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
    for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
      // Scan all uses of this instruction to see if it is used outside of its
      // block, and if so, record them in UsesToRename.
-    for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
-         ++UI) {
-      Instruction *User = cast<Instruction>(*UI);
+    for (Use &U : I->uses()) {
+      Instruction *User = cast<Instruction>(U.getUser());
        if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
-        if (UserPN->getIncomingBlock(UI) == BB)
+        if (UserPN->getIncomingBlock(U) == BB)
            continue;
        } else if (User->getParent() == BB)
          continue;
  
-      UsesToRename.push_back(&UI.getUse());
+      UsesToRename.push_back(&U);
      }
  
      // If there are no uses outside the block, we're done with this instruction.
@@ -1455,7 +1478,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
    // At this point, the IR is fully up to date and consistent.  Do a quick scan
    // over the new instructions and zap any that are constants or dead.  This
    // frequently happens because of phi translation.
-  SimplifyInstructionsInBlock(NewBB, TD);
+  SimplifyInstructionsInBlock(NewBB, DL, TLI);
  
    // Threaded an edge!
    ++NumThreads;
@@ -1481,7 +1504,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
      return false;
    }
  
-  unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+  unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold);
    if (DuplicationCost > Threshold) {
      DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
            << "' - Cost is too high: " << DuplicationCost << "\n");
@@ -1537,7 +1560,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
      // If this instruction can be simplified after the operands are updated,
      // just use the simplified value instead.  This frequently happens due to
      // phi translation.
-    if (Value *IV = SimplifyInstruction(New, TD)) {
+    if (Value *IV = SimplifyInstruction(New, DL)) {
        delete New;
        ValueMapping[BI] = IV;
      } else {
@@ -1565,16 +1588,15 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
    for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
      // Scan all uses of this instruction to see if it is used outside of its
      // block, and if so, record them in UsesToRename.
-    for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
-         ++UI) {
-      Instruction *User = cast<Instruction>(*UI);
+    for (Use &U : I->uses()) {
+      Instruction *User = cast<Instruction>(U.getUser());
        if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
-        if (UserPN->getIncomingBlock(UI) == BB)
+        if (UserPN->getIncomingBlock(U) == BB)
            continue;
        } else if (User->getParent() == BB)
          continue;
  
-      UsesToRename.push_back(&UI.getUse());
+      UsesToRename.push_back(&U);
      }
  
      // If there are no uses outside the block, we're done with this instruction.
@@ -1606,4 +1628,80 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
    return true;
  }
  
+/// TryToUnfoldSelect - Look for blocks of the form
+/// bb1:
+///   %a = select
+///   br bb
+///
+/// bb2:
+///   %p = phi [%a, %bb] ...
+///   %c = icmp %p
+///   br i1 %c
+///
+/// And expand the select into a branch structure if one of its arms allows %c
+/// to be folded. This later enables threading from bb1 over bb2.
+bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
+  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
+  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));
+
+  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
+      CondLHS->getParent() != BB)
+    return false;
+
+  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
+    BasicBlock *Pred = CondLHS->getIncomingBlock(I);
+    SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));
+
+    // Look if one of the incoming values is a select in the corresponding
+    // predecessor.
+    if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
+      continue;
+
+    BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+    if (!PredTerm || !PredTerm->isUnconditional())
+      continue;
  
+    // Now check if one of the select values would allow us to constant fold the
+    // terminator in BB. We don't do the transform if both sides fold, those
+    // cases will be threaded in any case.
+    LazyValueInfo::Tristate LHSFolds =
+        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
+                                CondRHS, Pred, BB);
+    LazyValueInfo::Tristate RHSFolds =
+        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
+                                CondRHS, Pred, BB);
+    if ((LHSFolds != LazyValueInfo::Unknown ||
+         RHSFolds != LazyValueInfo::Unknown) &&
+        LHSFolds != RHSFolds) {
+      // Expand the select.
+      //
+      // Pred --
+      //  |    v
+      //  |  NewBB
+      //  |    |
+      //  |-----
+      //  v
+      // BB
+      BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
+                                             BB->getParent(), BB);
+      // Move the unconditional branch to NewBB.
+      PredTerm->removeFromParent();
+      NewBB->getInstList().insert(NewBB->end(), PredTerm);
+      // Create a conditional branch and update PHI nodes.
+      BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
+      CondLHS->setIncomingValue(I, SI->getFalseValue());
+      CondLHS->addIncoming(SI->getTrueValue(), NewBB);
+      // The select is now dead.
+      SI->eraseFromParent();
+
+      // Update any other PHI nodes in BB.
+      for (BasicBlock::iterator BI = BB->begin();
+           PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
+        if (Phi != CondLHS)
+          Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
+      return true;
+    }
+  }
+  return false;
+}