Revert "Revert "Add Constant Hoisting Pass" (r200034)"

[oota-llvm.git] / lib / Transforms / Scalar / CodeGenPrepare.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp

index f0d29c88a82ea91613e7d620b73a9010b6abeb8f..6acbd5eaa146829c778a819b945145a68c66ff83 100644 (file)
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -19,14 +19,11 @@
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/ValueMap.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Assembly/Writer.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/InlineAsm.h"
@@ -76,10 +73,10 @@ namespace {
    class CodeGenPrepare : public FunctionPass {
      /// TLI - Keep a pointer of a TargetLowering to consult for determining
      /// transformation profitability.
+    const TargetMachine *TM;
      const TargetLowering *TLI;
      const TargetLibraryInfo *TLInfo;
      DominatorTree *DT;
-    ProfileInfo *PFI;
  
      /// CurInstIterator - As we scan instructions optimizing them, this is the
      /// next instruction to optimize.  Xforms that can invalidate this should
@@ -100,8 +97,8 @@ namespace {
  
    public:
      static char ID; // Pass identification, replacement for typeid
-    explicit CodeGenPrepare(const TargetLowering *tli = 0)
-      : FunctionPass(ID), TLI(tli) {
+    explicit CodeGenPrepare(const TargetMachine *TM = 0)
+      : FunctionPass(ID), TM(TM), TLI(0) {
          initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
        }
      bool runOnFunction(Function &F);
@@ -109,8 +106,7 @@ namespace {
      const char *getPassName() const { return "CodeGen Prepare"; }
  
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreserved<DominatorTree>();
-      AU.addPreserved<ProfileInfo>();
+      AU.addPreserved<DominatorTreeWrapperPass>();
        AU.addRequired<TargetLibraryInfo>();
      }
  
@@ -133,23 +129,33 @@ namespace {
  }
  
  char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
-                "Optimize for code generation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
-                "Optimize for code generation", false, false)
-
-FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
-  return new CodeGenPrepare(TLI);
+static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) {
+  initializeTargetLibraryInfoPass(Registry);
+  PassInfo *PI = new PassInfo(
+      "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID,
+      PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false,
+      PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>));
+  Registry.registerPass(*PI, true);
+  return PI;
+}
+
+void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) {
+  CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce)
+}
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
+  return new CodeGenPrepare(TM);
  }
  
  bool CodeGenPrepare::runOnFunction(Function &F) {
    bool EverMadeChange = false;
  
    ModifiedDT = false;
+  if (TM) TLI = TM->getTargetLowering();
    TLInfo = &getAnalysis<TargetLibraryInfo>();
-  DT = getAnalysisIfAvailable<DominatorTree>();
-  PFI = getAnalysisIfAvailable<ProfileInfo>();
+  DominatorTreeWrapperPass *DTWP =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DT = DTWP ? &DTWP->getDomTree() : 0;
    OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                             Attribute::OptimizeForSize);
  
@@ -205,7 +211,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
        SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
  
        DeleteDeadBlock(BB);
-      
+
        for (SmallVectorImpl<BasicBlock*>::iterator
               II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
          if (pred_begin(*II) == pred_end(*II))
@@ -223,7 +229,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
    }
  
    if (ModifiedDT && DT)
-    DT->DT->recalculate(F);
+    DT->recalculate(F);
  
    return EverMadeChange;
  }
@@ -234,7 +240,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
  bool CodeGenPrepare::EliminateFallThrough(Function &F) {
    bool Changed = false;
    // Scan all of the blocks in the function, except for the entry block.
-  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+  for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ) {
      BasicBlock *BB = I++;
      // If the destination block has a single pred, then this is a trivial
      // edge, just collapse it.
@@ -270,7 +276,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
  bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
    bool MadeChange = false;
    // Note that this intentionally skips the entry block.
-  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+  for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ) {
      BasicBlock *BB = I++;
  
      // If this block doesn't end with an uncond branch, ignore it.
@@ -440,10 +446,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
      DT->changeImmediateDominator(DestBB, NewIDom);
      DT->eraseNode(BB);
    }
-  if (PFI) {
-    PFI->replaceAllUses(BB, DestBB);
-    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
-  }
    BB->eraseFromParent();
    ++NumBlocksElim;
  
@@ -830,7 +832,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
    ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
    void print(raw_ostream &OS) const;
    void dump() const;
-  
+
    bool operator==(const ExtAddrMode& O) const {
      return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
             (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
@@ -838,10 +840,12 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
    }
  };
  
+#ifndef NDEBUG
  static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
    AM.print(OS);
    return OS;
  }
+#endif
  
  void ExtAddrMode::print(raw_ostream &OS) const {
    bool NeedPlus = false;
@@ -849,7 +853,7 @@ void ExtAddrMode::print(raw_ostream &OS) const {
    if (BaseGV) {
      OS << (NeedPlus ? " + " : "")
         << "GV:";
-    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+    BaseGV->printAsOperand(OS, /*PrintType=*/false);
      NeedPlus = true;
    }
  
@@ -859,14 +863,13 @@ void ExtAddrMode::print(raw_ostream &OS) const {
    if (BaseReg) {
      OS << (NeedPlus ? " + " : "")
         << "Base:";
-    WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
+    BaseReg->printAsOperand(OS, /*PrintType=*/false);
      NeedPlus = true;
    }
    if (Scale) {
      OS << (NeedPlus ? " + " : "")
         << Scale << "*";
-    WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
-    NeedPlus = true;
+    ScaledReg->printAsOperand(OS, /*PrintType=*/false);
    }
  
    OS << ']';
@@ -891,16 +894,16 @@ class AddressingModeMatcher {
    /// the memory instruction that we're computing this address for.
    Type *AccessTy;
    Instruction *MemoryInst;
-  
+
    /// AddrMode - This is the addressing mode that we're building up.  This is
    /// part of the return value of this addressing mode matching stuff.
    ExtAddrMode &AddrMode;
-  
+
    /// IgnoreProfitability - This is set to true when we should not do
    /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
    /// always returns true.
    bool IgnoreProfitability;
-  
+
    AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
                          const TargetLowering &T, Type *AT,
                          Instruction *MI, ExtAddrMode &AM)
@@ -908,7 +911,7 @@ class AddressingModeMatcher {
      IgnoreProfitability = false;
    }
  public:
-  
+
    /// Match - Find the maximal addressing mode that a load/store of V can fold,
    /// give an access type of AccessTy.  This returns a list of involved
    /// instructions in AddrModeInsts.
@@ -918,7 +921,7 @@ public:
                             const TargetLowering &TLI) {
      ExtAddrMode Result;
  
-    bool Success = 
+    bool Success =
        AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
                              MemoryInst, Result).MatchAddr(V, 0);
      (void)Success; assert(Success && "Couldn't select *anything*?");
@@ -943,11 +946,11 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
    // mode.  Just process that directly.
    if (Scale == 1)
      return MatchAddr(ScaleReg, Depth);
-  
+
    // If the scale is 0, it takes nothing to add this.
    if (Scale == 0)
      return true;
-  
+
    // If we already have a scale of this value, we can add to it, otherwise, we
    // need an available scale field.
    if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
@@ -966,7 +969,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
  
    // It was legal, so commit it.
    AddrMode = TestAddrMode;
-  
+
    // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
    // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
    // X*Scale + C*Scale to addr mode.
@@ -975,7 +978,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
        match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
      TestAddrMode.ScaledReg = AddLHS;
      TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
-      
+
      // If this addressing mode is legal, commit it and remember that we folded
      // this instruction.
      if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
@@ -1026,7 +1029,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
                                                 unsigned Depth) {
    // Avoid exponential behavior on extremely deep expression trees.
    if (Depth >= 5) return false;
-  
+
    switch (Opcode) {
    case Instruction::PtrToInt:
      // PtrToInt is always a noop, as we know that the int type is pointer sized.
@@ -1034,7 +1037,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
    case Instruction::IntToPtr:
      // This inttoptr is a no-op if the integer type is pointer sized.
      if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
-        TLI.getPointerTy())
+        TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
        return MatchAddr(AddrInst->getOperand(0), Depth);
      return false;
    case Instruction::BitCast:
@@ -1055,16 +1058,16 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
      if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
          MatchAddr(AddrInst->getOperand(0), Depth+1))
        return true;
-    
+
      // Restore the old addr mode info.
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
-    
+
      // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
      if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
          MatchAddr(AddrInst->getOperand(1), Depth+1))
        return true;
-    
+
      // Otherwise we definitely can't merge the ADD in.
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
@@ -1081,7 +1084,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
      int64_t Scale = RHS->getSExtValue();
      if (Opcode == Instruction::Shl)
        Scale = 1LL << Scale;
-    
+
      return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
    }
    case Instruction::GetElementPtr: {
@@ -1089,7 +1092,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
      // one variable offset.
      int VariableOperand = -1;
      unsigned VariableScale = 0;
-    
+
      int64_t ConstantOffset = 0;
      const DataLayout *TD = TLI.getDataLayout();
      gep_type_iterator GTI = gep_type_begin(AddrInst);
@@ -1107,14 +1110,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
            // We only allow one variable index at the moment.
            if (VariableOperand != -1)
              return false;
-          
+
            // Remember the variable index.
            VariableOperand = i;
            VariableScale = TypeSize;
          }
        }
      }
-    
+
      // A common case is for the GEP to only do a constant offset.  In this case,
      // just add it to the disp field and check validity.
      if (VariableOperand == -1) {
@@ -1208,7 +1211,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
          AddrModeInsts.push_back(I);
          return true;
        }
-      
+
        // It isn't profitable to do this, roll back.
        //cerr << "NOT FOLDING: " << *I;
        AddrMode = BackupAddrMode;
@@ -1254,7 +1257,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
    TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
    for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
      TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-    
+
      // Compute the constraint code and ConstraintType to use.
      TLI.ComputeConstraintToUse(OpInfo, SDValue());
  
@@ -1279,7 +1282,7 @@ static bool FindAllMemoryUses(Instruction *I,
    // If we already considered this instruction, we're done.
    if (!ConsideredInsts.insert(I))
      return false;
-  
+
    // If this is an obviously unfoldable instruction, bail out.
    if (!MightBeFoldableInst(I))
      return true;
@@ -1293,24 +1296,24 @@ static bool FindAllMemoryUses(Instruction *I,
        MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
        continue;
      }
-    
+
      if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
        unsigned opNo = UI.getOperandNo();
        if (opNo == 0) return true; // Storing addr, not into addr.
        MemoryUses.push_back(std::make_pair(SI, opNo));
        continue;
      }
-    
+
      if (CallInst *CI = dyn_cast<CallInst>(U)) {
        InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
        if (!IA) return true;
-      
+
        // If this is a memory operand, we're cool, otherwise bail out.
        if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
          return true;
        continue;
      }
-    
+
      if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
                            TLI))
        return true;
@@ -1328,17 +1331,17 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
    // If Val is either of the known-live values, we know it is live!
    if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
      return true;
-  
+
    // All values other than instructions and arguments (e.g. constants) are live.
    if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
-  
+
    // If Val is a constant sized alloca in the entry block, it is live, this is
    // true because it is just a reference to the stack/frame pointer, which is
    // live for the whole function.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
      if (AI->isStaticAlloca())
        return true;
-  
+
    // Check to see if this value is already used in the memory instruction's
    // block.  If so, it's already live into the block at the very least, so we
    // can reasonably fold it.
@@ -1370,7 +1373,7 @@ bool AddressingModeMatcher::
  IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                       ExtAddrMode &AMAfter) {
    if (IgnoreProfitability) return true;
-  
+
    // AMBefore is the addressing mode before this instruction was folded into it,
    // and AMAfter is the addressing mode after the instruction was folded.  Get
    // the set of registers referenced by AMAfter and subtract out those
@@ -1381,7 +1384,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
    // BaseReg and ScaleReg (global addresses are always available, as are any
    // folded immediates).
    Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
-  
+
    // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
    // lifetime wasn't extended by adding this instruction.
    if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
@@ -1402,7 +1405,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
    SmallPtrSet<Instruction*, 16> ConsideredInsts;
    if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
      return false;  // Has a non-memory, non-foldable use!
-  
+
    // Now that we know that all uses of this instruction are part of a chain of
    // computation involving only operations that could theoretically be folded
    // into a memory use, loop over each of these uses and see if they could
@@ -1411,15 +1414,14 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
    for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
      Instruction *User = MemoryUses[i].first;
      unsigned OpNo = MemoryUses[i].second;
-    
+
      // Get the access type of this use.  If the use isn't a pointer, we don't
      // know what it accesses.
      Value *Address = User->getOperand(OpNo);
      if (!Address->getType()->isPointerTy())
        return false;
-    Type *AddressAccessTy =
-      cast<PointerType>(Address->getType())->getElementType();
-    
+    Type *AddressAccessTy = Address->getType()->getPointerElementType();
+
      // Do a match against the root of this address, ignoring profitability. This
      // will tell us if the addressing mode for the memory operation will
      // *actually* cover the shared instruction.
@@ -1434,10 +1436,10 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
      if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
                    I) == MatchedAddrModeInsts.end())
        return false;
-    
+
      MatchedAddrModeInsts.clear();
    }
-  
+
    return true;
  }
  
@@ -1572,9 +1574,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
    } else {
      DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                   << *MemoryInst);
-    Type *IntPtrTy =
-          TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
-
+    Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
      Value *Result = 0;
  
      // Start with the base register. Do this first so that subsequent address
@@ -1893,7 +1893,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
      // It is possible for very late stage optimizations (such as SimplifyCFG)
      // to introduce PHI nodes too late to be cleaned up.  If we detect such a
      // trivial PHI, go ahead and zap it here.
-    if (Value *V = SimplifyInstruction(P)) {
+    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+                                       TLInfo, DT)) {
        P->replaceAllUsesWith(V);
        P->eraseFromParent();
        ++NumPHIsElim;
@@ -1923,7 +1924,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
    }
  
    if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    return OptimizeCmpExpression(CI);
+    if (!TLI || !TLI->hasMultipleConditionRegisters())
+      return OptimizeCmpExpression(CI);
  
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)