[Modules] Move GetElementPtrTypeIterator into the IR library. As its
[oota-llvm.git] / lib / Target / PowerPC / PPCCTRLoops.cpp
index d36fec0a0a51179256e281d19564da6f99ea5ff8..feba3f1b794ec553610b7c221cf13cb6d58b28b6 100644 (file)
 #define DEBUG_TYPE "ctrloops"
 
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "PPCTargetMachine.h"
-#include "PPC.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 
 #ifndef NDEBUG
 #include "llvm/CodeGen/MachineDominators.h"
@@ -95,25 +96,20 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<LoopInfo>();
       AU.addPreserved<LoopInfo>();
-      AU.addRequired<DominatorTree>();
-      AU.addPreserved<DominatorTree>();
+      AU.addRequired<DominatorTreeWrapperPass>();
+      AU.addPreserved<DominatorTreeWrapperPass>();
       AU.addRequired<ScalarEvolution>();
     }
 
   private:
-    // FIXME: Copied from LoopSimplify.
-    BasicBlock *InsertPreheaderForLoop(Loop *L);
-    void PlaceSplitBlockCarefully(BasicBlock *NewBB,
-                                  SmallVectorImpl<BasicBlock*> &SplitPreds,
-                                  Loop *L);
-
     bool mightUseCTR(const Triple &TT, BasicBlock *BB);
     bool convertToCTRLoop(Loop *L);
+
   private:
     PPCTargetMachine *TM;
     LoopInfo *LI;
     ScalarEvolution *SE;
-    DataLayout *TD;
+    const DataLayout *DL;
     DominatorTree *DT;
     const TargetLibraryInfo *LibInfo;
   };
@@ -149,7 +145,7 @@ namespace {
 
 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                       false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
@@ -174,8 +170,9 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
 bool PPCCTRLoops::runOnFunction(Function &F) {
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
-  DT = &getAnalysis<DominatorTree>();
-  TD = getAnalysisIfAvailable<DataLayout>();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
   LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
 
   bool MadeChange = false;
@@ -190,6 +187,13 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
   return MadeChange;
 }
 
+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { // Is Ty an integer type wider than 32 bits (Is32Bit) or 64 bits (otherwise)?
+  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) // Only integer types carry a bit width to compare.
+    return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); // Strictly greater: exactly 32/64 bits is not "large".
+
+  return false; // Ty is not an IntegerType, so it is never reported as large.
+}
+
 bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
   for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
        J != JE; ++J) {
@@ -237,6 +241,13 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
 #endif
 
           case Intrinsic::longjmp:
+
+          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+          // because, although it does clobber the counter register, the
+          // control can't then return to inside the loop unless there is also
+          // an eh_sjlj_setjmp.
+          case Intrinsic::eh_sjlj_setjmp:
+
           case Intrinsic::memcpy:
           case Intrinsic::memmove:
           case Intrinsic::memset:
@@ -250,12 +261,19 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
           case Intrinsic::sin:
           case Intrinsic::cos:
             return true;
+          case Intrinsic::copysign:
+            if (CI->getArgOperand(0)->getType()->getScalarType()->
+                isPPC_FP128Ty())
+              return true;
+            else
+              continue; // ISD::FCOPYSIGN is never a library call.
           case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
           case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
           case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
           case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
           case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
           case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+          case Intrinsic::round:     Opcode = ISD::FROUND;     break;
           }
         }
 
@@ -280,8 +298,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
           default: return true;
           case LibFunc::copysign:
           case LibFunc::copysignf:
-          case LibFunc::copysignl:
             continue; // ISD::FCOPYSIGN is never a library call.
+          case LibFunc::copysignl:
+            return true;
           case LibFunc::fabs:
           case LibFunc::fabsf:
           case LibFunc::fabsl:
@@ -306,6 +325,10 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
           case LibFunc::rintf:
           case LibFunc::rintl:
             Opcode = ISD::FRINT; break;
+          case LibFunc::round:
+          case LibFunc::roundf:
+          case LibFunc::roundl:
+            Opcode = ISD::FROUND; break;
           case LibFunc::trunc:
           case LibFunc::truncf:
           case LibFunc::truncl:
@@ -337,11 +360,16 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
       CastInst *CI = cast<CastInst>(J);
       if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
           CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
-          (TT.isArch32Bit() &&
-           (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
-            CI->getDestTy()->getScalarType()->isIntegerTy(64))
-          ))
+          isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+          isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
         return true;
+    } else if (isLargeIntegerTy(TT.isArch32Bit(),
+                                J->getType()->getScalarType()) &&
+               (J->getOpcode() == Instruction::UDiv ||
+                J->getOpcode() == Instruction::SDiv ||
+                J->getOpcode() == Instruction::URem ||
+                J->getOpcode() == Instruction::SRem)) {
+      return true;
     } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
       // On PowerPC, indirect jumps use the counter register.
       return true;
@@ -399,7 +427,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
   BasicBlock *CountedExitBlock = 0;
   const SCEV *ExitCount = 0;
   BranchInst *CountedExitBranch = 0;
-  for (SmallVector<BasicBlock*, 4>::iterator I = ExitingBlocks.begin(),
+  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
        IE = ExitingBlocks.end(); I != IE; ++I) {
     const SCEV *EC = SE->getExitCount(L, *I);
     DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
@@ -412,6 +440,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
     } else if (!SE->isLoopInvariant(EC, L))
       continue;
 
+    if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+      continue;
+
     // We now have a loop-invariant count of loop iterations (which is not the
     // constant zero) for which we know that this loop will not exit via this
     // exiting block.
@@ -464,7 +495,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
   // the CTR register because some such uses might be reordered by the
   // selection DAG after the mtctr instruction).
   if (!Preheader || mightUseCTR(TT, Preheader))
-    Preheader = InsertPreheaderForLoop(L);
+    Preheader = InsertPreheaderForLoop(L, this);
   if (!Preheader)
     return MadeChange;
 
@@ -512,84 +543,6 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
   return MadeChange;
 }
 
-// FIXME: Copied from LoopSimplify.
-BasicBlock *PPCCTRLoops::InsertPreheaderForLoop(Loop *L) {
-  BasicBlock *Header = L->getHeader();
-
-  // Compute the set of predecessors of the loop that are not in the loop.
-  SmallVector<BasicBlock*, 8> OutsideBlocks;
-  for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
-       PI != PE; ++PI) {
-    BasicBlock *P = *PI;
-    if (!L->contains(P)) {         // Coming in from outside the loop?
-      // If the loop is branched to from an indirect branch, we won't
-      // be able to fully transform the loop, because it prohibits
-      // edge splitting.
-      if (isa<IndirectBrInst>(P->getTerminator())) return 0;
-
-      // Keep track of it.
-      OutsideBlocks.push_back(P);
-    }
-  }
-
-  // Split out the loop pre-header.
-  BasicBlock *PreheaderBB;
-  if (!Header->isLandingPad()) {
-    PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
-                                         this);
-  } else {
-    SmallVector<BasicBlock*, 2> NewBBs;
-    SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
-                                ".split-lp", this, NewBBs);
-    PreheaderBB = NewBBs[0];
-  }
-
-  PreheaderBB->getTerminator()->setDebugLoc(
-                                      Header->getFirstNonPHI()->getDebugLoc());
-  DEBUG(dbgs() << "Creating pre-header "
-               << PreheaderBB->getName() << "\n");
-
-  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
-  // code layout too horribly.
-  PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
-
-  return PreheaderBB;
-}
-
-void PPCCTRLoops::PlaceSplitBlockCarefully(BasicBlock *NewBB,
-                                       SmallVectorImpl<BasicBlock*> &SplitPreds,
-                                            Loop *L) {
-  // Check to see if NewBB is already well placed.
-  Function::iterator BBI = NewBB; --BBI;
-  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
-    if (&*BBI == SplitPreds[i])
-      return;
-  }
-
-  // If it isn't already after an outside block, move it after one.  This is
-  // always good as it makes the uncond branch from the outside block into a
-  // fall-through.
-
-  // Figure out *which* outside block to put this after.  Prefer an outside
-  // block that neighbors a BB actually in the loop.
-  BasicBlock *FoundBB = 0;
-  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
-    Function::iterator BBI = SplitPreds[i];
-    if (++BBI != NewBB->getParent()->end() &&
-        L->contains(BBI)) {
-      FoundBB = SplitPreds[i];
-      break;
-    }
-  }
-
-  // If our heuristic for a *good* bb to place this after doesn't find
-  // anything, just pick something.  It's likely better than leaving it within
-  // the loop.
-  if (!FoundBB)
-    FoundBB = SplitPreds[0];
-  NewBB->moveAfter(FoundBB);
-}
-
 #ifndef NDEBUG
 static bool clobbersCTR(const MachineInstr *MI) {
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -627,7 +580,7 @@ check_block:
   CheckPreds = true;
   for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
     unsigned Opc = I->getOpcode();
-    if (Opc == PPC::MTCTRse || Opc == PPC::MTCTR8se) {
+    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
       CheckPreds = false;
       break;
     }