Enable the shrink wrapping optimization for PPC64.

[oota-llvm.git] / lib / Analysis / LoopAccessAnalysis.cpp
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp

index 7fe2c999ddc79208ac4bb415169cc0287a295b45..14c3c57e4c96d1b670af120f21f8528efa442ccb 100644 (file)
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -148,12 +148,34 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
    Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
  }
  
-bool RuntimePointerChecking::needsChecking(
-    const CheckingPtrGroup &M, const CheckingPtrGroup &N,
-    const SmallVectorImpl<int> *PtrPartition) const {
+SmallVector<RuntimePointerChecking::PointerCheck, 4>
+RuntimePointerChecking::generateChecks() const {
+  SmallVector<PointerCheck, 4> Checks;
+
+  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
+      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
+      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
+
+      if (needsChecking(CGI, CGJ))
+        Checks.push_back(std::make_pair(&CGI, &CGJ));
+    }
+  }
+  return Checks;
+}
+
+void RuntimePointerChecking::generateChecks(
+    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+  assert(Checks.empty() && "Checks is not empty");
+  groupChecks(DepCands, UseDependencies);
+  Checks = generateChecks();
+}
+
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+                                           const CheckingPtrGroup &N) const {
    for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
      for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
-      if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+      if (needsChecking(M.Members[I], N.Members[J]))
          return true;
    return false;
  }
@@ -220,8 +242,31 @@ void RuntimePointerChecking::groupChecks(
  
    CheckingGroups.clear();
  
+  // If we need to check two pointers to the same underlying object
+  // with a non-constant difference, we shouldn't perform any pointer
+  // grouping with those pointers. This is because we can easily get
+  // into cases where the resulting check would return false, even when
+  // the accesses are safe.
+  //
+  // The following example shows this:
+  // for (i = 0; i < 1000; ++i)
+  //   a[5000 + i * m] = a[i] + a[i + 9000]
+  //
+  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
+  // (0, 10000) which is always false. However, if m is 1, there is no
+  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
+  // us to perform an accurate check in this case.
+  //
+  // The above case requires that we have an UnknownDependence between
+  // accesses to the same underlying object. This cannot happen unless
+  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+  // is also false. In this case we will use the fallback path and create
+  // separate checking groups for all pointers.
+ 
    // If we don't have the dependency partitions, construct a new
-  // checking pointer group for each pointer.
+  // checking pointer group for each pointer. This is also required
+  // for correctness, because in this case we can have checking between
+  // pointers to the same underlying object.
    if (!UseDependencies) {
      for (unsigned I = 0; I < Pointers.size(); ++I)
        CheckingGroups.push_back(CheckingPtrGroup(I, *this));
@@ -303,8 +348,7 @@ bool RuntimePointerChecking::arePointersInSamePartition(
            PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
  }
  
-bool RuntimePointerChecking::needsChecking(
-    unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
    const PointerInfo &PointerI = Pointers[I];
    const PointerInfo &PointerJ = Pointers[J];
  
@@ -320,82 +364,45 @@ bool RuntimePointerChecking::needsChecking(
    if (PointerI.AliasSetId != PointerJ.AliasSetId)
      return false;
  
-  // If PtrPartition is set omit checks between pointers of the same partition.
-  if (PtrPartition && arePointersInSamePartition(*PtrPartition, I, J))
-    return false;
-
    return true;
  }
  
-void RuntimePointerChecking::print(
-    raw_ostream &OS, unsigned Depth,
-    const SmallVectorImpl<int> *PtrPartition) const {
-
-  OS.indent(Depth) << "Run-time memory checks:\n";
-
+void RuntimePointerChecking::printChecks(
+    raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
+    unsigned Depth) const {
    unsigned N = 0;
-  for (unsigned I = 0; I < CheckingGroups.size(); ++I)
-    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
-      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
-        OS.indent(Depth) << "Check " << N++ << ":\n";
-        OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
-
-        for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
-          OS.indent(Depth + 2)
-              << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n";
-          if (PtrPartition)
-            OS << " (Partition: "
-               << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
-               << "\n";
-        }
+  for (const auto &Check : Checks) {
+    const auto &First = Check.first->Members, &Second = Check.second->Members;
  
-        OS.indent(Depth + 2) << "Against group " << J << ":\n";
+    OS.indent(Depth) << "Check " << N++ << ":\n";
  
-        for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
-          OS.indent(Depth + 2)
-              << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n";
-          if (PtrPartition)
-            OS << " (Partition: "
-               << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
-               << "\n";
-        }
-      }
+    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
+    for (unsigned K = 0; K < First.size(); ++K)
+      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
  
-  OS.indent(Depth) << "Grouped accesses:\n";
-  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
-    OS.indent(Depth + 2) << "Group " << I << ":\n";
-    OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
-                         << " High: " << *CheckingGroups[I].High << ")\n";
-    for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
-      OS.indent(Depth + 6) << "Member: "
-                           << *Pointers[CheckingGroups[I].Members[J]].Expr
-                           << "\n";
-    }
+    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
+    for (unsigned K = 0; K < Second.size(); ++K)
+      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
    }
  }
  
-unsigned RuntimePointerChecking::getNumberOfChecks(
-    const SmallVectorImpl<int> *PtrPartition) const {
+void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
  
-  unsigned NumPartitions = CheckingGroups.size();
-  unsigned CheckCount = 0;
-
-  for (unsigned I = 0; I < NumPartitions; ++I)
-    for (unsigned J = I + 1; J < NumPartitions; ++J)
-      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
-        CheckCount++;
-  return CheckCount;
-}
+  OS.indent(Depth) << "Run-time memory checks:\n";
+  printChecks(OS, Checks, Depth);
  
-bool RuntimePointerChecking::needsAnyChecking(
-    const SmallVectorImpl<int> *PtrPartition) const {
-  unsigned NumPointers = Pointers.size();
+  OS.indent(Depth) << "Grouped accesses:\n";
+  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+    const auto &CG = CheckingGroups[I];
  
-  for (unsigned I = 0; I < NumPointers; ++I)
-    for (unsigned J = I + 1; J < NumPointers; ++J)
-      if (needsChecking(I, J, PtrPartition))
-        return true;
-  return false;
+    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
+    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
+                         << ")\n";
+    for (unsigned J = 0; J < CG.Members.size(); ++J) {
+      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
+                           << "\n";
+    }
+  }
  }
  
  namespace {
@@ -619,9 +626,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
    }
  
    if (NeedRTCheck && CanDoRT)
-    RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
  
-  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                 << " pointer comparisons.\n");
  
    RtCheck.Need = NeedRTCheck;
@@ -797,11 +804,11 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
  /// \brief Check whether the access through \p Ptr has a constant stride.
  int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
                         const ValueToValueMap &StridesMap) {
-  const Type *Ty = Ptr->getType();
+  Type *Ty = Ptr->getType();
    assert(Ty->isPointerTy() && "Unexpected non-ptr");
  
    // Make sure that the pointer does not point to aggregate types.
-  const PointerType *PtrTy = cast<PointerType>(Ty);
+  auto *PtrTy = cast<PointerType>(Ty);
    if (PtrTy->getElementType()->isAggregateType()) {
      DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
            << *Ptr << "\n");
@@ -1586,10 +1593,13 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
    return nullptr;
  }
  
-/// \brief IR Values for the lower and upper bounds of a pointer evolution.
+/// \brief IR Values for the lower and upper bounds of a pointer evolution.  We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values.  Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
  struct PointerBounds {
-  Value *Start;
-  Value *End;
+  TrackingVH<Value> Start;
+  TrackingVH<Value> End;
  };
  
  /// \brief Expand code for the lower and upper bound of the pointer group \p CG
@@ -1644,7 +1654,7 @@ static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
    return ChecksWithBounds;
  }
  
-std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
      Instruction *Loc,
      const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
      const {
@@ -1661,6 +1671,8 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
  
    for (const auto &Check : ExpandedChecks) {
      const PointerBounds &A = Check.first, &B = Check.second;
+    // Check if two pointers (A and B) conflict where conflict is computed as:
+    // start(A) <= end(B) && start(B) <= end(A)
      unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
      unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
  
@@ -1703,25 +1715,12 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
    return std::make_pair(FirstInst, Check);
  }
  
-std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
-    Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
+std::pair<Instruction *, Instruction *>
+LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
    if (!PtrRtChecking.Need)
      return std::make_pair(nullptr, nullptr);
  
-  SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
-  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
-    for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
-      const RuntimePointerChecking::CheckingPtrGroup &CGI =
-          PtrRtChecking.CheckingGroups[i];
-      const RuntimePointerChecking::CheckingPtrGroup &CGJ =
-          PtrRtChecking.CheckingGroups[j];
-
-      if (PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
-        Checks.push_back(std::make_pair(&CGI, &CGJ));
-    }
-  }
-
-  return addRuntimeCheck(Loc, Checks);
+  return addRuntimeChecks(Loc, PtrRtChecking.getChecks());
  }
  
  LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
@@ -1800,10 +1799,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
  }
  
  bool LoopAccessAnalysis::runOnFunction(Function &F) {
-  SE = &getAnalysis<ScalarEvolution>();
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    TLI = TLIP ? &TLIP->getTLI() : nullptr;
-  AA = &getAnalysis<AliasAnalysis>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  
@@ -1811,8 +1810,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) {
  }
  
  void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<ScalarEvolution>();
-    AU.addRequired<AliasAnalysis>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addRequired<LoopInfoWrapperPass>();
  
@@ -1824,8 +1823,8 @@ static const char laa_name[] = "Loop Access Analysis";
  #define LAA_NAME "loop-accesses"
  
  INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)