[LoopAccesses] Cache the result of canVectorizeMemory

[oota-llvm.git] / lib / Analysis / LoopAccessAnalysis.cpp
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp

index 927ae4945489f3bda7d4483264e96cc9c19faae4..5001b5fa3f1f798312b6404212918fe00e5885eb 100644 (file)
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -25,6 +25,31 @@ using namespace llvm;
  
  #define DEBUG_TYPE "loop-vectorize"
  
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+                    cl::desc("Sets the SIMD width. Zero is autoselect."),
+                    cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor = 0;
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+                        cl::desc("Sets the vectorization interleave count. "
+                                 "Zero is autoselect."),
+                        cl::location(
+                            VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave = 0;
+
+/// When performing memory disambiguation checks at runtime do not make more
+/// than this number of comparisons.
+const unsigned VectorizerParams::RuntimeMemoryCheckThreshold = 8;
+
+/// Maximum SIMD width.
+const unsigned VectorizerParams::MaxVectorWidth = 64;
+
+bool VectorizerParams::isInterleaveForced() { // '::' below: file-static cl::opt
+  return ::VectorizationInterleave.getNumOccurrences() > 0; // occurrence count > 0
+}
+
  void VectorizationReport::emitAnalysis(VectorizationReport &Message,
                                         const Function *TheFunction,
                                         const Loop *TheLoop) {
@@ -92,6 +117,23 @@ void LoopAccessInfo::RuntimePointerCheck::insert(ScalarEvolution *SE, Loop *Lp,
    AliasSetId.push_back(ASId);
  }
  
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I,
+                                                        unsigned J) const {
+  // Two read-only pointers never need a check; one of them must be written.
+  if (!IsWritePtr[I] && !IsWritePtr[J])
+    return false;
+
+  // Members of one dependency set are skipped; only cross-set pairs qualify.
+  if (DependencySetId[I] == DependencySetId[J])
+    return false;
+
+  // Pairs from different alias sets are skipped; both must share an alias set.
+  if (AliasSetId[I] != AliasSetId[J])
+    return false;
+
+  return true; // A write is involved, dependency sets differ, alias set matches.
+}
+
  namespace {
  /// \brief Analyses memory accesses in a loop.
  ///
@@ -812,7 +854,7 @@ bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
    return true;
  }
  
-bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
+void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
  
    typedef SmallVector<Value*, 16> ValueVector;
    typedef SmallPtrSet<Value*, 16> ValueSet;
@@ -855,7 +897,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
            emitAnalysis(VectorizationReport(Ld)
                         << "read with atomic ordering or volatile read");
            DEBUG(dbgs() << "LV: Found a non-simple load.\n");
-          return false;
+          CanVecMem = false;
+          return;
          }
          NumLoads++;
          Loads.push_back(Ld);
@@ -869,13 +912,15 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
          if (!St) {
            emitAnalysis(VectorizationReport(it) <<
                         "instruction cannot be vectorized");
-          return false;
+          CanVecMem = false;
+          return;
          }
          if (!St->isSimple() && !IsAnnotatedParallel) {
            emitAnalysis(VectorizationReport(St)
                         << "write with atomic ordering or volatile write");
            DEBUG(dbgs() << "LV: Found a non-simple store.\n");
-          return false;
+          CanVecMem = false;
+          return;
          }
          NumStores++;
          Stores.push_back(St);
@@ -891,7 +936,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
    // care if the pointers are *restrict*.
    if (!Stores.size()) {
      DEBUG(dbgs() << "LV: Found a read-only loop!\n");
-    return true;
+    CanVecMem = true;
+    return;
    }
  
    AccessAnalysis::DepCandidates DependentAccesses;
@@ -914,7 +960,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
            VectorizationReport(ST)
            << "write to a loop invariant address could not be vectorized");
        DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
-      return false;
+      CanVecMem = false;
+      return;
      }
  
      // If we did *not* see this pointer before, insert it to  the read-write
@@ -926,7 +973,7 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
        // The TBAA metadata could have a control dependency on the predication
        // condition, so we cannot rely on it when determining whether or not we
        // need runtime pointer checks.
-      if (blockNeedsPredication(ST->getParent()))
+      if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
          Loc.AATags.TBAA = nullptr;
  
        Accesses.addStore(Loc);
@@ -937,7 +984,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
      DEBUG(dbgs()
            << "LV: A loop annotated parallel, ignore memory dependency "
            << "checks.\n");
-    return true;
+    CanVecMem = true;
+    return;
    }
  
    for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
@@ -962,7 +1010,7 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
      // The TBAA metadata could have a control dependency on the predication
      // condition, so we cannot rely on it when determining whether or not we
      // need runtime pointer checks.
-    if (blockNeedsPredication(LD->getParent()))
+    if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
        Loc.AATags.TBAA = nullptr;
  
      Accesses.addLoad(Loc, IsReadOnlyPtr);
@@ -972,7 +1020,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
    // other reads in this loop then is it safe to vectorize.
    if (NumReadWrites == 1 && NumReads == 0) {
      DEBUG(dbgs() << "LV: Found a write-only loop!\n");
-    return true;
+    CanVecMem = true;
+    return;
    }
  
    // Build dependence sets and check whether we need a runtime pointer bounds
@@ -1013,12 +1062,13 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
      DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
            "the array bounds.\n");
      PtrRtCheck.reset();
-    return false;
+    CanVecMem = false;
+    return;
    }
  
    PtrRtCheck.Need = NeedRTCheck;
  
-  bool CanVecMem = true;
+  CanVecMem = true;
    if (Accesses.isDependencyCheckNeeded()) {
      DEBUG(dbgs() << "LV: Checking memory dependencies\n");
      CanVecMem = DepChecker.areDepsSafe(
@@ -1051,7 +1101,8 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
                         << " dependent memory operations checked at runtime");
          DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
          PtrRtCheck.reset();
-        return false;
+        CanVecMem = false;
+        return;
        }
  
        CanVecMem = true;
@@ -1064,11 +1115,10 @@ bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
  
    DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
          " need a runtime memory check.\n");
-
-  return CanVecMem;
  }
  
-bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB)  {
+bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
+                                           DominatorTree *DT)  {
    assert(TheLoop->contains(BB) && "Unknown block used");
  
    // Blocks that do not dominate the latch need predication.
@@ -1077,7 +1127,7 @@ bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB)  {
  }
  
  void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) {
-  assert(!Report && "Multiple report generated");
+  assert(!Report && "Multiple reports generated");
    Report = Message;
  }
  
@@ -1138,15 +1188,7 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) {
    Value *MemoryRuntimeCheck = nullptr;
    for (unsigned i = 0; i < NumPointers; ++i) {
      for (unsigned j = i+1; j < NumPointers; ++j) {
-      // No need to check if two readonly pointers intersect.
-      if (!PtrRtCheck.IsWritePtr[i] && !PtrRtCheck.IsWritePtr[j])
-        continue;
-
-      // Only need to check pointers between two different dependency sets.
-      if (PtrRtCheck.DependencySetId[i] == PtrRtCheck.DependencySetId[j])
-       continue;
-      // Only need to check pointers in the same alias set.
-      if (PtrRtCheck.AliasSetId[i] != PtrRtCheck.AliasSetId[j])
+      if (!PtrRtCheck.needsChecking(i, j))
          continue;
  
        unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();