[LoopAccesses] Change LAA::getInfo to return a constant reference

[oota-llvm.git] / lib / Transforms / Vectorize / LoopVectorize.cpp
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 988c8bef4c9f657e76ff7e38c7e91f13233674ec..d75eead0683a35c3f3fa27f882eff99f314008e2 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -106,14 +106,19 @@ using namespace llvm::PatternMatch;
  STATISTIC(LoopsVectorized, "Number of loops vectorized");
  STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
  
-static cl::opt<unsigned>
-VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
-                    cl::desc("Sets the SIMD width. Zero is autoselect."));
-
-static cl::opt<unsigned>
-VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
-                    cl::desc("Sets the vectorization interleave count. "
-                             "Zero is autoselect."));
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+                    cl::desc("Sets the SIMD width. Zero is autoselect."),
+                    cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor = 0;
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+                        cl::desc("Sets the vectorization interleave count. "
+                                 "Zero is autoselect."),
+                        cl::location(
+                            VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave = 0;
  
  static cl::opt<bool>
  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
@@ -147,10 +152,10 @@ static const unsigned TinyTripCountUnrollThreshold = 128;
  
  /// When performing memory disambiguation checks at runtime do not make more
  /// than this number of comparisons.
-static const unsigned RuntimeMemoryCheckThreshold = 8;
+const unsigned VectorizerParams::RuntimeMemoryCheckThreshold = 8;
  
  /// Maximum simd width.
-static const unsigned MaxVectorWidth = 64;
+const unsigned VectorizerParams::MaxVectorWidth = 64;
  
  static cl::opt<unsigned> ForceTargetNumScalarRegs(
      "force-target-num-scalar-regs", cl::init(0), cl::Hidden,
@@ -219,6 +224,21 @@ class LoopVectorizationLegality;
  class LoopVectorizationCostModel;
  class LoopVectorizeHints;
  
+/// \brief This modifies LoopAccessReport to initialize message with
+/// loop-vectorizer-specific part.
+class VectorizationReport : public LoopAccessReport {
+public:
+  VectorizationReport(Instruction *I = nullptr)
+      : LoopAccessReport("loop not vectorized: ", I) {}
+
+  /// \brief This allows promotion of the loop-access analysis report into the
+  /// loop-vectorizer report.  It modifies the message to add the
+  /// loop-vectorizer-specific part of the message.
+  explicit VectorizationReport(const LoopAccessReport &R)
+      : LoopAccessReport(Twine("loop not vectorized: ") + R.str(),
+                         R.getInstr()) {}
+};
+
  /// InnerLoopVectorizer vectorizes loops which contain only one basic
  /// block to a specified vectorization factor (VF).
  /// This class performs the widening of scalars into vectors, or multiple
@@ -271,13 +291,6 @@ protected:
    typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
                     VectorParts> EdgeMaskCache;
  
-  /// \brief Add code that checks at runtime if the accessed arrays overlap.
-  ///
-  /// Returns a pair of instructions where the first element is the first
-  /// instruction generated in possibly a sequence of instructions and the
-  /// second value is the final comparator value or NULL if no check is needed.
-  std::pair<Instruction *, Instruction *> addRuntimeCheck(Instruction *Loc);
-
    /// \brief Add checks for strides that were assumed to be 1.
    ///
    /// Returns the last check instruction and the first check instruction in the
@@ -554,14 +567,11 @@ public:
    LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
                              DominatorTree *DT, TargetLibraryInfo *TLI,
                              AliasAnalysis *AA, Function *F,
-                            const TargetTransformInfo *TTI)
-      : NumPredStores(0), TheLoop(L), SE(SE), DL(DL), TLI(TLI), TheFunction(F),
-        TTI(TTI), Induction(nullptr), WidestIndTy(nullptr),
-        LAA(F, L, SE, DL, TLI, AA, DT,
-            LoopAccessAnalysis::VectorizerParams(
-                MaxVectorWidth, VectorizationFactor, VectorizationInterleave,
-                RuntimeMemoryCheckThreshold)),
-        HasFunNoNaNAttr(false) {}
+                            const TargetTransformInfo *TTI,
+                            LoopAccessAnalysis *LAA)
+      : NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
+        TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr),
+        Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
  
    /// This enum represents the kinds of reductions that we support.
    enum ReductionKind {
@@ -746,15 +756,19 @@ public:
    bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
  
    /// Returns the information that we collected about runtime memory check.
-  LoopAccessAnalysis::RuntimePointerCheck *getRuntimePointerCheck() {
-    return LAA.getRuntimePointerCheck();
+  const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const {
+    return LAI->getRuntimePointerCheck();
+  }
+
+  const LoopAccessInfo *getLAI() const {
+    return LAI;
    }
  
    /// This function returns the identity element (or neutral element) for
    /// the operation K.
    static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
  
-  unsigned getMaxSafeDepDistBytes() { return LAA.getMaxSafeDepDistBytes(); }
+  unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
  
    bool hasStride(Value *V) { return StrideSet.count(V); }
    bool mustCheckStrides() { return !StrideSet.empty(); }
@@ -779,10 +793,10 @@ public:
      return (MaskedOp.count(I) != 0);
    }
    unsigned getNumStores() const {
-    return NumStores;
+    return LAI->getNumStores();
    }
    unsigned getNumLoads() const {
-    return NumLoads;
+    return LAI->getNumLoads();
    }
    unsigned getNumPredStores() const {
      return NumPredStores;
@@ -836,13 +850,13 @@ private:
    void collectStridedAccess(Value *LoadOrStoreInst);
  
    /// Report an analysis message to assist the user in diagnosing loops that are
-  /// not vectorized.
-  void emitAnalysis(VectorizationReport &Message) {
-    VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
+  /// not vectorized.  These are handled as LoopAccessReport rather than
+  /// VectorizationReport because the << operator of VectorizationReport returns
+  /// LoopAccessReport.
+  void emitAnalysis(const LoopAccessReport &Message) {
+    LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
    }
  
-  unsigned NumLoads;
-  unsigned NumStores;
    unsigned NumPredStores;
  
    /// The loop that we evaluate.
@@ -857,6 +871,13 @@ private:
    Function *TheFunction;
    /// Target Transform Info
    const TargetTransformInfo *TTI;
+  /// Dominator Tree.
+  DominatorTree *DT;
+  /// LoopAccess analysis.
+  LoopAccessAnalysis *LAA;
+  /// The loop-accesses info corresponding to this loop.  This pointer is
+  /// null until canVectorizeMemory sets it up.
+  const LoopAccessInfo *LAI;
  
    //  ---  vectorization state --- //
  
@@ -878,7 +899,7 @@ private:
    /// This set holds the variables which are known to be uniform after
    /// vectorization.
    SmallPtrSet<Instruction*, 4> Uniforms;
-  LoopAccessAnalysis LAA;
+
    /// Can we assume the absence of NaNs.
    bool HasFunNoNaNAttr;
  
@@ -968,9 +989,11 @@ private:
    bool isConsecutiveLoadOrStore(Instruction *I);
  
    /// Report an analysis message to assist the user in diagnosing loops that are
-  /// not vectorized.
-  void emitAnalysis(VectorizationReport &Message) {
-    VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
+  /// not vectorized.  These are handled as LoopAccessReport rather than
+  /// VectorizationReport because the << operator of VectorizationReport returns
+  /// LoopAccessReport.
+  void emitAnalysis(const LoopAccessReport &Message) {
+    LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
    }
  
    /// Values used only by @llvm.assume calls.
@@ -1023,7 +1046,7 @@ class LoopVectorizeHints {
      bool validate(unsigned Val) {
        switch (Kind) {
        case HK_WIDTH:
-        return isPowerOf2_32(Val) && Val <= MaxVectorWidth;
+        return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
        case HK_UNROLL:
          return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
        case HK_FORCE:
@@ -1259,6 +1282,7 @@ struct LoopVectorize : public FunctionPass {
    TargetLibraryInfo *TLI;
    AliasAnalysis *AA;
    AssumptionCache *AC;
+  LoopAccessAnalysis *LAA;
    bool DisableUnrolling;
    bool AlwaysVectorize;
  
@@ -1276,6 +1300,7 @@ struct LoopVectorize : public FunctionPass {
      TLI = TLIP ? &TLIP->getTLI() : nullptr;
      AA = &getAnalysis<AliasAnalysis>();
      AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+    LAA = &getAnalysis<LoopAccessAnalysis>();
  
      // Compute some weights outside of the loop over the loops. Compute this
      // using a BranchProbability to re-use its scaling math.
@@ -1386,7 +1411,7 @@ struct LoopVectorize : public FunctionPass {
      }
  
      // Check if it is legal to vectorize the loop.
-    LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI);
+    LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA);
      if (!LVL.canVectorize()) {
        DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
        emitMissedWarning(F, L, Hints);
@@ -1491,6 +1516,7 @@ struct LoopVectorize : public FunctionPass {
      AU.addRequired<ScalarEvolution>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
      AU.addRequired<AliasAnalysis>();
+    AU.addRequired<LoopAccessAnalysis>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addPreserved<AliasAnalysis>();
@@ -1662,7 +1688,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
  }
  
  bool LoopVectorizationLegality::isUniform(Value *V) {
-  return LAA.isUniform(V);
+  return LAI->isUniform(V);
  }
  
  InnerLoopVectorizer::VectorParts&
@@ -2010,102 +2036,6 @@ InnerLoopVectorizer::addStrideCheck(Instruction *Loc) {
    return std::make_pair(FirstInst, TheCheck);
  }
  
-std::pair<Instruction *, Instruction *>
-InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
-  LoopAccessAnalysis::RuntimePointerCheck *PtrRtCheck =
-    Legal->getRuntimePointerCheck();
-
-  Instruction *tnullptr = nullptr;
-  if (!PtrRtCheck->Need)
-    return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
-
-  unsigned NumPointers = PtrRtCheck->Pointers.size();
-  SmallVector<TrackingVH<Value> , 2> Starts;
-  SmallVector<TrackingVH<Value> , 2> Ends;
-
-  LLVMContext &Ctx = Loc->getContext();
-  SCEVExpander Exp(*SE, "induction");
-  Instruction *FirstInst = nullptr;
-
-  for (unsigned i = 0; i < NumPointers; ++i) {
-    Value *Ptr = PtrRtCheck->Pointers[i];
-    const SCEV *Sc = SE->getSCEV(Ptr);
-
-    if (SE->isLoopInvariant(Sc, OrigLoop)) {
-      DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
-            *Ptr <<"\n");
-      Starts.push_back(Ptr);
-      Ends.push_back(Ptr);
-    } else {
-      DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
-      unsigned AS = Ptr->getType()->getPointerAddressSpace();
-
-      // Use this type for pointer arithmetic.
-      Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-
-      Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
-      Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
-      Starts.push_back(Start);
-      Ends.push_back(End);
-    }
-  }
-
-  IRBuilder<> ChkBuilder(Loc);
-  // Our instructions might fold to a constant.
-  Value *MemoryRuntimeCheck = nullptr;
-  for (unsigned i = 0; i < NumPointers; ++i) {
-    for (unsigned j = i+1; j < NumPointers; ++j) {
-      // No need to check if two readonly pointers intersect.
-      if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
-        continue;
-
-      // Only need to check pointers between two different dependency sets.
-      if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
-       continue;
-      // Only need to check pointers in the same alias set.
-      if (PtrRtCheck->AliasSetId[i] != PtrRtCheck->AliasSetId[j])
-        continue;
-
-      unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
-      unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
-
-      assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
-             (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
-             "Trying to bounds check pointers with different address spaces");
-
-      Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
-      Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
-      Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
-      Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
-      Value *End0 =   ChkBuilder.CreateBitCast(Ends[i],   PtrArithTy1, "bc");
-      Value *End1 =   ChkBuilder.CreateBitCast(Ends[j],   PtrArithTy0, "bc");
-
-      Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
-      FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
-      Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
-      FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
-      Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
-      FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
-      if (MemoryRuntimeCheck) {
-        IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
-                                         "conflict.rdx");
-        FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
-      }
-      MemoryRuntimeCheck = IsConflict;
-    }
-  }
-
-  // We have to do this trickery because the IRBuilder might fold the check to a
-  // constant expression in which case there is no Instruction anchored in
-  // the block.
-  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
-                                                 ConstantInt::getTrue(Ctx));
-  ChkBuilder.Insert(Check, "memcheck.conflict");
-  FirstInst = getFirstInst(FirstInst, Check, Loc);
-  return std::make_pair(FirstInst, Check);
-}
-
  void InnerLoopVectorizer::createEmptyLoop() {
    /*
     In this function we generate a new loop. The new loop will contain
@@ -2330,11 +2260,11 @@ void InnerLoopVectorizer::createEmptyLoop() {
    // faster.
    Instruction *MemRuntimeCheck;
    std::tie(FirstCheckInst, MemRuntimeCheck) =
-      addRuntimeCheck(LastBypassBlock->getTerminator());
+    Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator());
    if (MemRuntimeCheck) {
      // Create a new block containing the memory check.
      BasicBlock *CheckBlock =
-        LastBypassBlock->splitBasicBlock(MemRuntimeCheck, "vector.memcheck");
+        LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
      if (ParentLoop)
        ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
      LoopBypassBlocks.push_back(CheckBlock);
@@ -3498,7 +3428,7 @@ bool LoopVectorizationLegality::canVectorize() {
    collectLoopUniforms();
  
    DEBUG(dbgs() << "LV: We can vectorize this loop" <<
-        (LAA.getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
+        (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
           "")
          <<"!\n");
  
@@ -3554,9 +3484,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
    // Look for the attribute signaling the absence of NaNs.
    Function &F = *Header->getParent();
    if (F.hasFnAttribute("no-nans-fp-math"))
-    HasFunNoNaNAttr = F.getAttributes().getAttribute(
-      AttributeSet::FunctionIndex,
-      "no-nans-fp-math").getValueAsString() == "true";
+    HasFunNoNaNAttr =
+        F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
  
    // For each block in the loop.
    for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -3924,7 +3853,11 @@ void LoopVectorizationLegality::collectLoopUniforms() {
  }
  
  bool LoopVectorizationLegality::canVectorizeMemory() {
-  return LAA.canVectorizeMemory(Strides);
+  LAI = &LAA->getInfo(TheLoop, Strides);
+  auto &OptionalReport = LAI->getReport();
+  if (OptionalReport)
+    emitAnalysis(VectorizationReport(*OptionalReport));
+  return LAI->canVectorizeMemory();
  }
  
  static bool hasMultipleUsesOf(Instruction *I,
@@ -4268,7 +4201,7 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
  }
  
  bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB)  {
-  return LAA.blockNeedsPredication(BB);
+  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }
  
  bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
@@ -5013,7 +4946,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
  
      // Wide load/stores.
      unsigned Cost = TTI.getAddressComputationCost(VectorTy);
-    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+    if (Legal->isMaskRequired(I))
+      Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment,
+                                        AS);
+    else
+      Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
  
      if (Reverse)
        Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
@@ -5095,6 +5032,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  INITIALIZE_PASS_DEPENDENCY(LCSSA)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
  INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
  
  namespace llvm {