lib/Analysis/LoopAccessAnalysis.cpp

   1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // The implementation for the loop memory dependence that was originally
  11 // developed for the loop vectorizer.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/Analysis/LoopAccessAnalysis.h"
  16 #include "llvm/Analysis/LoopInfo.h"
  17 #include "llvm/Analysis/ScalarEvolutionExpander.h"
  18 #include "llvm/Analysis/ValueTracking.h"
  19 #include "llvm/IR/DiagnosticInfo.h"
  20 #include "llvm/IR/Dominators.h"
  21 #include "llvm/IR/IRBuilder.h"
  22 #include "llvm/Support/Debug.h"
  23 #include "llvm/Transforms/Utils/VectorUtils.h"
  24 using namespace llvm;
  25
  26 #define DEBUG_TYPE "loop-vectorize"
  27
  28 void VectorizationReport::emitAnalysis(VectorizationReport &Message,
  29                                        const Function *TheFunction,
  30                                        const Loop *TheLoop) {
  31   DebugLoc DL = TheLoop->getStartLoc();
  32   if (Instruction *I = Message.getInstr())
  33     DL = I->getDebugLoc();
  34   emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
  35                                  *TheFunction, DL, Message.str());
  36 }
  37
  38 Value *llvm::stripIntegerCast(Value *V) {
  39   if (CastInst *CI = dyn_cast<CastInst>(V))
  40     if (CI->getOperand(0)->getType()->isIntegerTy())
  41       return CI->getOperand(0);
  42   return V;
  43 }
  44
  45 const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
  46                                             ValueToValueMap &PtrToStride,
  47                                             Value *Ptr, Value *OrigPtr) {
  48
  49   const SCEV *OrigSCEV = SE->getSCEV(Ptr);
  50
  51   // If there is an entry in the map return the SCEV of the pointer with the
  52   // symbolic stride replaced by one.
  53   ValueToValueMap::iterator SI = PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  54   if (SI != PtrToStride.end()) {
  55     Value *StrideVal = SI->second;
  56
  57     // Strip casts.
  58     StrideVal = stripIntegerCast(StrideVal);
  59
  60     // Replace symbolic stride by one.
  61     Value *One = ConstantInt::get(StrideVal->getType(), 1);
  62     ValueToValueMap RewriteMap;
  63     RewriteMap[StrideVal] = One;
  64
  65     const SCEV *ByOne =
  66         SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
  67     DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
  68                  << "\n");
  69     return ByOne;
  70   }
  71
  72   // Otherwise, just return the SCEV of the original pointer.
  73   return SE->getSCEV(Ptr);
  74 }
  75
  76 void LoopAccessInfo::RuntimePointerCheck::insert(ScalarEvolution *SE, Loop *Lp,
  77                                                  Value *Ptr, bool WritePtr,
  78                                                  unsigned DepSetId,
  79                                                  unsigned ASId,
  80                                                  ValueToValueMap &Strides) {
  81   // Get the stride replaced scev.
  82   const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
  83   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
  84   assert(AR && "Invalid addrec expression");
  85   const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
  86   const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
  87   Pointers.push_back(Ptr);
  88   Starts.push_back(AR->getStart());
  89   Ends.push_back(ScEnd);
  90   IsWritePtr.push_back(WritePtr);
  91   DependencySetId.push_back(DepSetId);
  92   AliasSetId.push_back(ASId);
  93 }
  94
  95 namespace {
  96 /// \brief Analyses memory accesses in a loop.
  97 ///
  98 /// Checks whether run time pointer checks are needed and builds sets for data
  99 /// dependence checking.
 100 class AccessAnalysis {
 101 public:
 102   /// \brief Read or write access location.
 103   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
 104   typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
 105
 106   /// \brief Set of potential dependent memory accesses.
 107   typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
 108
 109   AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) :
 110     DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
 111
 112   /// \brief Register a load  and whether it is only read from.
 113   void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
 114     Value *Ptr = const_cast<Value*>(Loc.Ptr);
 115     AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
 116     Accesses.insert(MemAccessInfo(Ptr, false));
 117     if (IsReadOnly)
 118       ReadOnlyPtr.insert(Ptr);
 119   }
 120
 121   /// \brief Register a store.
 122   void addStore(AliasAnalysis::Location &Loc) {
 123     Value *Ptr = const_cast<Value*>(Loc.Ptr);
 124     AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
 125     Accesses.insert(MemAccessInfo(Ptr, true));
 126   }
 127
 128   /// \brief Check whether we can check the pointers at runtime for
 129   /// non-intersection.
 130   bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
 131                        unsigned &NumComparisons,
 132                        ScalarEvolution *SE, Loop *TheLoop,
 133                        ValueToValueMap &Strides,
 134                        bool ShouldCheckStride = false);
 135
 136   /// \brief Goes over all memory accesses, checks whether a RT check is needed
 137   /// and builds sets of dependent accesses.
 138   void buildDependenceSets() {
 139     processMemAccesses();
 140   }
 141
 142   bool isRTCheckNeeded() { return IsRTCheckNeeded; }
 143
 144   bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
 145   void resetDepChecks() { CheckDeps.clear(); }
 146
 147   MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
 148
 149 private:
 150   typedef SetVector<MemAccessInfo> PtrAccessSet;
 151
 152   /// \brief Go over all memory access and check whether runtime pointer checks
 153   /// are needed /// and build sets of dependency check candidates.
 154   void processMemAccesses();
 155
 156   /// Set of all accesses.
 157   PtrAccessSet Accesses;
 158
 159   /// Set of accesses that need a further dependence check.
 160   MemAccessInfoSet CheckDeps;
 161
 162   /// Set of pointers that are read only.
 163   SmallPtrSet<Value*, 16> ReadOnlyPtr;
 164
 165   const DataLayout *DL;
 166
 167   /// An alias set tracker to partition the access set by underlying object and
 168   //intrinsic property (such as TBAA metadata).
 169   AliasSetTracker AST;
 170
 171   /// Sets of potentially dependent accesses - members of one set share an
 172   /// underlying pointer. The set "CheckDeps" identfies which sets really need a
 173   /// dependence check.
 174   DepCandidates &DepCands;
 175
 176   bool IsRTCheckNeeded;
 177 };
 178
 179 } // end anonymous namespace
 180
 181 /// \brief Check whether a pointer can participate in a runtime bounds check.
 182 static bool hasComputableBounds(ScalarEvolution *SE, ValueToValueMap &Strides,
 183                                 Value *Ptr) {
 184   const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
 185   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
 186   if (!AR)
 187     return false;
 188
 189   return AR->isAffine();
 190 }
 191
/// \brief Check the stride of the pointer and ensure that it does not wrap in
/// the address space.
///
/// Forward declaration only: AccessAnalysis::canCheckPtrAtRT below calls this
/// function before its definition, which appears further down in this file.
static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
                        const Loop *Lp, ValueToValueMap &StridesMap);
 196
 197 bool AccessAnalysis::canCheckPtrAtRT(
 198     LoopAccessInfo::RuntimePointerCheck &RtCheck,
 199     unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop,
 200     ValueToValueMap &StridesMap, bool ShouldCheckStride) {
 201   // Find pointers with computable bounds. We are going to use this information
 202   // to place a runtime bound check.
 203   bool CanDoRT = true;
 204
 205   bool IsDepCheckNeeded = isDependencyCheckNeeded();
 206   NumComparisons = 0;
 207
 208   // We assign a consecutive id to access from different alias sets.
 209   // Accesses between different groups doesn't need to be checked.
 210   unsigned ASId = 1;
 211   for (auto &AS : AST) {
 212     unsigned NumReadPtrChecks = 0;
 213     unsigned NumWritePtrChecks = 0;
 214
 215     // We assign consecutive id to access from different dependence sets.
 216     // Accesses within the same set don't need a runtime check.
 217     unsigned RunningDepId = 1;
 218     DenseMap<Value *, unsigned> DepSetId;
 219
 220     for (auto A : AS) {
 221       Value *Ptr = A.getValue();
 222       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
 223       MemAccessInfo Access(Ptr, IsWrite);
 224
 225       if (IsWrite)
 226         ++NumWritePtrChecks;
 227       else
 228         ++NumReadPtrChecks;
 229
 230       if (hasComputableBounds(SE, StridesMap, Ptr) &&
 231           // When we run after a failing dependency check we have to make sure we
 232           // don't have wrapping pointers.
 233           (!ShouldCheckStride ||
 234            isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
 235         // The id of the dependence set.
 236         unsigned DepId;
 237
 238         if (IsDepCheckNeeded) {
 239           Value *Leader = DepCands.getLeaderValue(Access).getPointer();
 240           unsigned &LeaderId = DepSetId[Leader];
 241           if (!LeaderId)
 242             LeaderId = RunningDepId++;
 243           DepId = LeaderId;
 244         } else
 245           // Each access has its own dependence set.
 246           DepId = RunningDepId++;
 247
 248         RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
 249
 250         DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
 251       } else {
 252         CanDoRT = false;
 253       }
 254     }
 255
 256     if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
 257       NumComparisons += 0; // Only one dependence set.
 258     else {
 259       NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
 260                                               NumWritePtrChecks - 1));
 261     }
 262
 263     ++ASId;
 264   }
 265
 266   // If the pointers that we would use for the bounds comparison have different
 267   // address spaces, assume the values aren't directly comparable, so we can't
 268   // use them for the runtime check. We also have to assume they could
 269   // overlap. In the future there should be metadata for whether address spaces
 270   // are disjoint.
 271   unsigned NumPointers = RtCheck.Pointers.size();
 272   for (unsigned i = 0; i < NumPointers; ++i) {
 273     for (unsigned j = i + 1; j < NumPointers; ++j) {
 274       // Only need to check pointers between two different dependency sets.
 275       if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
 276        continue;
 277       // Only need to check pointers in the same alias set.
 278       if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
 279         continue;
 280
 281       Value *PtrI = RtCheck.Pointers[i];
 282       Value *PtrJ = RtCheck.Pointers[j];
 283
 284       unsigned ASi = PtrI->getType()->getPointerAddressSpace();
 285       unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
 286       if (ASi != ASj) {
 287         DEBUG(dbgs() << "LV: Runtime check would require comparison between"
 288                        " different address spaces\n");
 289         return false;
 290       }
 291     }
 292   }
 293
 294   return CanDoRT;
 295 }
 296
/// \brief Walk all registered accesses, alias set by alias set, and build the
/// dependence-candidate sets.
///
/// For every access this inserts it into DepCands, queues it in CheckDeps (and
/// sets IsRTCheckNeeded) when it may conflict with an earlier write in the
/// same alias set, and unions it with the previous access to each of its
/// underlying objects.
void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  DEBUG(dbgs() << "LV: Processing memory accesses...\n");
  DEBUG(dbgs() << "  AST: "; AST.dump());
  DEBUG(dbgs() << "LV:   Accesses:\n");
  DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
                                         "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    // Becomes true once a write has been processed in this alias set.
    bool SetHasWrite = false;

    // Map of underlying objects to the last access encountered for them.
    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      // First pass scans every registered access; second pass scans only the
      // read-only accesses deferred during the first pass.
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (auto AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (auto AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.insert(Access);
            IsRTCheckNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<Value *, 16> ValueVector;
          ValueVector TempObjects;
          GetUnderlyingObjects(Ptr, TempObjects, DL);
          for (Value *UnderlyingObj : TempObjects) {
            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            // Chain this access to the previous access of the same object so
            // that both end up in one dependence-candidate set.
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
          }
        }
      }
    }
  }
}
 401
 402 namespace {
 403 /// \brief Checks memory dependences among accesses to the same underlying
 404 /// object to determine whether there vectorization is legal or not (and at
 405 /// which vectorization factor).
 406 ///
 407 /// This class works under the assumption that we already checked that memory
 408 /// locations with different underlying pointers are "must-not alias".
 409 /// We use the ScalarEvolution framework to symbolically evalutate access
 410 /// functions pairs. Since we currently don't restructure the loop we can rely
 411 /// on the program order of memory accesses to determine their safety.
 412 /// At the moment we will only deem accesses as safe for:
 413 ///  * A negative constant distance assuming program order.
 414 ///
 415 ///      Safe: tmp = a[i + 1];     OR     a[i + 1] = x;
 416 ///            a[i] = tmp;                y = a[i];
 417 ///
 418 ///   The latter case is safe because later checks guarantuee that there can't
 419 ///   be a cycle through a phi node (that is, we check that "x" and "y" is not
 420 ///   the same variable: a header phi can only be an induction or a reduction, a
 421 ///   reduction can't have a memory sink, an induction can't have a memory
 422 ///   source). This is important and must not be violated (or we have to
 423 ///   resort to checking for cycles through memory).
 424 ///
 425 ///  * A positive constant distance assuming program order that is bigger
 426 ///    than the biggest memory access.
 427 ///
 428 ///     tmp = a[i]        OR              b[i] = x
 429 ///     a[i+2] = tmp                      y = b[i+2];
 430 ///
 431 ///     Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
 432 ///
 433 ///  * Zero distances and all accesses have the same size.
 434 ///
 435 class MemoryDepChecker {
 436 public:
 437   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
 438   typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
 439
 440   MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L,
 441                    const LoopAccessInfo::VectorizerParams &VectParams)
 442       : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
 443         ShouldRetryWithRuntimeCheck(false), VectParams(VectParams) {}
 444
 445   /// \brief Register the location (instructions are given increasing numbers)
 446   /// of a write access.
 447   void addAccess(StoreInst *SI) {
 448     Value *Ptr = SI->getPointerOperand();
 449     Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
 450     InstMap.push_back(SI);
 451     ++AccessIdx;
 452   }
 453
 454   /// \brief Register the location (instructions are given increasing numbers)
 455   /// of a write access.
 456   void addAccess(LoadInst *LI) {
 457     Value *Ptr = LI->getPointerOperand();
 458     Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
 459     InstMap.push_back(LI);
 460     ++AccessIdx;
 461   }
 462
 463   /// \brief Check whether the dependencies between the accesses are safe.
 464   ///
 465   /// Only checks sets with elements in \p CheckDeps.
 466   bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
 467                    MemAccessInfoSet &CheckDeps, ValueToValueMap &Strides);
 468
 469   /// \brief The maximum number of bytes of a vector register we can vectorize
 470   /// the accesses safely with.
 471   unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
 472
 473   /// \brief In same cases when the dependency check fails we can still
 474   /// vectorize the loop with a dynamic array access check.
 475   bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
 476
 477 private:
 478   ScalarEvolution *SE;
 479   const DataLayout *DL;
 480   const Loop *InnermostLoop;
 481
 482   /// \brief Maps access locations (ptr, read/write) to program order.
 483   DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
 484
 485   /// \brief Memory access instructions in program order.
 486   SmallVector<Instruction *, 16> InstMap;
 487
 488   /// \brief The program order index to be used for the next instruction.
 489   unsigned AccessIdx;
 490
 491   // We can access this many bytes in parallel safely.
 492   unsigned MaxSafeDepDistBytes;
 493
 494   /// \brief If we see a non-constant dependence distance we can still try to
 495   /// vectorize this loop with runtime checks.
 496   bool ShouldRetryWithRuntimeCheck;
 497
 498   /// \brief Vectorizer parameters used by the analysis.
 499   LoopAccessInfo::VectorizerParams VectParams;
 500
 501   /// \brief Check whether there is a plausible dependence between the two
 502   /// accesses.
 503   ///
 504   /// Access \p A must happen before \p B in program order. The two indices
 505   /// identify the index into the program order map.
 506   ///
 507   /// This function checks  whether there is a plausible dependence (or the
 508   /// absence of such can't be proved) between the two accesses. If there is a
 509   /// plausible dependence but the dependence distance is bigger than one
 510   /// element access it records this distance in \p MaxSafeDepDistBytes (if this
 511   /// distance is smaller than any other distance encountered so far).
 512   /// Otherwise, this function returns true signaling a possible dependence.
 513   bool isDependent(const MemAccessInfo &A, unsigned AIdx,
 514                    const MemAccessInfo &B, unsigned BIdx,
 515                    ValueToValueMap &Strides);
 516
 517   /// \brief Check whether the data dependence could prevent store-load
 518   /// forwarding.
 519   bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
 520 };
 521
 522 } // end anonymous namespace
 523
 524 static bool isInBoundsGep(Value *Ptr) {
 525   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
 526     return GEP->isInBounds();
 527   return false;
 528 }
 529
 530 /// \brief Check whether the access through \p Ptr has a constant stride.
 531 static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
 532                         const Loop *Lp, ValueToValueMap &StridesMap) {
 533   const Type *Ty = Ptr->getType();
 534   assert(Ty->isPointerTy() && "Unexpected non-ptr");
 535
 536   // Make sure that the pointer does not point to aggregate types.
 537   const PointerType *PtrTy = cast<PointerType>(Ty);
 538   if (PtrTy->getElementType()->isAggregateType()) {
 539     DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
 540           "\n");
 541     return 0;
 542   }
 543
 544   const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
 545
 546   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
 547   if (!AR) {
 548     DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
 549           << *Ptr << " SCEV: " << *PtrScev << "\n");
 550     return 0;
 551   }
 552
 553   // The accesss function must stride over the innermost loop.
 554   if (Lp != AR->getLoop()) {
 555     DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
 556           *Ptr << " SCEV: " << *PtrScev << "\n");
 557   }
 558
 559   // The address calculation must not wrap. Otherwise, a dependence could be
 560   // inverted.
 561   // An inbounds getelementptr that is a AddRec with a unit stride
 562   // cannot wrap per definition. The unit stride requirement is checked later.
 563   // An getelementptr without an inbounds attribute and unit stride would have
 564   // to access the pointer value "0" which is undefined behavior in address
 565   // space 0, therefore we can also vectorize this case.
 566   bool IsInBoundsGEP = isInBoundsGep(Ptr);
 567   bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
 568   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
 569   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
 570     DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
 571           << *Ptr << " SCEV: " << *PtrScev << "\n");
 572     return 0;
 573   }
 574
 575   // Check the step is constant.
 576   const SCEV *Step = AR->getStepRecurrence(*SE);
 577
 578   // Calculate the pointer stride and check if it is consecutive.
 579   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
 580   if (!C) {
 581     DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
 582           " SCEV: " << *PtrScev << "\n");
 583     return 0;
 584   }
 585
 586   int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
 587   const APInt &APStepVal = C->getValue()->getValue();
 588
 589   // Huge step value - give up.
 590   if (APStepVal.getBitWidth() > 64)
 591     return 0;
 592
 593   int64_t StepVal = APStepVal.getSExtValue();
 594
 595   // Strided access.
 596   int64_t Stride = StepVal / Size;
 597   int64_t Rem = StepVal % Size;
 598   if (Rem)
 599     return 0;
 600
 601   // If the SCEV could wrap but we have an inbounds gep with a unit stride we
 602   // know we can't "wrap around the address space". In case of address space
 603   // zero we know that this won't happen without triggering undefined behavior.
 604   if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
 605       Stride != 1 && Stride != -1)
 606     return 0;
 607
 608   return Stride;
 609 }
 610
 611 bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
 612                                                     unsigned TypeByteSize) {
 613   // If loads occur at a distance that is not a multiple of a feasible vector
 614   // factor store-load forwarding does not take place.
 615   // Positive dependences might cause troubles because vectorizing them might
 616   // prevent store-load forwarding making vectorized code run a lot slower.
 617   //   a[i] = a[i-3] ^ a[i-8];
 618   //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
 619   //   hence on your typical architecture store-load forwarding does not take
 620   //   place. Vectorizing in such cases does not make sense.
 621   // Store-load forwarding distance.
 622   const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
 623   // Maximum vector factor.
 624   unsigned MaxVFWithoutSLForwardIssues = VectParams.MaxVectorWidth*TypeByteSize;
 625   if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
 626     MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
 627
 628   for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
 629        vf *= 2) {
 630     if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
 631       MaxVFWithoutSLForwardIssues = (vf >>=1);
 632       break;
 633     }
 634   }
 635
 636   if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
 637     DEBUG(dbgs() << "LV: Distance " << Distance <<
 638           " that could cause a store-load forwarding conflict\n");
 639     return true;
 640   }
 641
 642   if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
 643       MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth*TypeByteSize)
 644     MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
 645   return false;
 646 }
 647
 648 bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 649                                    const MemAccessInfo &B, unsigned BIdx,
 650                                    ValueToValueMap &Strides) {
 651   assert (AIdx < BIdx && "Must pass arguments in program order");
 652
 653   Value *APtr = A.getPointer();
 654   Value *BPtr = B.getPointer();
 655   bool AIsWrite = A.getInt();
 656   bool BIsWrite = B.getInt();
 657
 658   // Two reads are independent.
 659   if (!AIsWrite && !BIsWrite)
 660     return false;
 661
 662   // We cannot check pointers in different address spaces.
 663   if (APtr->getType()->getPointerAddressSpace() !=
 664       BPtr->getType()->getPointerAddressSpace())
 665     return true;
 666
 667   const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
 668   const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
 669
 670   int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides);
 671   int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides);
 672
 673   const SCEV *Src = AScev;
 674   const SCEV *Sink = BScev;
 675
 676   // If the induction step is negative we have to invert source and sink of the
 677   // dependence.
 678   if (StrideAPtr < 0) {
 679     //Src = BScev;
 680     //Sink = AScev;
 681     std::swap(APtr, BPtr);
 682     std::swap(Src, Sink);
 683     std::swap(AIsWrite, BIsWrite);
 684     std::swap(AIdx, BIdx);
 685     std::swap(StrideAPtr, StrideBPtr);
 686   }
 687
 688   const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
 689
 690   DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
 691         << "(Induction step: " << StrideAPtr <<  ")\n");
 692   DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
 693         << *InstMap[BIdx] << ": " << *Dist << "\n");
 694
 695   // Need consecutive accesses. We don't want to vectorize
 696   // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
 697   // the address space.
 698   if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
 699     DEBUG(dbgs() << "Non-consecutive pointer access\n");
 700     return true;
 701   }
 702
 703   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
 704   if (!C) {
 705     DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n");
 706     ShouldRetryWithRuntimeCheck = true;
 707     return true;
 708   }
 709
 710   Type *ATy = APtr->getType()->getPointerElementType();
 711   Type *BTy = BPtr->getType()->getPointerElementType();
 712   unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
 713
 714   // Negative distances are not plausible dependencies.
 715   const APInt &Val = C->getValue()->getValue();
 716   if (Val.isNegative()) {
 717     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
 718     if (IsTrueDataDependence &&
 719         (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
 720          ATy != BTy))
 721       return true;
 722
 723     DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");
 724     return false;
 725   }
 726
 727   // Write to the same location with the same size.
 728   // Could be improved to assert type sizes are the same (i32 == float, etc).
 729   if (Val == 0) {
 730     if (ATy == BTy)
 731       return false;
 732     DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
 733     return true;
 734   }
 735
 736   assert(Val.isStrictlyPositive() && "Expect a positive value");
 737
 738   // Positive distance bigger than max vectorization factor.
 739   if (ATy != BTy) {
 740     DEBUG(dbgs() <<
 741           "LV: ReadWrite-Write positive dependency with different types\n");
 742     return false;
 743   }
 744
 745   unsigned Distance = (unsigned) Val.getZExtValue();
 746
 747   // Bail out early if passed-in parameters make vectorization not feasible.
 748   unsigned ForcedFactor = (VectParams.VectorizationFactor ?
 749                            VectParams.VectorizationFactor : 1);
 750   unsigned ForcedUnroll = (VectParams.VectorizationInterleave ?
 751                            VectParams.VectorizationInterleave : 1);
 752
 753   // The distance must be bigger than the size needed for a vectorized version
 754   // of the operation and the size of the vectorized operation must not be
 755   // bigger than the currrent maximum size.
 756   if (Distance < 2*TypeByteSize ||
 757       2*TypeByteSize > MaxSafeDepDistBytes ||
 758       Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
 759     DEBUG(dbgs() << "LV: Failure because of Positive distance "
 760         << Val.getSExtValue() << '\n');
 761     return true;
 762   }
 763
 764   MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
 765     Distance : MaxSafeDepDistBytes;
 766
 767   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
 768   if (IsTrueDataDependence &&
 769       couldPreventStoreLoadForward(Distance, TypeByteSize))
 770      return true;
 771
 772   DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
 773         " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
 774
 775   return false;
 776 }
 777
 778 bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
 779                                    MemAccessInfoSet &CheckDeps,
 780                                    ValueToValueMap &Strides) {
 781
 782   MaxSafeDepDistBytes = -1U;
 783   while (!CheckDeps.empty()) {
 784     MemAccessInfo CurAccess = *CheckDeps.begin();
 785
 786     // Get the relevant memory access set.
 787     EquivalenceClasses<MemAccessInfo>::iterator I =
 788       AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
 789
 790     // Check accesses within this set.
 791     EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
 792     AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
 793
 794     // Check every access pair.
 795     while (AI != AE) {
 796       CheckDeps.erase(*AI);
 797       EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
 798       while (OI != AE) {
 799         // Check every accessing instruction pair in program order.
 800         for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
 801              I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
 802           for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
 803                I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
 804             if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
 805               return false;
 806             if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
 807               return false;
 808           }
 809         ++OI;
 810       }
 811       AI++;
 812     }
 813   }
 814   return true;
 815 }
 816
 817 bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
 818
 819   typedef SmallVector<Value*, 16> ValueVector;
 820   typedef SmallPtrSet<Value*, 16> ValueSet;
 821
 822   // Holds the Load and Store *instructions*.
 823   ValueVector Loads;
 824   ValueVector Stores;
 825
 826   // Holds all the different accesses in the loop.
 827   unsigned NumReads = 0;
 828   unsigned NumReadWrites = 0;
 829
 830   PtrRtCheck.Pointers.clear();
 831   PtrRtCheck.Need = false;
 832
 833   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 834   MemoryDepChecker DepChecker(SE, DL, TheLoop, VectParams);
 835
 836   // For each block.
 837   for (Loop::block_iterator bb = TheLoop->block_begin(),
 838        be = TheLoop->block_end(); bb != be; ++bb) {
 839
 840     // Scan the BB and collect legal loads and stores.
 841     for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
 842          ++it) {
 843
 844       // If this is a load, save it. If this instruction can read from memory
 845       // but is not a load, then we quit. Notice that we don't handle function
 846       // calls that read or write.
 847       if (it->mayReadFromMemory()) {
 848         // Many math library functions read the rounding mode. We will only
 849         // vectorize a loop if it contains known function calls that don't set
 850         // the flag. Therefore, it is safe to ignore this read from memory.
 851         CallInst *Call = dyn_cast<CallInst>(it);
 852         if (Call && getIntrinsicIDForCall(Call, TLI))
 853           continue;
 854
 855         LoadInst *Ld = dyn_cast<LoadInst>(it);
 856         if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
 857           emitAnalysis(VectorizationReport(Ld)
 858                        << "read with atomic ordering or volatile read");
 859           DEBUG(dbgs() << "LV: Found a non-simple load.\n");
 860           return false;
 861         }
 862         NumLoads++;
 863         Loads.push_back(Ld);
 864         DepChecker.addAccess(Ld);
 865         continue;
 866       }
 867
 868       // Save 'store' instructions. Abort if other instructions write to memory.
 869       if (it->mayWriteToMemory()) {
 870         StoreInst *St = dyn_cast<StoreInst>(it);
 871         if (!St) {
 872           emitAnalysis(VectorizationReport(it) <<
 873                        "instruction cannot be vectorized");
 874           return false;
 875         }
 876         if (!St->isSimple() && !IsAnnotatedParallel) {
 877           emitAnalysis(VectorizationReport(St)
 878                        << "write with atomic ordering or volatile write");
 879           DEBUG(dbgs() << "LV: Found a non-simple store.\n");
 880           return false;
 881         }
 882         NumStores++;
 883         Stores.push_back(St);
 884         DepChecker.addAccess(St);
 885       }
 886     } // Next instr.
 887   } // Next block.
 888
 889   // Now we have two lists that hold the loads and the stores.
 890   // Next, we find the pointers that they use.
 891
 892   // Check if we see any stores. If there are no stores, then we don't
 893   // care if the pointers are *restrict*.
 894   if (!Stores.size()) {
 895     DEBUG(dbgs() << "LV: Found a read-only loop!\n");
 896     return true;
 897   }
 898
 899   AccessAnalysis::DepCandidates DependentAccesses;
 900   AccessAnalysis Accesses(DL, AA, DependentAccesses);
 901
 902   // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
 903   // multiple times on the same object. If the ptr is accessed twice, once
 904   // for read and once for write, it will only appear once (on the write
 905   // list). This is okay, since we are going to check for conflicts between
 906   // writes and between reads and writes, but not between reads and reads.
 907   ValueSet Seen;
 908
 909   ValueVector::iterator I, IE;
 910   for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
 911     StoreInst *ST = cast<StoreInst>(*I);
 912     Value* Ptr = ST->getPointerOperand();
 913
 914     if (isUniform(Ptr)) {
 915       emitAnalysis(
 916           VectorizationReport(ST)
 917           << "write to a loop invariant address could not be vectorized");
 918       DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
 919       return false;
 920     }
 921
 922     // If we did *not* see this pointer before, insert it to  the read-write
 923     // list. At this phase it is only a 'write' list.
 924     if (Seen.insert(Ptr).second) {
 925       ++NumReadWrites;
 926
 927       AliasAnalysis::Location Loc = AA->getLocation(ST);
 928       // The TBAA metadata could have a control dependency on the predication
 929       // condition, so we cannot rely on it when determining whether or not we
 930       // need runtime pointer checks.
 931       if (blockNeedsPredication(ST->getParent()))
 932         Loc.AATags.TBAA = nullptr;
 933
 934       Accesses.addStore(Loc);
 935     }
 936   }
 937
 938   if (IsAnnotatedParallel) {
 939     DEBUG(dbgs()
 940           << "LV: A loop annotated parallel, ignore memory dependency "
 941           << "checks.\n");
 942     return true;
 943   }
 944
 945   for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
 946     LoadInst *LD = cast<LoadInst>(*I);
 947     Value* Ptr = LD->getPointerOperand();
 948     // If we did *not* see this pointer before, insert it to the
 949     // read list. If we *did* see it before, then it is already in
 950     // the read-write list. This allows us to vectorize expressions
 951     // such as A[i] += x;  Because the address of A[i] is a read-write
 952     // pointer. This only works if the index of A[i] is consecutive.
 953     // If the address of i is unknown (for example A[B[i]]) then we may
 954     // read a few words, modify, and write a few words, and some of the
 955     // words may be written to the same address.
 956     bool IsReadOnlyPtr = false;
 957     if (Seen.insert(Ptr).second ||
 958         !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
 959       ++NumReads;
 960       IsReadOnlyPtr = true;
 961     }
 962
 963     AliasAnalysis::Location Loc = AA->getLocation(LD);
 964     // The TBAA metadata could have a control dependency on the predication
 965     // condition, so we cannot rely on it when determining whether or not we
 966     // need runtime pointer checks.
 967     if (blockNeedsPredication(LD->getParent()))
 968       Loc.AATags.TBAA = nullptr;
 969
 970     Accesses.addLoad(Loc, IsReadOnlyPtr);
 971   }
 972
 973   // If we write (or read-write) to a single destination and there are no
 974   // other reads in this loop then is it safe to vectorize.
 975   if (NumReadWrites == 1 && NumReads == 0) {
 976     DEBUG(dbgs() << "LV: Found a write-only loop!\n");
 977     return true;
 978   }
 979
 980   // Build dependence sets and check whether we need a runtime pointer bounds
 981   // check.
 982   Accesses.buildDependenceSets();
 983   bool NeedRTCheck = Accesses.isRTCheckNeeded();
 984
 985   // Find pointers with computable bounds. We are going to use this information
 986   // to place a runtime bound check.
 987   unsigned NumComparisons = 0;
 988   bool CanDoRT = false;
 989   if (NeedRTCheck)
 990     CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
 991                                        Strides);
 992
 993   DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
 994         " pointer comparisons.\n");
 995
 996   // If we only have one set of dependences to check pointers among we don't
 997   // need a runtime check.
 998   if (NumComparisons == 0 && NeedRTCheck)
 999     NeedRTCheck = false;
1000
1001   // Check that we did not collect too many pointers or found an unsizeable
1002   // pointer.
1003   if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) {
1004     PtrRtCheck.reset();
1005     CanDoRT = false;
1006   }
1007
1008   if (CanDoRT) {
1009     DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
1010   }
1011
1012   if (NeedRTCheck && !CanDoRT) {
1013     emitAnalysis(VectorizationReport() << "cannot identify array bounds");
1014     DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
1015           "the array bounds.\n");
1016     PtrRtCheck.reset();
1017     return false;
1018   }
1019
1020   PtrRtCheck.Need = NeedRTCheck;
1021
1022   bool CanVecMem = true;
1023   if (Accesses.isDependencyCheckNeeded()) {
1024     DEBUG(dbgs() << "LV: Checking memory dependencies\n");
1025     CanVecMem = DepChecker.areDepsSafe(
1026         DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
1027     MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
1028
1029     if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
1030       DEBUG(dbgs() << "LV: Retrying with memory checks\n");
1031       NeedRTCheck = true;
1032
1033       // Clear the dependency checks. We assume they are not needed.
1034       Accesses.resetDepChecks();
1035
1036       PtrRtCheck.reset();
1037       PtrRtCheck.Need = true;
1038
1039       CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
1040                                          TheLoop, Strides, true);
1041       // Check that we did not collect too many pointers or found an unsizeable
1042       // pointer.
1043       if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) {
1044         if (!CanDoRT && NumComparisons > 0)
1045           emitAnalysis(VectorizationReport()
1046                        << "cannot check memory dependencies at runtime");
1047         else
1048           emitAnalysis(VectorizationReport()
1049                        << NumComparisons << " exceeds limit of "
1050                        << VectParams.RuntimeMemoryCheckThreshold
1051                        << " dependent memory operations checked at runtime");
1052         DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
1053         PtrRtCheck.reset();
1054         return false;
1055       }
1056
1057       CanVecMem = true;
1058     }
1059   }
1060
1061   if (!CanVecMem)
1062     emitAnalysis(VectorizationReport() <<
1063                  "unsafe dependent memory operations in loop");
1064
1065   DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
1066         " need a runtime memory check.\n");
1067
1068   return CanVecMem;
1069 }
1070
1071 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB)  {
1072   assert(TheLoop->contains(BB) && "Unknown block used");
1073
1074   // Blocks that do not dominate the latch need predication.
1075   BasicBlock* Latch = TheLoop->getLoopLatch();
1076   return !DT->dominates(BB, Latch);
1077 }
1078
1079 void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) {
1080   VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
1081 }
1082
1083 bool LoopAccessInfo::isUniform(Value *V) {
1084   return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
1085 }
1086
1087 // FIXME: this function is currently a duplicate of the one in
1088 // LoopVectorize.cpp.
1089 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
1090                                  Instruction *Loc) {
1091   if (FirstInst)
1092     return FirstInst;
1093   if (Instruction *I = dyn_cast<Instruction>(V))
1094     return I->getParent() == Loc->getParent() ? I : nullptr;
1095   return nullptr;
1096 }
1097
1098 std::pair<Instruction *, Instruction *>
1099 LoopAccessInfo::addRuntimeCheck(Instruction *Loc) {
1100   Instruction *tnullptr = nullptr;
1101   if (!PtrRtCheck.Need)
1102     return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
1103
1104   unsigned NumPointers = PtrRtCheck.Pointers.size();
1105   SmallVector<TrackingVH<Value> , 2> Starts;
1106   SmallVector<TrackingVH<Value> , 2> Ends;
1107
1108   LLVMContext &Ctx = Loc->getContext();
1109   SCEVExpander Exp(*SE, "induction");
1110   Instruction *FirstInst = nullptr;
1111
1112   for (unsigned i = 0; i < NumPointers; ++i) {
1113     Value *Ptr = PtrRtCheck.Pointers[i];
1114     const SCEV *Sc = SE->getSCEV(Ptr);
1115
1116     if (SE->isLoopInvariant(Sc, TheLoop)) {
1117       DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
1118             *Ptr <<"\n");
1119       Starts.push_back(Ptr);
1120       Ends.push_back(Ptr);
1121     } else {
1122       DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
1123       unsigned AS = Ptr->getType()->getPointerAddressSpace();
1124
1125       // Use this type for pointer arithmetic.
1126       Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
1127
1128       Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
1129       Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
1130       Starts.push_back(Start);
1131       Ends.push_back(End);
1132     }
1133   }
1134
1135   IRBuilder<> ChkBuilder(Loc);
1136   // Our instructions might fold to a constant.
1137   Value *MemoryRuntimeCheck = nullptr;
1138   for (unsigned i = 0; i < NumPointers; ++i) {
1139     for (unsigned j = i+1; j < NumPointers; ++j) {
1140       // No need to check if two readonly pointers intersect.
1141       if (!PtrRtCheck.IsWritePtr[i] && !PtrRtCheck.IsWritePtr[j])
1142         continue;
1143
1144       // Only need to check pointers between two different dependency sets.
1145       if (PtrRtCheck.DependencySetId[i] == PtrRtCheck.DependencySetId[j])
1146        continue;
1147       // Only need to check pointers in the same alias set.
1148       if (PtrRtCheck.AliasSetId[i] != PtrRtCheck.AliasSetId[j])
1149         continue;
1150
1151       unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
1152       unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
1153
1154       assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
1155              (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
1156              "Trying to bounds check pointers with different address spaces");
1157
1158       Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
1159       Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
1160
1161       Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
1162       Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
1163       Value *End0 =   ChkBuilder.CreateBitCast(Ends[i],   PtrArithTy1, "bc");
1164       Value *End1 =   ChkBuilder.CreateBitCast(Ends[j],   PtrArithTy0, "bc");
1165
1166       Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
1167       FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
1168       Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
1169       FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
1170       Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1171       FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
1172       if (MemoryRuntimeCheck) {
1173         IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
1174                                          "conflict.rdx");
1175         FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
1176       }
1177       MemoryRuntimeCheck = IsConflict;
1178     }
1179   }
1180
1181   // We have to do this trickery because the IRBuilder might fold the check to a
1182   // constant expression in which case there is no Instruction anchored in a
1183   // the block.
1184   Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
1185                                                  ConstantInt::getTrue(Ctx));
1186   ChkBuilder.Insert(Check, "memcheck.conflict");
1187   FirstInst = getFirstInst(FirstInst, Check, Loc);
1188   return std::make_pair(FirstInst, Check);
1189 }