X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FAnalysis%2FLoopAccessAnalysis.cpp;h=49b28078c9767c0dce6484d7253b7551c0fcb589;hp=3c61bba98a71ecacedd86e1c281e5f87939f82bb;hb=fe0b6a7f50fe8ad11d595d90eedfeb6484dc841f;hpb=ecb6a3711194c753066337d97f437557139bdbd3 diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 3c61bba98a7..49b28078c97 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -58,12 +58,12 @@ static cl::opt MemoryCheckMergeThreshold( /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; -/// \brief We collect interesting dependences up to this threshold. -static cl::opt MaxInterestingDependence( - "max-interesting-dependences", cl::Hidden, - cl::desc("Maximum number of interesting dependences collected by " - "loop-access analysis (default = 100)"), - cl::init(100)); +/// \brief We collect dependences up to this threshold. +static cl::opt + MaxDependences("max-dependences", cl::Hidden, + cl::desc("Maximum number of dependences collected by " + "loop-access analysis (default = 100)"), + cl::init(100)); bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; @@ -89,8 +89,8 @@ Value *llvm::stripIntegerCast(Value *V) { const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, const ValueToValueMap &PtrToStride, + SCEVUnionPredicate &Preds, Value *Ptr, Value *OrigPtr) { - const SCEV *OrigSCEV = SE->getSCEV(Ptr); // If there is an entry in the map return the SCEV of the pointer with the @@ -108,41 +108,80 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, ValueToValueMap RewriteMap; RewriteMap[StrideVal] = One; - const SCEV *ByOne = - SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true); + const auto *U = cast(SE->getSCEV(StrideVal)); + const auto *CT = + static_cast(SE->getOne(StrideVal->getType())); + + Preds.add(SE->getEqualPredicate(U, CT)); + + const SCEV *ByOne = SE->rewriteUsingPredicate(OrigSCEV, Preds); DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne << "\n"); return ByOne; } // Otherwise, just return the SCEV of the original pointer. - return SE->getSCEV(Ptr); + return OrigSCEV; } -void LoopAccessInfo::RuntimePointerCheck::insert( - Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, - const ValueToValueMap &Strides) { +void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, + unsigned DepSetId, unsigned ASId, + const ValueToValueMap &Strides, + SCEVUnionPredicate &Preds) { // Get the stride replaced scev. - const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + + const SCEV *ScStart = AR->getStart(); const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - Pointers.push_back(Ptr); - Starts.push_back(AR->getStart()); - Ends.push_back(ScEnd); - IsWritePtr.push_back(WritePtr); - DependencySetId.push_back(DepSetId); - AliasSetId.push_back(ASId); - Exprs.push_back(Sc); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // For expressions with negative step, the upper bound is ScStart and the + // lower bound is ScEnd. + if (const SCEVConstant *CStep = dyn_cast(Step)) { + if (CStep->getValue()->isNegative()) + std::swap(ScStart, ScEnd); + } else { + // Fallback case: the step is not constant, but the we can still + // get the upper and lower bounds of the interval by using min/max + // expressions. + ScStart = SE->getUMinExpr(ScStart, ScEnd); + ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); + } + + Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); +} + +SmallVector +RuntimePointerChecking::generateChecks() const { + SmallVector Checks; + + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { + const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I]; + const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J]; + + if (needsChecking(CGI, CGJ)) + Checks.push_back(std::make_pair(&CGI, &CGJ)); + } + } + return Checks; } -bool LoopAccessInfo::RuntimePointerCheck::needsChecking( - const CheckingPtrGroup &M, const CheckingPtrGroup &N, - const SmallVectorImpl *PtrPartition) const { +void RuntimePointerChecking::generateChecks( + MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { + assert(Checks.empty() && "Checks is not empty"); + groupChecks(DepCands, UseDependencies); + Checks = generateChecks(); +} + +bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M, + const CheckingPtrGroup &N) const { for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) - if (needsChecking(M.Members[I], N.Members[J], PtrPartition)) + if (needsChecking(M.Members[I], N.Members[J])) return true; return false; } @@ -161,34 +200,35 @@ static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, return I; } -bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer( - unsigned Index) { +bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) { + const SCEV *Start = RtCheck.Pointers[Index].Start; + const SCEV *End = RtCheck.Pointers[Index].End; + // Compare the starts and ends with the known minimum and maximum // of this set. We need to know how we compare against the min/max // of the set in order to be able to emit memchecks. - const SCEV *Min0 = getMinFromExprs(RtCheck.Starts[Index], Low, RtCheck.SE); + const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE); if (!Min0) return false; - const SCEV *Min1 = getMinFromExprs(RtCheck.Ends[Index], High, RtCheck.SE); + const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE); if (!Min1) return false; // Update the low bound expression if we've found a new min value. - if (Min0 == RtCheck.Starts[Index]) - Low = RtCheck.Starts[Index]; + if (Min0 == Start) + Low = Start; // Update the high bound expression if we've found a new max value. - if (Min1 != RtCheck.Ends[Index]) - High = RtCheck.Ends[Index]; + if (Min1 != End) + High = End; Members.push_back(Index); return true; } -void LoopAccessInfo::RuntimePointerCheck::groupChecks( - MemoryDepChecker::DepCandidates &DepCands, - bool UseDependencies) { +void RuntimePointerChecking::groupChecks( + MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { // We build the groups from dependency candidates equivalence classes // because: // - We know that pointers in the same equivalence class share @@ -208,8 +248,31 @@ void LoopAccessInfo::RuntimePointerCheck::groupChecks( CheckingGroups.clear(); + // If we need to check two pointers to the same underlying object + // with a non-constant difference, we shouldn't perform any pointer + // grouping with those pointers. This is because we can easily get + // into cases where the resulting check would return false, even when + // the accesses are safe. + // + // The following example shows this: + // for (i = 0; i < 1000; ++i) + // a[5000 + i * m] = a[i] + a[i + 9000] + // + // Here grouping gives a check of (5000, 5000 + 1000 * m) against + // (0, 10000) which is always false. However, if m is 1, there is no + // dependence. Not grouping the checks for a[i] and a[i + 9000] allows + // us to perform an accurate check in this case. + // + // The above case requires that we have an UnknownDependence between + // accesses to the same underlying object. This cannot happen unless + // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies + // is also false. In this case we will use the fallback path and create + // separate checking groups for all pointers. + // If we don't have the dependency partitions, construct a new - // checking pointer group for each pointer. + // checking pointer group for each pointer. This is also required + // for correctness, because in this case we can have checking between + // pointers to the same underlying object. if (!UseDependencies) { for (unsigned I = 0; I < Pointers.size(); ++I) CheckingGroups.push_back(CheckingPtrGroup(I, *this)); @@ -219,21 +282,39 @@ void LoopAccessInfo::RuntimePointerCheck::groupChecks( unsigned TotalComparisons = 0; DenseMap PositionMap; - for (unsigned Pointer = 0; Pointer < Pointers.size(); ++Pointer) - PositionMap[Pointers[Pointer]] = Pointer; + for (unsigned Index = 0; Index < Pointers.size(); ++Index) + PositionMap[Pointers[Index].PointerValue] = Index; + + // We need to keep track of what pointers we've already seen so we + // don't process them twice. + SmallSet Seen; // Go through all equivalence classes, get the the "pointer check groups" - // and add them to the overall solution. - for (auto DI = DepCands.begin(), DE = DepCands.end(); DI != DE; ++DI) { - if (!DI->isLeader()) + // and add them to the overall solution. We use the order in which accesses + // appear in 'Pointers' to enforce determinism. + for (unsigned I = 0; I < Pointers.size(); ++I) { + // We've seen this pointer before, and therefore already processed + // its equivalence class. + if (Seen.count(I)) continue; - SmallVector Groups; + MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, + Pointers[I].IsWritePtr); - for (auto MI = DepCands.member_begin(DI), ME = DepCands.member_end(); + SmallVector Groups; + auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); + + // Because DepCands is constructed by visiting accesses in the order in + // which they appear in alias sets (which is deterministic) and the + // iteration order within an equivalence class member is only dependent on + // the order in which unions and insertions are performed on the + // equivalence class, the iteration order is deterministic. + for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); MI != ME; ++MI) { unsigned Pointer = PositionMap[MI->getPointer()]; bool Merged = false; + // Mark this pointer as seen. + Seen.insert(Pointer); // Go through all the existing sets and see if we can find one // which can include this pointer. @@ -266,98 +347,68 @@ void LoopAccessInfo::RuntimePointerCheck::groupChecks( } } -bool LoopAccessInfo::RuntimePointerCheck::needsChecking( - unsigned I, unsigned J, const SmallVectorImpl *PtrPartition) const { +bool RuntimePointerChecking::arePointersInSamePartition( + const SmallVectorImpl &PtrToPartition, unsigned PtrIdx1, + unsigned PtrIdx2) { + return (PtrToPartition[PtrIdx1] != -1 && + PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); +} + +bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { + const PointerInfo &PointerI = Pointers[I]; + const PointerInfo &PointerJ = Pointers[J]; + // No need to check if two readonly pointers intersect. - if (!IsWritePtr[I] && !IsWritePtr[J]) + if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr) return false; // Only need to check pointers between two different dependency sets. - if (DependencySetId[I] == DependencySetId[J]) + if (PointerI.DependencySetId == PointerJ.DependencySetId) return false; // Only need to check pointers in the same alias set. - if (AliasSetId[I] != AliasSetId[J]) - return false; - - // If PtrPartition is set omit checks between pointers of the same partition. - // Partition number -1 means that the pointer is used in multiple partitions. - // In this case we can't omit the check. - if (PtrPartition && (*PtrPartition)[I] != -1 && - (*PtrPartition)[I] == (*PtrPartition)[J]) + if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; return true; } -void LoopAccessInfo::RuntimePointerCheck::print( - raw_ostream &OS, unsigned Depth, - const SmallVectorImpl *PtrPartition) const { - - OS.indent(Depth) << "Run-time memory checks:\n"; - +void RuntimePointerChecking::printChecks( + raw_ostream &OS, const SmallVectorImpl &Checks, + unsigned Depth) const { unsigned N = 0; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) - for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { - OS.indent(Depth) << "Check " << N++ << ":\n"; - OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; - - for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { - OS.indent(Depth + 2) << *Pointers[CheckingGroups[I].Members[K]] - << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" - << "\n"; - } + for (const auto &Check : Checks) { + const auto &First = Check.first->Members, &Second = Check.second->Members; - OS.indent(Depth + 2) << "Against group " << J << ":\n"; + OS.indent(Depth) << "Check " << N++ << ":\n"; - for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { - OS.indent(Depth + 2) << *Pointers[CheckingGroups[J].Members[K]] - << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" - << "\n"; - } - } + OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n"; + for (unsigned K = 0; K < First.size(); ++K) + OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n"; - OS.indent(Depth) << "Grouped accesses:\n"; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) { - OS.indent(Depth + 2) << "Group " << I << ":\n"; - OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low - << " High: " << *CheckingGroups[I].High << ")\n"; - for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { - OS.indent(Depth + 6) << "Member: " << *Exprs[CheckingGroups[I].Members[J]] - << "\n"; - } + OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n"; + for (unsigned K = 0; K < Second.size(); ++K) + OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n"; } } -unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( - const SmallVectorImpl *PtrPartition) const { +void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { - unsigned NumPartitions = CheckingGroups.size(); - unsigned CheckCount = 0; - - for (unsigned I = 0; I < NumPartitions; ++I) - for (unsigned J = I + 1; J < NumPartitions; ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) - CheckCount++; - return CheckCount; -} + OS.indent(Depth) << "Run-time memory checks:\n"; + printChecks(OS, Checks, Depth); -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( - const SmallVectorImpl *PtrPartition) const { - unsigned NumPointers = Pointers.size(); + OS.indent(Depth) << "Grouped accesses:\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + const auto &CG = CheckingGroups[I]; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) - return true; - return false; + OS.indent(Depth + 2) << "Group " << &CG << ":\n"; + OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High + << ")\n"; + for (unsigned J = 0; J < CG.Members.size(); ++J) { + OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr + << "\n"; + } + } } namespace { @@ -372,9 +423,9 @@ public: typedef SmallPtrSet MemAccessInfoSet; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), - IsRTCheckAnalysisNeeded(false) {} + MemoryDepChecker::DepCandidates &DA, SCEVUnionPredicate &Preds) + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), + Preds(Preds) {} /// \brief Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -393,11 +444,12 @@ public: } /// \brief Check whether we can check the pointers at runtime for - /// non-intersection. Returns true when we have 0 pointers - /// (a check on 0 pointers for non-intersection will always return true). - bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, - const ValueToValueMap &Strides, + /// non-intersection. + /// + /// Returns true if we need no check or if we do and we can generate them + /// (i.e. the pointers have computable bounds). + bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, + Loop *TheLoop, const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -416,7 +468,7 @@ public: /// We decided that no dependence analysis would be used. Reset the state. void resetDepChecks(MemoryDepChecker &DepChecker) { CheckDeps.clear(); - DepChecker.clearInterestingDependences(); + DepChecker.clearDependences(); } MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } @@ -425,7 +477,7 @@ private: typedef SetVector PtrAccessSet; /// \brief Go over all memory access and check whether runtime pointer checks - /// are needed /// and build sets of dependency check candidates. + /// are needed and build sets of dependency check candidates. void processMemAccesses(); /// Set of all accesses. @@ -458,14 +510,18 @@ private: /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared /// while this remains set if we have potentially dependent accesses. bool IsRTCheckAnalysisNeeded; + + /// The SCEV predicate containing all the SCEV-related assumptions. + SCEVUnionPredicate &Preds; }; } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. static bool hasComputableBounds(ScalarEvolution *SE, - const ValueToValueMap &Strides, Value *Ptr) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const ValueToValueMap &Strides, Value *Ptr, + Loop *L, SCEVUnionPredicate &Preds) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) return false; @@ -473,15 +529,15 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck, - ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, - bool ShouldCheckStride) { +bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, + ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &StridesMap, + bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; - NeedRTCheck = false; + bool NeedRTCheck = false; if (!IsRTCheckAnalysisNeeded) return true; bool IsDepCheckNeeded = isDependencyCheckNeeded(); @@ -508,11 +564,11 @@ bool AccessAnalysis::canCheckPtrAtRT( else ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && + if (hasComputableBounds(SE, StridesMap, Ptr, TheLoop, Preds) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) { + isStridedPtr(SE, Ptr, TheLoop, StridesMap, Preds) == 1)) { // The id of the dependence set. unsigned DepId; @@ -526,7 +582,7 @@ bool AccessAnalysis::canCheckPtrAtRT( // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, Preds); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -559,14 +615,15 @@ bool AccessAnalysis::canCheckPtrAtRT( for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i + 1; j < NumPointers; ++j) { // Only need to check pointers between two different dependency sets. - if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) + if (RtCheck.Pointers[i].DependencySetId == + RtCheck.Pointers[j].DependencySetId) continue; // Only need to check pointers in the same alias set. - if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j]) + if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) continue; - Value *PtrI = RtCheck.Pointers[i]; - Value *PtrJ = RtCheck.Pointers[j]; + Value *PtrI = RtCheck.Pointers[i].PointerValue; + Value *PtrJ = RtCheck.Pointers[j].PointerValue; unsigned ASi = PtrI->getType()->getPointerAddressSpace(); unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); @@ -579,9 +636,17 @@ bool AccessAnalysis::canCheckPtrAtRT( } if (NeedRTCheck && CanDoRT) - RtCheck.groupChecks(DepCands, IsDepCheckNeeded); + RtCheck.generateChecks(DepCands, IsDepCheckNeeded); - return CanDoRT; + DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() + << " pointer comparisons.\n"); + + RtCheck.Need = NeedRTCheck; + + bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT; + if (!CanDoRTIfNeeded) + RtCheck.reset(); + return CanDoRTIfNeeded; } void AccessAnalysis::processMemAccesses() { @@ -678,6 +743,11 @@ void AccessAnalysis::processMemAccesses() { GetUnderlyingObjects(Ptr, TempObjects, DL, LI); DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); for (Value *UnderlyingObj : TempObjects) { + // nullptr never alias, don't join sets for pointer that have "null" + // in their UnderlyingObjects list. + if (isa(UnderlyingObj)) + continue; + UnderlyingObjToAccessMap::iterator Prev = ObjToLastAccess.find(UnderlyingObj); if (Prev != ObjToLastAccess.end()) @@ -748,19 +818,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, /// \brief Check whether the access through \p Ptr has a constant stride. int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap) { - const Type *Ty = Ptr->getType(); + const ValueToValueMap &StridesMap, + SCEVUnionPredicate &Preds) { + Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to aggregate types. - const PointerType *PtrTy = cast(Ty); + auto *PtrTy = cast(Ty); if (PtrTy->getElementType()->isAggregateType()) { DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr << "\n"); return 0; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr); + const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Preds, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) { @@ -844,15 +915,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { llvm_unreachable("unexpected DepType!"); } -bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { +bool MemoryDepChecker::Dependence::isBackward() const { switch (Type) { case NoDep: case Forward: + case ForwardButPreventsForwarding: + case Unknown: return false; case BackwardVectorizable: - case Unknown: - case ForwardButPreventsForwarding: case Backward: case BackwardVectorizableButPreventsForwarding: return true; @@ -861,17 +932,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { } bool MemoryDepChecker::Dependence::isPossiblyBackward() const { + return isBackward() || Type == Unknown; +} + +bool MemoryDepChecker::Dependence::isForward() const { switch (Type) { - case NoDep: case Forward: case ForwardButPreventsForwarding: - return false; + return true; + case NoDep: case Unknown: case BackwardVectorizable: case Backward: case BackwardVectorizableButPreventsForwarding: - return true; + return false; } llvm_unreachable("unexpected DepType!"); } @@ -971,11 +1046,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); + const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, APtr); + const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, BPtr); - int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides); - int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides); + int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides, Preds); + int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides, Preds); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1036,7 +1111,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Could be improved to assert type sizes are the same (i32 == float, etc). if (Val == 0) { if (ATy == BTy) - return Dependence::NoDep; + return Dependence::Forward; DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); return Dependence::Unknown; } @@ -1175,22 +1250,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, isDependent(*A.first, A.second, *B.first, B.second, Strides); SafeForVectorization &= Dependence::isSafeForVectorization(Type); - // Gather dependences unless we accumulated MaxInterestingDependence + // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first // unsafe dependence. This puts a limit on this quadratic // algorithm. - if (RecordInterestingDependences) { - if (Dependence::isInterestingDependence(Type)) - InterestingDependences.push_back( - Dependence(A.second, B.second, Type)); - - if (InterestingDependences.size() >= MaxInterestingDependence) { - RecordInterestingDependences = false; - InterestingDependences.clear(); + if (RecordDependences) { + if (Type != Dependence::NoDep) + Dependences.push_back(Dependence(A.second, B.second, Type)); + + if (Dependences.size() >= MaxDependences) { + RecordDependences = false; + Dependences.clear(); DEBUG(dbgs() << "Too many dependences, stopped recording\n"); } } - if (!RecordInterestingDependences && !SafeForVectorization) + if (!RecordDependences && !SafeForVectorization) return false; } ++OI; @@ -1199,8 +1273,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, } } - DEBUG(dbgs() << "Total Interesting Dependences: " - << InterestingDependences.size() << "\n"); + DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); return SafeForVectorization; } @@ -1294,8 +1367,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { unsigned NumReads = 0; unsigned NumReadWrites = 0; - PtrRtCheck.Pointers.clear(); - PtrRtCheck.Need = false; + PtrRtChecking.Pointers.clear(); + PtrRtChecking.Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); @@ -1342,7 +1415,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast(it); if (!St) { - emitAnalysis(LoopAccessReport(it) << + emitAnalysis(LoopAccessReport(&*it) << "instruction cannot be vectorized"); CanVecMem = false; return; @@ -1374,7 +1447,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses); + AA, LI, DependentAccesses, Preds); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1425,7 +1498,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) { + if (Seen.insert(Ptr).second || + !isStridedPtr(SE, Ptr, TheLoop, Strides, Preds)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1454,28 +1528,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool NeedRTCheck; - bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, - NeedRTCheck, SE, - TheLoop, Strides); - - DEBUG(dbgs() << "LAA: We need to do " - << PtrRtCheck.getNumberOfChecks(nullptr) - << " pointer comparisons.\n"); - - // Check that we found the bounds for the pointer. - if (CanDoRT) - DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); - else if (NeedRTCheck) { + bool CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); - DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << - "the array bounds.\n"); - PtrRtCheck.reset(); + DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " + << "the array bounds.\n"); CanVecMem = false; return; } - PtrRtCheck.Need = NeedRTCheck; + DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { @@ -1486,23 +1549,21 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) { DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); - NeedRTCheck = true; // Clear the dependency checks. We assume they are not needed. Accesses.resetDepChecks(DepChecker); - PtrRtCheck.reset(); - PtrRtCheck.Need = true; + PtrRtChecking.reset(); + PtrRtChecking.Need = true; - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE, - TheLoop, Strides, true); + CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); // Check that we found the bounds for the pointer. - if (NeedRTCheck && !CanDoRT) { + if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); - PtrRtCheck.reset(); CanVecMem = false; return; } @@ -1513,8 +1574,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (CanVecMem) DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We" - << (NeedRTCheck ? "" : " don't") - << " need a runtime memory check.\n"); + << (PtrRtChecking.Need ? "" : " don't") + << " need runtime memory checks.\n"); else { emitAnalysis(LoopAccessReport() << "unsafe dependent memory operations in loop"); @@ -1551,86 +1612,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, return nullptr; } -std::pair LoopAccessInfo::addRuntimeCheck( - Instruction *Loc, const SmallVectorImpl *PtrPartition) const { - if (!PtrRtCheck.Need) - return std::make_pair(nullptr, nullptr); +namespace { +/// \brief IR Values for the lower and upper bounds of a pointer evolution. We +/// need to use value-handles because SCEV expansion can invalidate previously +/// expanded values. Thus expansion of a pointer can invalidate the bounds for +/// a previous one. +struct PointerBounds { + TrackingVH Start; + TrackingVH End; +}; +} // end anonymous namespace - SmallVector, 2> Starts; - SmallVector, 2> Ends; +/// \brief Expand code for the lower and upper bound of the pointer group \p CG +/// in \p TheLoop. \return the values for the bounds. +static PointerBounds +expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, + Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, + const RuntimePointerChecking &PtrRtChecking) { + Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; + const SCEV *Sc = SE->getSCEV(Ptr); + + if (SE->isLoopInvariant(Sc, TheLoop)) { + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr + << "\n"); + return {Ptr, Ptr}; + } else { + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = Loc->getContext(); + + // Use this type for pointer arithmetic. + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = nullptr, *End = nullptr; + + DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); + return {Start, End}; + } +} - LLVMContext &Ctx = Loc->getContext(); - SCEVExpander Exp(*SE, DL, "induction"); - Instruction *FirstInst = nullptr; +/// \brief Turns a collection of checks into a collection of expanded upper and +/// lower bounds for both pointers in the check. +static SmallVector, 4> expandBounds( + const SmallVectorImpl &PointerChecks, + Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, + const RuntimePointerChecking &PtrRtChecking) { + SmallVector, 4> ChecksWithBounds; + + // Here we're relying on the SCEV Expander's cache to only emit code for the + // same bounds once. + std::transform( + PointerChecks.begin(), PointerChecks.end(), + std::back_inserter(ChecksWithBounds), + [&](const RuntimePointerChecking::PointerCheck &Check) { + PointerBounds + First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), + Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking); + return std::make_pair(First, Second); + }); + + return ChecksWithBounds; +} - for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) { - const RuntimePointerCheck::CheckingPtrGroup &CG = - PtrRtCheck.CheckingGroups[i]; - Value *Ptr = PtrRtCheck.Pointers[CG.Members[0]]; - const SCEV *Sc = SE->getSCEV(Ptr); - - if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr - << "\n"); - Starts.push_back(Ptr); - Ends.push_back(Ptr); - } else { - unsigned AS = Ptr->getType()->getPointerAddressSpace(); - - // Use this type for pointer arithmetic. - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); - Value *Start = nullptr, *End = nullptr; - - DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); - Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); - End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); - DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); - Starts.push_back(Start); - Ends.push_back(End); - } - } +std::pair LoopAccessInfo::addRuntimeChecks( + Instruction *Loc, + const SmallVectorImpl &PointerChecks) + const { + SCEVExpander Exp(*SE, DL, "induction"); + auto ExpandedChecks = + expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); + + LLVMContext &Ctx = Loc->getContext(); + Instruction *FirstInst = nullptr; IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. Value *MemoryRuntimeCheck = nullptr; - for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) { - for (unsigned j = i + 1; j < PtrRtCheck.CheckingGroups.size(); ++j) { - const RuntimePointerCheck::CheckingPtrGroup &CGI = - PtrRtCheck.CheckingGroups[i]; - const RuntimePointerCheck::CheckingPtrGroup &CGJ = - PtrRtCheck.CheckingGroups[j]; - - if (!PtrRtCheck.needsChecking(CGI, CGJ, PtrPartition)) - continue; - - unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); - unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); - - assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && - (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && - "Trying to bounds check pointers with different address spaces"); - - Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); - Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); - - Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); - Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); - Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc"); - Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc"); - Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); - FirstInst = getFirstInst(FirstInst, Cmp0, Loc); - Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); - FirstInst = getFirstInst(FirstInst, Cmp1, Loc); - Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + for (const auto &Check : ExpandedChecks) { + const PointerBounds &A = Check.first, &B = Check.second; + // Check if two pointers (A and B) conflict where conflict is computed as: + // start(A) <= end(B) && start(B) <= end(A) + unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); + unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); + + assert((AS0 == B.End->getType()->getPointerAddressSpace()) && + (AS1 == A.End->getType()->getPointerAddressSpace()) && + "Trying to bounds check pointers with different address spaces"); + + Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); + Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + + Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); + + Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + FirstInst = getFirstInst(FirstInst, Cmp0, Loc); + Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + FirstInst = getFirstInst(FirstInst, Cmp1, Loc); + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + FirstInst = getFirstInst(FirstInst, IsConflict, Loc); + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - if (MemoryRuntimeCheck) { - IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, - "conflict.rdx"); - FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - } - MemoryRuntimeCheck = IsConflict; } + MemoryRuntimeCheck = IsConflict; } if (!MemoryRuntimeCheck) @@ -1646,13 +1736,21 @@ std::pair LoopAccessInfo::addRuntimeCheck( return std::make_pair(FirstInst, Check); } +std::pair +LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { + if (!PtrRtChecking.Need) + return std::make_pair(nullptr, nullptr); + + return addRuntimeChecks(Loc, PtrRtChecking.getChecks()); +} + LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : PtrRtCheck(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), - AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), + : PtrRtChecking(SE), DepChecker(SE, L, Preds), TheLoop(L), SE(SE), DL(DL), + TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) @@ -1661,7 +1759,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { - if (PtrRtCheck.Need) + if (PtrRtChecking.Need) OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; else OS.indent(Depth) << "Memory dependences are safe\n"; @@ -1670,22 +1768,25 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; - if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { - OS.indent(Depth) << "Interesting Dependences:\n"; - for (auto &Dep : *InterestingDependences) { + if (auto *Dependences = DepChecker.getDependences()) { + OS.indent(Depth) << "Dependences:\n"; + for (auto &Dep : *Dependences) { Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions()); OS << "\n"; } } else - OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; + OS.indent(Depth) << "Too many dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. - PtrRtCheck.print(OS, Depth); + PtrRtChecking.print(OS, Depth); OS << "\n"; OS.indent(Depth) << "Store to invariant address was " << (StoreToLoopInvariantAddress ? "" : "not ") << "found in loop.\n"; + + OS.indent(Depth) << "SCEV assumptions:\n"; + Preds.print(OS, Depth); } const LoopAccessInfo & @@ -1699,8 +1800,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) { if (!LAI) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - LAI = llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, - Strides); + LAI = + llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, Strides); #ifndef NDEBUG LAI->NumSymbolicStrides = Strides.size(); #endif @@ -1722,10 +1823,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { } bool LoopAccessAnalysis::runOnFunction(Function &F) { - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); @@ -1733,8 +1834,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) { } void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -1746,8 +1847,8 @@ static const char laa_name[] = "Loop Access Analysis"; #define LAA_NAME "loop-accesses" INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)