X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FAnalysis%2FLoopAccessAnalysis.cpp;h=8bcdcb862014dc9bd9dc786dd1a63b55914abce8;hb=1c140c48ae1ef861e6f49b85fd7941a4e617fc57;hp=78a8b20f78adb7941b6a3227f20212d656f5ff4d;hpb=81e7e2dfab488cac1c164dde1b92ccdb72a6c9bd;p=oota-llvm.git diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 78a8b20f78a..8bcdcb86201 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -58,12 +58,12 @@ static cl::opt MemoryCheckMergeThreshold( /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; -/// \brief We collect interesting dependences up to this threshold. -static cl::opt MaxInterestingDependence( - "max-interesting-dependences", cl::Hidden, - cl::desc("Maximum number of interesting dependences collected by " - "loop-access analysis (default = 100)"), - cl::init(100)); +/// \brief We collect dependences up to this threshold. +static cl::opt + MaxDependences("max-dependences", cl::Hidden, + cl::desc("Maximum number of dependences collected by " + "loop-access analysis (default = 100)"), + cl::init(100)); bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; @@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) { return V; } -const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr) { - - const SCEV *OrigSCEV = SE->getSCEV(Ptr); + const SCEV *OrigSCEV = PSE.getSCEV(Ptr); // If there is an entry in the map return the SCEV of the pointer with the // symbolic stride replaced by one. @@ -108,41 +107,82 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, ValueToValueMap RewriteMap; RewriteMap[StrideVal] = One; - const SCEV *ByOne = - SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true); - DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne + ScalarEvolution *SE = PSE.getSE(); + const auto *U = cast(SE->getSCEV(StrideVal)); + const auto *CT = + static_cast(SE->getOne(StrideVal->getType())); + + PSE.addPredicate(*SE->getEqualPredicate(U, CT)); + auto *Expr = PSE.getSCEV(Ptr); + + DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr << "\n"); - return ByOne; + return Expr; } // Otherwise, just return the SCEV of the original pointer. - return SE->getSCEV(Ptr); + return OrigSCEV; } -void LoopAccessInfo::RuntimePointerCheck::insert( - Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, - const ValueToValueMap &Strides) { +void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, + unsigned DepSetId, unsigned ASId, + const ValueToValueMap &Strides, + PredicatedScalarEvolution &PSE) { // Get the stride replaced scev. - const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); + ScalarEvolution *SE = PSE.getSE(); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + + const SCEV *ScStart = AR->getStart(); const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - Pointers.push_back(Ptr); - Starts.push_back(AR->getStart()); - Ends.push_back(ScEnd); - IsWritePtr.push_back(WritePtr); - DependencySetId.push_back(DepSetId); - AliasSetId.push_back(ASId); - Exprs.push_back(Sc); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // For expressions with negative step, the upper bound is ScStart and the + // lower bound is ScEnd. + if (const SCEVConstant *CStep = dyn_cast(Step)) { + if (CStep->getValue()->isNegative()) + std::swap(ScStart, ScEnd); + } else { + // Fallback case: the step is not constant, but the we can still + // get the upper and lower bounds of the interval by using min/max + // expressions. + ScStart = SE->getUMinExpr(ScStart, ScEnd); + ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); + } + + Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); } -bool LoopAccessInfo::RuntimePointerCheck::needsChecking( - const CheckingPtrGroup &M, const CheckingPtrGroup &N, - const SmallVectorImpl *PtrPartition) const { +SmallVector +RuntimePointerChecking::generateChecks() const { + SmallVector Checks; + + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { + const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I]; + const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J]; + + if (needsChecking(CGI, CGJ)) + Checks.push_back(std::make_pair(&CGI, &CGJ)); + } + } + return Checks; +} + +void RuntimePointerChecking::generateChecks( + MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { + assert(Checks.empty() && "Checks is not empty"); + groupChecks(DepCands, UseDependencies); + Checks = generateChecks(); +} + +bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M, + const CheckingPtrGroup &N) const { for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) - if (needsChecking(M.Members[I], N.Members[J], PtrPartition)) + if (needsChecking(M.Members[I], N.Members[J])) return true; return false; } @@ -161,34 +201,35 @@ static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, return I; } -bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer( - unsigned Index) { +bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) { + const SCEV *Start = RtCheck.Pointers[Index].Start; + const SCEV *End = RtCheck.Pointers[Index].End; + // Compare the starts and ends with the known minimum and maximum // of this set. We need to know how we compare against the min/max // of the set in order to be able to emit memchecks. - const SCEV *Min0 = getMinFromExprs(RtCheck.Starts[Index], Low, RtCheck.SE); + const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE); if (!Min0) return false; - const SCEV *Min1 = getMinFromExprs(RtCheck.Ends[Index], High, RtCheck.SE); + const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE); if (!Min1) return false; // Update the low bound expression if we've found a new min value. - if (Min0 == RtCheck.Starts[Index]) - Low = RtCheck.Starts[Index]; + if (Min0 == Start) + Low = Start; // Update the high bound expression if we've found a new max value. - if (Min1 != RtCheck.Ends[Index]) - High = RtCheck.Ends[Index]; + if (Min1 != End) + High = End; Members.push_back(Index); return true; } -void LoopAccessInfo::RuntimePointerCheck::groupChecks( - MemoryDepChecker::DepCandidates &DepCands, - bool UseDependencies) { +void RuntimePointerChecking::groupChecks( + MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { // We build the groups from dependency candidates equivalence classes // because: // - We know that pointers in the same equivalence class share @@ -208,8 +249,31 @@ void LoopAccessInfo::RuntimePointerCheck::groupChecks( CheckingGroups.clear(); + // If we need to check two pointers to the same underlying object + // with a non-constant difference, we shouldn't perform any pointer + // grouping with those pointers. This is because we can easily get + // into cases where the resulting check would return false, even when + // the accesses are safe. + // + // The following example shows this: + // for (i = 0; i < 1000; ++i) + // a[5000 + i * m] = a[i] + a[i + 9000] + // + // Here grouping gives a check of (5000, 5000 + 1000 * m) against + // (0, 10000) which is always false. However, if m is 1, there is no + // dependence. Not grouping the checks for a[i] and a[i + 9000] allows + // us to perform an accurate check in this case. + // + // The above case requires that we have an UnknownDependence between + // accesses to the same underlying object. This cannot happen unless + // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies + // is also false. In this case we will use the fallback path and create + // separate checking groups for all pointers. + // If we don't have the dependency partitions, construct a new - // checking pointer group for each pointer. + // checking pointer group for each pointer. This is also required + // for correctness, because in this case we can have checking between + // pointers to the same underlying object. if (!UseDependencies) { for (unsigned I = 0; I < Pointers.size(); ++I) CheckingGroups.push_back(CheckingPtrGroup(I, *this)); @@ -219,14 +283,14 @@ void LoopAccessInfo::RuntimePointerCheck::groupChecks( unsigned TotalComparisons = 0; DenseMap PositionMap; - for (unsigned Pointer = 0; Pointer < Pointers.size(); ++Pointer) - PositionMap[Pointers[Pointer]] = Pointer; + for (unsigned Index = 0; Index < Pointers.size(); ++Index) + PositionMap[Pointers[Index].PointerValue] = Index; // We need to keep track of what pointers we've already seen so we // don't process them twice. SmallSet Seen; - // Go through all equivalence classes, get the the "pointer check groups" + // Go through all equivalence classes, get the "pointer check groups" // and add them to the overall solution. We use the order in which accesses // appear in 'Pointers' to enforce determinism. for (unsigned I = 0; I < Pointers.size(); ++I) { @@ -235,24 +299,20 @@ void LoopAccessInfo::RuntimePointerCheck::groupChecks( if (Seen.count(I)) continue; - MemoryDepChecker::MemAccessInfo Access(Pointers[I], IsWritePtr[I]); + MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, + Pointers[I].IsWritePtr); SmallVector Groups; auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); - SmallVector MemberIndices; - - // Get all indeces of the members of this equivalence class and sort them. - // This will allow us to process all accesses in the order in which they - // were added to the RuntimePointerCheck. + // Because DepCands is constructed by visiting accesses in the order in + // which they appear in alias sets (which is deterministic) and the + // iteration order within an equivalence class member is only dependent on + // the order in which unions and insertions are performed on the + // equivalence class, the iteration order is deterministic. for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); MI != ME; ++MI) { unsigned Pointer = PositionMap[MI->getPointer()]; - MemberIndices.push_back(Pointer); - } - std::sort(MemberIndices.begin(), MemberIndices.end()); - - for (unsigned Pointer : MemberIndices) { bool Merged = false; // Mark this pointer as seen. Seen.insert(Pointer); @@ -288,98 +348,68 @@ void LoopAccessInfo::RuntimePointerCheck::groupChecks( } } -bool LoopAccessInfo::RuntimePointerCheck::needsChecking( - unsigned I, unsigned J, const SmallVectorImpl *PtrPartition) const { +bool RuntimePointerChecking::arePointersInSamePartition( + const SmallVectorImpl &PtrToPartition, unsigned PtrIdx1, + unsigned PtrIdx2) { + return (PtrToPartition[PtrIdx1] != -1 && + PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); +} + +bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { + const PointerInfo &PointerI = Pointers[I]; + const PointerInfo &PointerJ = Pointers[J]; + // No need to check if two readonly pointers intersect. - if (!IsWritePtr[I] && !IsWritePtr[J]) + if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr) return false; // Only need to check pointers between two different dependency sets. - if (DependencySetId[I] == DependencySetId[J]) + if (PointerI.DependencySetId == PointerJ.DependencySetId) return false; // Only need to check pointers in the same alias set. - if (AliasSetId[I] != AliasSetId[J]) - return false; - - // If PtrPartition is set omit checks between pointers of the same partition. - // Partition number -1 means that the pointer is used in multiple partitions. - // In this case we can't omit the check. - if (PtrPartition && (*PtrPartition)[I] != -1 && - (*PtrPartition)[I] == (*PtrPartition)[J]) + if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; return true; } -void LoopAccessInfo::RuntimePointerCheck::print( - raw_ostream &OS, unsigned Depth, - const SmallVectorImpl *PtrPartition) const { - - OS.indent(Depth) << "Run-time memory checks:\n"; - +void RuntimePointerChecking::printChecks( + raw_ostream &OS, const SmallVectorImpl &Checks, + unsigned Depth) const { unsigned N = 0; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) - for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { - OS.indent(Depth) << "Check " << N++ << ":\n"; - OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; - - for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { - OS.indent(Depth + 2) << *Pointers[CheckingGroups[I].Members[K]] - << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" - << "\n"; - } + for (const auto &Check : Checks) { + const auto &First = Check.first->Members, &Second = Check.second->Members; - OS.indent(Depth + 2) << "Against group " << J << ":\n"; + OS.indent(Depth) << "Check " << N++ << ":\n"; - for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { - OS.indent(Depth + 2) << *Pointers[CheckingGroups[J].Members[K]] - << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" - << "\n"; - } - } + OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n"; + for (unsigned K = 0; K < First.size(); ++K) + OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n"; - OS.indent(Depth) << "Grouped accesses:\n"; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) { - OS.indent(Depth + 2) << "Group " << I << ":\n"; - OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low - << " High: " << *CheckingGroups[I].High << ")\n"; - for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { - OS.indent(Depth + 6) << "Member: " << *Exprs[CheckingGroups[I].Members[J]] - << "\n"; - } + OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n"; + for (unsigned K = 0; K < Second.size(); ++K) + OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n"; } } -unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( - const SmallVectorImpl *PtrPartition) const { - - unsigned NumPartitions = CheckingGroups.size(); - unsigned CheckCount = 0; +void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { - for (unsigned I = 0; I < NumPartitions; ++I) - for (unsigned J = I + 1; J < NumPartitions; ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) - CheckCount++; - return CheckCount; -} + OS.indent(Depth) << "Run-time memory checks:\n"; + printChecks(OS, Checks, Depth); -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( - const SmallVectorImpl *PtrPartition) const { - unsigned NumPointers = Pointers.size(); + OS.indent(Depth) << "Grouped accesses:\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + const auto &CG = CheckingGroups[I]; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) - return true; - return false; + OS.indent(Depth + 2) << "Group " << &CG << ":\n"; + OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High + << ")\n"; + for (unsigned J = 0; J < CG.Members.size(); ++J) { + OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr + << "\n"; + } + } } namespace { @@ -394,9 +424,10 @@ public: typedef SmallPtrSet MemAccessInfoSet; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), - IsRTCheckAnalysisNeeded(false) {} + MemoryDepChecker::DepCandidates &DA, + PredicatedScalarEvolution &PSE) + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), + PSE(PSE) {} /// \brief Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -419,9 +450,8 @@ public: /// /// Returns true if we need no check or if we do and we can generate them /// (i.e. the pointers have computable bounds). - bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - ScalarEvolution *SE, Loop *TheLoop, - const ValueToValueMap &Strides, + bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, + Loop *TheLoop, const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -440,7 +470,7 @@ public: /// We decided that no dependence analysis would be used. Reset the state. void resetDepChecks(MemoryDepChecker &DepChecker) { CheckDeps.clear(); - DepChecker.clearInterestingDependences(); + DepChecker.clearDependences(); } MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } @@ -482,14 +512,18 @@ private: /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared /// while this remains set if we have potentially dependent accesses. bool IsRTCheckAnalysisNeeded; + + /// The SCEV predicate containing all the SCEV-related assumptions. + PredicatedScalarEvolution &PSE; }; } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. -static bool hasComputableBounds(ScalarEvolution *SE, - const ValueToValueMap &Strides, Value *Ptr) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); +static bool hasComputableBounds(PredicatedScalarEvolution &PSE, + const ValueToValueMap &Strides, Value *Ptr, + Loop *L) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) return false; @@ -497,9 +531,10 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, ScalarEvolution *SE, - Loop *TheLoop, const ValueToValueMap &StridesMap, bool ShouldCheckStride) { +bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, + ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &StridesMap, + bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; @@ -531,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT( else ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && + if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) { + isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -549,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT( // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -582,14 +617,15 @@ bool AccessAnalysis::canCheckPtrAtRT( for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i + 1; j < NumPointers; ++j) { // Only need to check pointers between two different dependency sets. - if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) + if (RtCheck.Pointers[i].DependencySetId == + RtCheck.Pointers[j].DependencySetId) continue; // Only need to check pointers in the same alias set. - if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j]) + if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) continue; - Value *PtrI = RtCheck.Pointers[i]; - Value *PtrJ = RtCheck.Pointers[j]; + Value *PtrI = RtCheck.Pointers[i].PointerValue; + Value *PtrJ = RtCheck.Pointers[j].PointerValue; unsigned ASi = PtrI->getType()->getPointerAddressSpace(); unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); @@ -602,9 +638,9 @@ bool AccessAnalysis::canCheckPtrAtRT( } if (NeedRTCheck && CanDoRT) - RtCheck.groupChecks(DepCands, IsDepCheckNeeded); + RtCheck.generateChecks(DepCands, IsDepCheckNeeded); - DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr) + DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() << " pointer comparisons.\n"); RtCheck.Need = NeedRTCheck; @@ -709,6 +745,11 @@ void AccessAnalysis::processMemAccesses() { GetUnderlyingObjects(Ptr, TempObjects, DL, LI); DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); for (Value *UnderlyingObj : TempObjects) { + // nullptr never alias, don't join sets for pointer that have "null" + // in their UnderlyingObjects list. + if (isa(UnderlyingObj)) + continue; + UnderlyingObjToAccessMap::iterator Prev = ObjToLastAccess.find(UnderlyingObj); if (Prev != ObjToLastAccess.end()) @@ -778,20 +819,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, } /// \brief Check whether the access through \p Ptr has a constant stride. -int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap) { - const Type *Ty = Ptr->getType(); +int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap) { + Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to aggregate types. - const PointerType *PtrTy = cast(Ty); + auto *PtrTy = cast(Ty); if (PtrTy->getElementType()->isAggregateType()) { DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr << "\n"); return 0; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr); + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) { @@ -804,6 +845,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, if (Lp != AR->getLoop()) { DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << *Ptr << " SCEV: " << *PtrScev << "\n"); + return 0; } // The address calculation must not wrap. Otherwise, a dependence could be @@ -814,16 +856,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp); + bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " - << *Ptr << " SCEV: " << *PtrScev << "\n"); + << *Ptr << " SCEV: " << *PtrScev << "\n"); return 0; } // Check the step is constant. - const SCEV *Step = AR->getStepRecurrence(*SE); + const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast(Step); @@ -835,7 +877,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, auto &DL = Lp->getHeader()->getModule()->getDataLayout(); int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); - const APInt &APStepVal = C->getValue()->getValue(); + const APInt &APStepVal = C->getAPInt(); // Huge step value - give up. if (APStepVal.getBitWidth() > 64) @@ -875,15 +917,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { llvm_unreachable("unexpected DepType!"); } -bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { +bool MemoryDepChecker::Dependence::isBackward() const { switch (Type) { case NoDep: case Forward: + case ForwardButPreventsForwarding: + case Unknown: return false; case BackwardVectorizable: - case Unknown: - case ForwardButPreventsForwarding: case Backward: case BackwardVectorizableButPreventsForwarding: return true; @@ -892,17 +934,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { } bool MemoryDepChecker::Dependence::isPossiblyBackward() const { + return isBackward() || Type == Unknown; +} + +bool MemoryDepChecker::Dependence::isForward() const { switch (Type) { - case NoDep: case Forward: case ForwardButPreventsForwarding: - return false; + return true; + case NoDep: case Unknown: case BackwardVectorizable: case Backward: case BackwardVectorizableButPreventsForwarding: - return true; + return false; } llvm_unreachable("unexpected DepType!"); } @@ -1002,11 +1048,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); + const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); + const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); - int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides); - int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides); + int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides); + int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1023,12 +1069,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, std::swap(StrideAPtr, StrideBPtr); } - const SCEV *Dist = SE->getMinusSCEV(Sink, Src); + const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "(Induction step: " << StrideAPtr << ")\n"); DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " - << *InstMap[BIdx] << ": " << *Dist << "\n"); + << *InstMap[BIdx] << ": " << *Dist << "\n"); // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in @@ -1051,7 +1097,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned TypeByteSize = DL.getTypeAllocSize(ATy); // Negative distances are not plausible dependencies. - const APInt &Val = C->getValue()->getValue(); + const APInt &Val = C->getAPInt(); if (Val.isNegative()) { bool IsTrueDataDependence = (AIsWrite && !BIsWrite); if (IsTrueDataDependence && @@ -1067,7 +1113,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Could be improved to assert type sizes are the same (i32 == float, etc). if (Val == 0) { if (ATy == BTy) - return Dependence::NoDep; + return Dependence::Forward; DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); return Dependence::Unknown; } @@ -1206,22 +1252,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, isDependent(*A.first, A.second, *B.first, B.second, Strides); SafeForVectorization &= Dependence::isSafeForVectorization(Type); - // Gather dependences unless we accumulated MaxInterestingDependence + // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first // unsafe dependence. This puts a limit on this quadratic // algorithm. - if (RecordInterestingDependences) { - if (Dependence::isInterestingDependence(Type)) - InterestingDependences.push_back( - Dependence(A.second, B.second, Type)); - - if (InterestingDependences.size() >= MaxInterestingDependence) { - RecordInterestingDependences = false; - InterestingDependences.clear(); + if (RecordDependences) { + if (Type != Dependence::NoDep) + Dependences.push_back(Dependence(A.second, B.second, Type)); + + if (Dependences.size() >= MaxDependences) { + RecordDependences = false; + Dependences.clear(); DEBUG(dbgs() << "Too many dependences, stopped recording\n"); } } - if (!RecordInterestingDependences && !SafeForVectorization) + if (!RecordDependences && !SafeForVectorization) return false; } ++OI; @@ -1230,8 +1275,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, } } - DEBUG(dbgs() << "Total Interesting Dependences: " - << InterestingDependences.size() << "\n"); + DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); return SafeForVectorization; } @@ -1301,10 +1345,10 @@ bool LoopAccessInfo::canAnalyzeLoop() { } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(LoopAccessReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(LoopAccessReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } @@ -1325,8 +1369,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { unsigned NumReads = 0; unsigned NumReadWrites = 0; - PtrRtCheck.Pointers.clear(); - PtrRtCheck.Need = false; + PtrRtChecking.Pointers.clear(); + PtrRtChecking.Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); @@ -1373,7 +1417,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast(it); if (!St) { - emitAnalysis(LoopAccessReport(it) << + emitAnalysis(LoopAccessReport(&*it) << "instruction cannot be vectorized"); CanVecMem = false; return; @@ -1405,7 +1449,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses); + AA, LI, DependentAccesses, PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1456,7 +1500,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) { + if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1486,7 +1530,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtCheck, SE, TheLoop, Strides); + Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides); if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1510,11 +1554,12 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Clear the dependency checks. We assume they are not needed. Accesses.resetDepChecks(DepChecker); - PtrRtCheck.reset(); - PtrRtCheck.Need = true; + PtrRtChecking.reset(); + PtrRtChecking.Need = true; + auto *SE = PSE.getSE(); CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtCheck, SE, TheLoop, Strides, true); + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); // Check that we found the bounds for the pointer. if (!CanDoRTIfNeeded) { @@ -1531,7 +1576,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (CanVecMem) DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We" - << (PtrRtCheck.Need ? "" : " don't") + << (PtrRtChecking.Need ? "" : " don't") << " need runtime memory checks.\n"); else { emitAnalysis(LoopAccessReport() << @@ -1555,7 +1600,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { } bool LoopAccessInfo::isUniform(Value *V) const { - return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); + return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1569,86 +1614,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, return nullptr; } -std::pair LoopAccessInfo::addRuntimeCheck( - Instruction *Loc, const SmallVectorImpl *PtrPartition) const { - if (!PtrRtCheck.Need) - return std::make_pair(nullptr, nullptr); +namespace { +/// \brief IR Values for the lower and upper bounds of a pointer evolution. We +/// need to use value-handles because SCEV expansion can invalidate previously +/// expanded values. Thus expansion of a pointer can invalidate the bounds for +/// a previous one. +struct PointerBounds { + TrackingVH Start; + TrackingVH End; +}; +} // end anonymous namespace - SmallVector, 2> Starts; - SmallVector, 2> Ends; +/// \brief Expand code for the lower and upper bound of the pointer group \p CG +/// in \p TheLoop. \return the values for the bounds. +static PointerBounds +expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, + Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, + const RuntimePointerChecking &PtrRtChecking) { + Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; + const SCEV *Sc = SE->getSCEV(Ptr); + + if (SE->isLoopInvariant(Sc, TheLoop)) { + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr + << "\n"); + return {Ptr, Ptr}; + } else { + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = Loc->getContext(); + + // Use this type for pointer arithmetic. + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = nullptr, *End = nullptr; + + DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); + return {Start, End}; + } +} - LLVMContext &Ctx = Loc->getContext(); - SCEVExpander Exp(*SE, DL, "induction"); - Instruction *FirstInst = nullptr; +/// \brief Turns a collection of checks into a collection of expanded upper and +/// lower bounds for both pointers in the check. +static SmallVector, 4> expandBounds( + const SmallVectorImpl &PointerChecks, + Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, + const RuntimePointerChecking &PtrRtChecking) { + SmallVector, 4> ChecksWithBounds; + + // Here we're relying on the SCEV Expander's cache to only emit code for the + // same bounds once. + std::transform( + PointerChecks.begin(), PointerChecks.end(), + std::back_inserter(ChecksWithBounds), + [&](const RuntimePointerChecking::PointerCheck &Check) { + PointerBounds + First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), + Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking); + return std::make_pair(First, Second); + }); + + return ChecksWithBounds; +} - for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) { - const RuntimePointerCheck::CheckingPtrGroup &CG = - PtrRtCheck.CheckingGroups[i]; - Value *Ptr = PtrRtCheck.Pointers[CG.Members[0]]; - const SCEV *Sc = SE->getSCEV(Ptr); - - if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr - << "\n"); - Starts.push_back(Ptr); - Ends.push_back(Ptr); - } else { - unsigned AS = Ptr->getType()->getPointerAddressSpace(); - - // Use this type for pointer arithmetic. - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); - Value *Start = nullptr, *End = nullptr; - - DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); - Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); - End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); - DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); - Starts.push_back(Start); - Ends.push_back(End); - } - } +std::pair LoopAccessInfo::addRuntimeChecks( + Instruction *Loc, + const SmallVectorImpl &PointerChecks) + const { + auto *SE = PSE.getSE(); + SCEVExpander Exp(*SE, DL, "induction"); + auto ExpandedChecks = + expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); + LLVMContext &Ctx = Loc->getContext(); + Instruction *FirstInst = nullptr; IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. Value *MemoryRuntimeCheck = nullptr; - for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) { - for (unsigned j = i + 1; j < PtrRtCheck.CheckingGroups.size(); ++j) { - const RuntimePointerCheck::CheckingPtrGroup &CGI = - PtrRtCheck.CheckingGroups[i]; - const RuntimePointerCheck::CheckingPtrGroup &CGJ = - PtrRtCheck.CheckingGroups[j]; - - if (!PtrRtCheck.needsChecking(CGI, CGJ, PtrPartition)) - continue; - - unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); - unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); - - assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && - (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && - "Trying to bounds check pointers with different address spaces"); - - Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); - Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); - - Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); - Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); - Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc"); - Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc"); - Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); - FirstInst = getFirstInst(FirstInst, Cmp0, Loc); - Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); - FirstInst = getFirstInst(FirstInst, Cmp1, Loc); - Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + for (const auto &Check : ExpandedChecks) { + const PointerBounds &A = Check.first, &B = Check.second; + // Check if two pointers (A and B) conflict where conflict is computed as: + // start(A) <= end(B) && start(B) <= end(A) + unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); + unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); + + assert((AS0 == B.End->getType()->getPointerAddressSpace()) && + (AS1 == A.End->getType()->getPointerAddressSpace()) && + "Trying to bounds check pointers with different address spaces"); + + Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); + Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + + Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); + + Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + FirstInst = getFirstInst(FirstInst, Cmp0, Loc); + Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + FirstInst = getFirstInst(FirstInst, Cmp1, Loc); + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + FirstInst = getFirstInst(FirstInst, IsConflict, Loc); + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - if (MemoryRuntimeCheck) { - IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, - "conflict.rdx"); - FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - } - MemoryRuntimeCheck = IsConflict; } + MemoryRuntimeCheck = IsConflict; } if (!MemoryRuntimeCheck) @@ -1664,13 +1738,21 @@ std::pair LoopAccessInfo::addRuntimeCheck( return std::make_pair(FirstInst, Check); } +std::pair +LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { + if (!PtrRtChecking.Need) + return std::make_pair(nullptr, nullptr); + + return addRuntimeChecks(Loc, PtrRtChecking.getChecks()); +} + LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : PtrRtCheck(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), - AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), + : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), + TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) @@ -1679,7 +1761,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { - if (PtrRtCheck.Need) + if (PtrRtChecking.Need) OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; else OS.indent(Depth) << "Memory dependences are safe\n"; @@ -1688,22 +1770,25 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; - if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { - OS.indent(Depth) << "Interesting Dependences:\n"; - for (auto &Dep : *InterestingDependences) { + if (auto *Dependences = DepChecker.getDependences()) { + OS.indent(Depth) << "Dependences:\n"; + for (auto &Dep : *Dependences) { Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions()); OS << "\n"; } } else - OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; + OS.indent(Depth) << "Too many dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. - PtrRtCheck.print(OS, Depth); + PtrRtChecking.print(OS, Depth); OS << "\n"; OS.indent(Depth) << "Store to invariant address was " << (StoreToLoopInvariantAddress ? "" : "not ") << "found in loop.\n"; + + OS.indent(Depth) << "SCEV assumptions:\n"; + PSE.getUnionPredicate().print(OS, Depth); } const LoopAccessInfo & @@ -1717,8 +1802,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) { if (!LAI) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - LAI = llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, - Strides); + LAI = + llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, Strides); #ifndef NDEBUG LAI->NumSymbolicStrides = Strides.size(); #endif @@ -1740,10 +1825,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { } bool LoopAccessAnalysis::runOnFunction(Function &F) { - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); @@ -1751,8 +1836,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) { } void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -1764,8 +1849,8 @@ static const char laa_name[] = "Loop Access Analysis"; #define LAA_NAME "loop-accesses" INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)