return SE->getSCEV(Ptr);
}
-void LoopAccessInfo::RuntimePointerCheck::insert(
- Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId,
- const ValueToValueMap &Strides) {
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
+ unsigned DepSetId, unsigned ASId,
+ const ValueToValueMap &Strides) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+
+ const SCEV *ScStart = AR->getStart();
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- Pointers.push_back(Ptr);
- Starts.push_back(AR->getStart());
- Ends.push_back(ScEnd);
- IsWritePtr.push_back(WritePtr);
- DependencySetId.push_back(DepSetId);
- AliasSetId.push_back(ASId);
- Exprs.push_back(Sc);
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // For expressions with negative step, the upper bound is ScStart and the
+ // lower bound is ScEnd.
+ if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
+ if (CStep->getValue()->isNegative())
+ std::swap(ScStart, ScEnd);
+ } else {
+ // Fallback case: the step is not constant, but the we can still
+ // get the upper and lower bounds of the interval by using min/max
+ // expressions.
+ ScStart = SE->getUMinExpr(ScStart, ScEnd);
+ ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
+ }
+
+ Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+}
+
+SmallVector<RuntimePointerChecking::PointerCheck, 4>
+RuntimePointerChecking::generateChecks() const {
+ SmallVector<PointerCheck, 4> Checks;
+
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
+ const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
+ const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
+
+ if (needsChecking(CGI, CGJ))
+ Checks.push_back(std::make_pair(&CGI, &CGJ));
+ }
+ }
+ return Checks;
}
-bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
- const CheckingPtrGroup &M, const CheckingPtrGroup &N,
- const SmallVectorImpl<int> *PtrPartition) const {
+void RuntimePointerChecking::generateChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ assert(Checks.empty() && "Checks is not empty");
+ groupChecks(DepCands, UseDependencies);
+ Checks = generateChecks();
+}
+
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+ const CheckingPtrGroup &N) const {
for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
- if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+ if (needsChecking(M.Members[I], N.Members[J]))
return true;
return false;
}
return I;
}
-bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer(
- unsigned Index) {
+bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
+ const SCEV *Start = RtCheck.Pointers[Index].Start;
+ const SCEV *End = RtCheck.Pointers[Index].End;
+
// Compare the starts and ends with the known minimum and maximum
// of this set. We need to know how we compare against the min/max
// of the set in order to be able to emit memchecks.
- const SCEV *Min0 = getMinFromExprs(RtCheck.Starts[Index], Low, RtCheck.SE);
+ const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
if (!Min0)
return false;
- const SCEV *Min1 = getMinFromExprs(RtCheck.Ends[Index], High, RtCheck.SE);
+ const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
if (!Min1)
return false;
// Update the low bound expression if we've found a new min value.
- if (Min0 == RtCheck.Starts[Index])
- Low = RtCheck.Starts[Index];
+ if (Min0 == Start)
+ Low = Start;
// Update the high bound expression if we've found a new max value.
- if (Min1 != RtCheck.Ends[Index])
- High = RtCheck.Ends[Index];
+ if (Min1 != End)
+ High = End;
Members.push_back(Index);
return true;
}
-void LoopAccessInfo::RuntimePointerCheck::groupChecks(
- MemoryDepChecker::DepCandidates &DepCands,
- bool UseDependencies) {
+void RuntimePointerChecking::groupChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
// We build the groups from dependency candidates equivalence classes
// because:
// - We know that pointers in the same equivalence class share
CheckingGroups.clear();
+ // If we need to check two pointers to the same underlying object
+ // with a non-constant difference, we shouldn't perform any pointer
+ // grouping with those pointers. This is because we can easily get
+ // into cases where the resulting check would return false, even when
+ // the accesses are safe.
+ //
+ // The following example shows this:
+ // for (i = 0; i < 1000; ++i)
+ // a[5000 + i * m] = a[i] + a[i + 9000]
+ //
+ // Here grouping gives a check of (5000, 5000 + 1000 * m) against
+ // (0, 10000) which is always false. However, if m is 1, there is no
+ // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
+ // us to perform an accurate check in this case.
+ //
+ // The above case requires that we have an UnknownDependence between
+ // accesses to the same underlying object. This cannot happen unless
+ // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+ // is also false. In this case we will use the fallback path and create
+ // separate checking groups for all pointers.
+
// If we don't have the dependency partitions, construct a new
- // checking pointer group for each pointer.
+ // checking pointer group for each pointer. This is also required
+ // for correctness, because in this case we can have checking between
+ // pointers to the same underlying object.
if (!UseDependencies) {
for (unsigned I = 0; I < Pointers.size(); ++I)
CheckingGroups.push_back(CheckingPtrGroup(I, *this));
unsigned TotalComparisons = 0;
DenseMap<Value *, unsigned> PositionMap;
- for (unsigned Pointer = 0; Pointer < Pointers.size(); ++Pointer)
- PositionMap[Pointers[Pointer]] = Pointer;
+ for (unsigned Index = 0; Index < Pointers.size(); ++Index)
+ PositionMap[Pointers[Index].PointerValue] = Index;
+
+ // We need to keep track of what pointers we've already seen so we
+ // don't process them twice.
+ SmallSet<unsigned, 2> Seen;
// Go through all equivalence classes, get the the "pointer check groups"
- // and add them to the overall solution.
- for (auto DI = DepCands.begin(), DE = DepCands.end(); DI != DE; ++DI) {
- if (!DI->isLeader())
+ // and add them to the overall solution. We use the order in which accesses
+ // appear in 'Pointers' to enforce determinism.
+ for (unsigned I = 0; I < Pointers.size(); ++I) {
+ // We've seen this pointer before, and therefore already processed
+ // its equivalence class.
+ if (Seen.count(I))
continue;
- SmallVector<CheckingPtrGroup, 2> Groups;
+ MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
+ Pointers[I].IsWritePtr);
- for (auto MI = DepCands.member_begin(DI), ME = DepCands.member_end();
+ SmallVector<CheckingPtrGroup, 2> Groups;
+ auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
+
+ // Because DepCands is constructed by visiting accesses in the order in
+ // which they appear in alias sets (which is deterministic) and the
+ // iteration order within an equivalence class member is only dependent on
+ // the order in which unions and insertions are performed on the
+ // equivalence class, the iteration order is deterministic.
+ for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
MI != ME; ++MI) {
unsigned Pointer = PositionMap[MI->getPointer()];
bool Merged = false;
+ // Mark this pointer as seen.
+ Seen.insert(Pointer);
// Go through all the existing sets and see if we can find one
// which can include this pointer.
}
}
-bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
- unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+bool RuntimePointerChecking::arePointersInSamePartition(
+ const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
+ unsigned PtrIdx2) {
+ return (PtrToPartition[PtrIdx1] != -1 &&
+ PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
+}
+
+bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
+ const PointerInfo &PointerI = Pointers[I];
+ const PointerInfo &PointerJ = Pointers[J];
+
// No need to check if two readonly pointers intersect.
- if (!IsWritePtr[I] && !IsWritePtr[J])
+ if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
return false;
// Only need to check pointers between two different dependency sets.
- if (DependencySetId[I] == DependencySetId[J])
+ if (PointerI.DependencySetId == PointerJ.DependencySetId)
return false;
// Only need to check pointers in the same alias set.
- if (AliasSetId[I] != AliasSetId[J])
- return false;
-
- // If PtrPartition is set omit checks between pointers of the same partition.
- // Partition number -1 means that the pointer is used in multiple partitions.
- // In this case we can't omit the check.
- if (PtrPartition && (*PtrPartition)[I] != -1 &&
- (*PtrPartition)[I] == (*PtrPartition)[J])
+ if (PointerI.AliasSetId != PointerJ.AliasSetId)
return false;
return true;
}
-void LoopAccessInfo::RuntimePointerCheck::print(
- raw_ostream &OS, unsigned Depth,
- const SmallVectorImpl<int> *PtrPartition) const {
-
- OS.indent(Depth) << "Run-time memory checks:\n";
-
+void RuntimePointerChecking::printChecks(
+ raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
+ unsigned Depth) const {
unsigned N = 0;
- for (unsigned I = 0; I < CheckingGroups.size(); ++I)
- for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
- if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
- OS.indent(Depth) << "Check " << N++ << ":\n";
- OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
-
- for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
- OS.indent(Depth + 2) << *Pointers[CheckingGroups[I].Members[K]]
- << "\n";
- if (PtrPartition)
- OS << " (Partition: "
- << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
- << "\n";
- }
+ for (const auto &Check : Checks) {
+ const auto &First = Check.first->Members, &Second = Check.second->Members;
- OS.indent(Depth + 2) << "Against group " << J << ":\n";
+ OS.indent(Depth) << "Check " << N++ << ":\n";
- for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
- OS.indent(Depth + 2) << *Pointers[CheckingGroups[J].Members[K]]
- << "\n";
- if (PtrPartition)
- OS << " (Partition: "
- << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
- << "\n";
- }
- }
+ OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
+ for (unsigned K = 0; K < First.size(); ++K)
+ OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
- OS.indent(Depth) << "Grouped accesses:\n";
- for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
- OS.indent(Depth + 2) << "Group " << I << ":\n";
- OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
- << " High: " << *CheckingGroups[I].High << ")\n";
- for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
- OS.indent(Depth + 6) << "Member: " << *Exprs[CheckingGroups[I].Members[J]]
- << "\n";
- }
+ OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
+ for (unsigned K = 0; K < Second.size(); ++K)
+ OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
}
}
-unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
- const SmallVectorImpl<int> *PtrPartition) const {
-
- unsigned NumPartitions = CheckingGroups.size();
- unsigned CheckCount = 0;
+void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
- for (unsigned I = 0; I < NumPartitions; ++I)
- for (unsigned J = I + 1; J < NumPartitions; ++J)
- if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
- CheckCount++;
- return CheckCount;
-}
+ OS.indent(Depth) << "Run-time memory checks:\n";
+ printChecks(OS, Checks, Depth);
-bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
- const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
+ OS.indent(Depth) << "Grouped accesses:\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ const auto &CG = CheckingGroups[I];
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition))
- return true;
- return false;
+ OS.indent(Depth + 2) << "Group " << &CG << ":\n";
+ OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
+ << ")\n";
+ for (unsigned J = 0; J < CG.Members.size(); ++J) {
+ OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
+ << "\n";
+ }
+ }
}
namespace {
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
+ IsRTCheckAnalysisNeeded(false) {}
/// \brief Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
}
/// \brief Check whether we can check the pointers at runtime for
- /// non-intersection. Returns true when we have 0 pointers
- /// (a check on 0 pointers for non-intersection will always return true).
- bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
- bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop,
- const ValueToValueMap &Strides,
+ /// non-intersection.
+ ///
+ /// Returns true if we need no check or if we do and we can generate them
+ /// (i.e. the pointers have computable bounds).
+ bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
processMemAccesses();
}
- bool isRTCheckNeeded() { return IsRTCheckNeeded; }
-
+ /// \brief Initial processing of memory accesses determined that we need to
+ /// perform dependency checking.
+ ///
+ /// Note that this can later be cleared if we retry memcheck analysis without
+ /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
typedef SetVector<MemAccessInfo> PtrAccessSet;
/// \brief Go over all memory access and check whether runtime pointer checks
- /// are needed /// and build sets of dependency check candidates.
+ /// are needed and build sets of dependency check candidates.
void processMemAccesses();
/// Set of all accesses.
/// dependence check.
MemoryDepChecker::DepCandidates &DepCands;
- bool IsRTCheckNeeded;
+ /// \brief Initial processing of memory accesses determined that we may need
+ /// to add memchecks. Perform the analysis to determine the necessary checks.
+ ///
+ /// Note that, this is different from isDependencyCheckNeeded. When we retry
+ /// memcheck analysis without dependency checking
+ /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
+ /// while this remains set if we have potentially dependent accesses.
+ bool IsRTCheckAnalysisNeeded;
};
} // end anonymous namespace
return AR->isAffine();
}
-bool AccessAnalysis::canCheckPtrAtRT(
- LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck,
- ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
- bool ShouldCheckStride) {
+bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
+ ScalarEvolution *SE, Loop *TheLoop,
+ const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
- NeedRTCheck = false;
- if (!IsRTCheckNeeded) return true;
+ bool NeedRTCheck = false;
+ if (!IsRTCheckAnalysisNeeded) return true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i + 1; j < NumPointers; ++j) {
// Only need to check pointers between two different dependency sets.
- if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ if (RtCheck.Pointers[i].DependencySetId ==
+ RtCheck.Pointers[j].DependencySetId)
continue;
// Only need to check pointers in the same alias set.
- if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
continue;
- Value *PtrI = RtCheck.Pointers[i];
- Value *PtrJ = RtCheck.Pointers[j];
+ Value *PtrI = RtCheck.Pointers[i].PointerValue;
+ Value *PtrJ = RtCheck.Pointers[j].PointerValue;
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
}
if (NeedRTCheck && CanDoRT)
- RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+ RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
+
+ DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
+ << " pointer comparisons.\n");
- return CanDoRT;
+ RtCheck.Need = NeedRTCheck;
+
+ bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
+ if (!CanDoRTIfNeeded)
+ RtCheck.reset();
+ return CanDoRTIfNeeded;
}
void AccessAnalysis::processMemAccesses() {
// catch "a[i] = a[i] + " without having to do a dependence check).
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
CheckDeps.insert(Access);
- IsRTCheckNeeded = true;
+ IsRTCheckAnalysisNeeded = true;
}
if (IsWrite)
/// \brief Check whether the access through \p Ptr has a constant stride.
int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap) {
- const Type *Ty = Ptr->getType();
+ Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
// Make sure that the pointer does not point to aggregate types.
- const PointerType *PtrTy = cast<PointerType>(Ty);
+ auto *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
<< *Ptr << "\n");
unsigned NumReads = 0;
unsigned NumReadWrites = 0;
- PtrRtCheck.Pointers.clear();
- PtrRtCheck.Need = false;
+ PtrRtChecking.Pointers.clear();
+ PtrRtChecking.Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- bool NeedRTCheck;
- bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck,
- NeedRTCheck, SE,
- TheLoop, Strides);
-
- DEBUG(dbgs() << "LAA: We need to do "
- << PtrRtCheck.getNumberOfChecks(nullptr)
- << " pointer comparisons.\n");
-
- // Check that we found the bounds for the pointer.
- if (CanDoRT)
- DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
- else if (NeedRTCheck) {
+ bool CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
+ if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
- DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
- "the array bounds.\n");
- PtrRtCheck.reset();
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
+ << "the array bounds.\n");
CanVecMem = false;
return;
}
- PtrRtCheck.Need = NeedRTCheck;
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
- NeedRTCheck = true;
// Clear the dependency checks. We assume they are not needed.
Accesses.resetDepChecks(DepChecker);
- PtrRtCheck.reset();
- PtrRtCheck.Need = true;
+ PtrRtChecking.reset();
+ PtrRtChecking.Need = true;
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE,
- TheLoop, Strides, true);
+ CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
// Check that we found the bounds for the pointer.
- if (NeedRTCheck && !CanDoRT) {
+ if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport()
<< "cannot check memory dependencies at runtime");
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
- PtrRtCheck.reset();
CanVecMem = false;
return;
}
if (CanVecMem)
DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
- << (NeedRTCheck ? "" : " don't")
- << " need a runtime memory check.\n");
+ << (PtrRtChecking.Need ? "" : " don't")
+ << " need runtime memory checks.\n");
else {
emitAnalysis(LoopAccessReport() <<
"unsafe dependent memory operations in loop");
return nullptr;
}
-std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
- Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
- if (!PtrRtCheck.Need)
- return std::make_pair(nullptr, nullptr);
+/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
+/// need to use value-handles because SCEV expansion can invalidate previously
+/// expanded values. Thus expansion of a pointer can invalidate the bounds for
+/// a previous one.
+struct PointerBounds {
+ TrackingVH<Value> Start;
+ TrackingVH<Value> End;
+};
- SmallVector<TrackingVH<Value>, 2> Starts;
- SmallVector<TrackingVH<Value>, 2> Ends;
+/// \brief Expand code for the lower and upper bound of the pointer group \p CG
+/// in \p TheLoop. \return the values for the bounds.
+static PointerBounds
+expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
+ Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
+ const RuntimePointerChecking &PtrRtChecking) {
+ Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
+ return {Ptr, Ptr};
+ } else {
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = Loc->getContext();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+ Value *Start = nullptr, *End = nullptr;
+
+ DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ return {Start, End};
+ }
+}
- LLVMContext &Ctx = Loc->getContext();
- SCEVExpander Exp(*SE, DL, "induction");
- Instruction *FirstInst = nullptr;
+/// \brief Turns a collection of checks into a collection of expanded upper and
+/// lower bounds for both pointers in the check.
+static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
+ Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
+ const RuntimePointerChecking &PtrRtChecking) {
+ SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
+
+ // Here we're relying on the SCEV Expander's cache to only emit code for the
+ // same bounds once.
+ std::transform(
+ PointerChecks.begin(), PointerChecks.end(),
+ std::back_inserter(ChecksWithBounds),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ PointerBounds
+ First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
+ Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
+ return std::make_pair(First, Second);
+ });
+
+ return ChecksWithBounds;
+}
- for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
- const RuntimePointerCheck::CheckingPtrGroup &CG =
- PtrRtCheck.CheckingGroups[i];
- Value *Ptr = PtrRtCheck.Pointers[CG.Members[0]];
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
- << "\n");
- Starts.push_back(Ptr);
- Ends.push_back(Ptr);
- } else {
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
- Value *Start = nullptr, *End = nullptr;
-
- DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
- Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
- End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
- DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
- Starts.push_back(Start);
- Ends.push_back(End);
- }
- }
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
+ Instruction *Loc,
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
+ const {
+ SCEVExpander Exp(*SE, DL, "induction");
+ auto ExpandedChecks =
+ expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking);
+
+ LLVMContext &Ctx = Loc->getContext();
+ Instruction *FirstInst = nullptr;
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
- for (unsigned j = i + 1; j < PtrRtCheck.CheckingGroups.size(); ++j) {
- const RuntimePointerCheck::CheckingPtrGroup &CGI =
- PtrRtCheck.CheckingGroups[i];
- const RuntimePointerCheck::CheckingPtrGroup &CGJ =
- PtrRtCheck.CheckingGroups[j];
-
- if (!PtrRtCheck.needsChecking(CGI, CGJ, PtrPartition))
- continue;
- unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
- unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
-
- assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
- (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
- "Trying to bounds check pointers with different address spaces");
-
- Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
- Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
-
- Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
- FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
- Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
- FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
- Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ for (const auto &Check : ExpandedChecks) {
+ const PointerBounds &A = Check.first, &B = Check.second;
+ // Check if two pointers (A and B) conflict where conflict is computed as:
+ // start(A) <= end(B) && start(B) <= end(A)
+ unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
+ unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
+
+ assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
+ (AS1 == A.End->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict =
+ ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- if (MemoryRuntimeCheck) {
- IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
- "conflict.rdx");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- }
- MemoryRuntimeCheck = IsConflict;
}
+ MemoryRuntimeCheck = IsConflict;
}
if (!MemoryRuntimeCheck)
return std::make_pair(FirstInst, Check);
}
+std::pair<Instruction *, Instruction *>
+LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
+ if (!PtrRtChecking.Need)
+ return std::make_pair(nullptr, nullptr);
+
+ return addRuntimeChecks(Loc, PtrRtChecking.getChecks());
+}
+
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : PtrRtCheck(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI),
- AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+ : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
+ TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
- if (PtrRtCheck.Need)
+ if (PtrRtChecking.Need)
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
else
OS.indent(Depth) << "Memory dependences are safe\n";
OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
// List the pair of accesses need run-time checks to prove independence.
- PtrRtCheck.print(OS, Depth);
+ PtrRtChecking.print(OS, Depth);
OS << "\n";
OS.indent(Depth) << "Store to invariant address was "
}
bool LoopAccessAnalysis::runOnFunction(Function &F) {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
}
void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
#define LAA_NAME "loop-accesses"
INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)