return SE->getSCEV(Ptr);
}
-void LoopAccessAnalysis::RuntimePointerCheck::insert(ScalarEvolution *SE,
- Loop *Lp, Value *Ptr,
- bool WritePtr,
- unsigned DepSetId,
- unsigned ASId,
- ValueToValueMap &Strides) {
+void LoopAccessInfo::RuntimePointerCheck::insert(ScalarEvolution *SE, Loop *Lp,
+ Value *Ptr, bool WritePtr,
+ unsigned DepSetId,
+ unsigned ASId,
+ ValueToValueMap &Strides) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
AliasSetId.push_back(ASId);
}
+/// \brief Decide whether the pointers stored at indices \p I and \p J have to
+/// be compared against each other by a runtime check.
+///
+/// \returns true only when at least one of the two pointers is written, the
+/// two pointers are in different dependency sets, and both are in the same
+/// alias set; false otherwise.
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I,
+                                                        unsigned J) const {
+  // The conditions are chained with short-circuit operators so that they are
+  // evaluated in order and evaluation stops at the first one that fails.
+  return
+      // Two read-only pointers never need an intersection check.
+      (IsWritePtr[I] || IsWritePtr[J]) &&
+      // Pointers inside one dependency set are not checked against each other.
+      DependencySetId[I] != DependencySetId[J] &&
+      // Only pointers that share an alias set are checked.
+      AliasSetId[I] == AliasSetId[J];
+}
+
namespace {
/// \brief Analyses memory accesses in a loop.
///
/// \brief Check whether we can check the pointers at runtime for
/// non-intersection.
- bool canCheckPtrAtRT(LoopAccessAnalysis::RuntimePointerCheck &RtCheck,
+ bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons,
ScalarEvolution *SE, Loop *TheLoop,
ValueToValueMap &Strides,
const Loop *Lp, ValueToValueMap &StridesMap);
bool AccessAnalysis::canCheckPtrAtRT(
- LoopAccessAnalysis::RuntimePointerCheck &RtCheck,
+ LoopAccessInfo::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop,
ValueToValueMap &StridesMap, bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
if (ASi != ASj) {
DEBUG(dbgs() << "LV: Runtime check would require comparison between"
- " different address spaces\n");
+ " different address spaces\n");
return false;
}
}
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L,
- const LoopAccessAnalysis::VectorizerParams &VectParams)
+ const LoopAccessInfo::VectorizerParams &VectParams)
: SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
ShouldRetryWithRuntimeCheck(false), VectParams(VectParams) {}
bool ShouldRetryWithRuntimeCheck;
/// \brief Vectorizer parameters used by the analysis.
- LoopAccessAnalysis::VectorizerParams VectParams;
+ LoopAccessInfo::VectorizerParams VectParams;
/// \brief Check whether there is a plausible dependence between the two
/// accesses.
// Make sure that the pointer does not point to aggregate types.
const PointerType *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
- DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
- "\n");
+ DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr
+ << "\n");
return 0;
}
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR) {
- DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
- << *Ptr << " SCEV: " << *PtrScev << "\n");
+ DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer " << *Ptr
+ << " SCEV: " << *PtrScev << "\n");
return 0;
}
// The access function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
- DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
- *Ptr << " SCEV: " << *PtrScev << "\n");
+ DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " << *Ptr
+ << " SCEV: " << *PtrScev << "\n");
}
// The address calculation must not wrap. Otherwise, a dependence could be
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
- << *Ptr << " SCEV: " << *PtrScev << "\n");
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
- DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
- " SCEV: " << *PtrScev << "\n");
+ DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr
+ << " SCEV: " << *PtrScev << "\n");
return 0;
}
// Store-load forwarding distance.
const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
// Maximum vector factor.
- unsigned MaxVFWithoutSLForwardIssues = VectParams.MaxVectorWidth*TypeByteSize;
- if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
+ unsigned MaxVFWithoutSLForwardIssues =
+ VectParams.MaxVectorWidth * TypeByteSize;
+ if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
}
}
- if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
- DEBUG(dbgs() << "LV: Distance " << Distance <<
- " that could cause a store-load forwarding conflict\n");
+ if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
+ DEBUG(dbgs() << "LV: Distance " << Distance
+ << " that could cause a store-load forwarding conflict\n");
return true;
}
if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
- MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth*TypeByteSize)
+ MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth * TypeByteSize)
MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
return false;
}
const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
- << "(Induction step: " << StrideAPtr << ")\n");
+ << "(Induction step: " << StrideAPtr << ")\n");
DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
- << *InstMap[BIdx] << ": " << *Dist << "\n");
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
// Need consecutive accesses. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
// Positive distance bigger than max vectorization factor.
if (ATy != BTy) {
- DEBUG(dbgs() <<
- "LV: ReadWrite-Write positive dependency with different types\n");
+ DEBUG(dbgs()
+ << "LV: ReadWrite-Write positive dependency with different types\n");
return false;
}
unsigned Distance = (unsigned) Val.getZExtValue();
// Bail out early if passed-in parameters make vectorization not feasible.
- unsigned ForcedFactor = (VectParams.VectorizationFactor ?
- VectParams.VectorizationFactor : 1);
- unsigned ForcedUnroll = (VectParams.VectorizationInterleave ?
- VectParams.VectorizationInterleave : 1);
+ unsigned ForcedFactor =
+ (VectParams.VectorizationFactor ? VectParams.VectorizationFactor : 1);
+ unsigned ForcedUnroll =
+ (VectParams.VectorizationInterleave ? VectParams.VectorizationInterleave
+ : 1);
// The distance must be bigger than the size needed for a vectorized version
// of the operation and the size of the vectorized operation must not be
2*TypeByteSize > MaxSafeDepDistBytes ||
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
DEBUG(dbgs() << "LV: Failure because of Positive distance "
- << Val.getSExtValue() << '\n');
+ << Val.getSExtValue() << '\n');
return true;
}
couldPreventStoreLoadForward(Distance, TypeByteSize))
return true;
- DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
- " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
+ DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue()
+ << " with max VF = " << MaxSafeDepDistBytes / TypeByteSize
+ << '\n');
return false;
}
return true;
}
-bool LoopAccessAnalysis::canVectorizeMemory(ValueToValueMap &Strides) {
+bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) {
- emitAnalysis(VectorizationReport(it) <<
- "instruction cannot be vectorized");
+ emitAnalysis(VectorizationReport(it)
+ << "instruction cannot be vectorized");
return false;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
// need runtime pointer checks.
- if (blockNeedsPredication(ST->getParent()))
+ if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
Accesses.addStore(Loc);
}
if (IsAnnotatedParallel) {
- DEBUG(dbgs()
- << "LV: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
+ DEBUG(dbgs() << "LV: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
return true;
}
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
// need runtime pointer checks.
- if (blockNeedsPredication(LD->getParent()))
+ if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
Accesses.addLoad(Loc, IsReadOnlyPtr);
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
Strides);
- DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
- " pointer comparisons.\n");
+ DEBUG(dbgs() << "LV: We need to do " << NumComparisons
+ << " pointer comparisons.\n");
// If we only have one set of dependences to check pointers among we don't
// need a runtime check.
if (NeedRTCheck && !CanDoRT) {
emitAnalysis(VectorizationReport() << "cannot identify array bounds");
- DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
- "the array bounds.\n");
+ DEBUG(dbgs() << "LV: We can't vectorize because we can't find "
+ << "the array bounds.\n");
PtrRtCheck.reset();
return false;
}
}
if (!CanVecMem)
- emitAnalysis(VectorizationReport() <<
- "unsafe dependent memory operations in loop");
+ emitAnalysis(VectorizationReport()
+ << "unsafe dependent memory operations in loop");
- DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
- " need a runtime memory check.\n");
+ DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't")
+ << " need a runtime memory check.\n");
return CanVecMem;
}
-bool LoopAccessAnalysis::blockNeedsPredication(BasicBlock *BB) {
+bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
+ DominatorTree *DT) {
assert(TheLoop->contains(BB) && "Unknown block used");
// Blocks that do not dominate the latch need predication.
return !DT->dominates(BB, Latch);
}
/// \brief Report an analysis message for this loop.
///
/// Forwards \p Message to VectorizationReport::emitAnalysis together with
/// TheFunction and TheLoop.
-void LoopAccessAnalysis::emitAnalysis(VectorizationReport &Message) {
+void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) {
VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
}
/// \brief Check whether \p V is uniform with respect to TheLoop.
///
/// \returns the result of asking ScalarEvolution whether the SCEV computed for
/// \p V is loop invariant in TheLoop.
-bool LoopAccessAnalysis::isUniform(Value *V) {
+bool LoopAccessInfo::isUniform(Value *V) {
return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}
}
std::pair<Instruction *, Instruction *>
-LoopAccessAnalysis::addRuntimeCheck(Instruction *Loc) {
+LoopAccessInfo::addRuntimeCheck(Instruction *Loc) {
Instruction *tnullptr = nullptr;
if (!PtrRtCheck.Need)
return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
const SCEV *Sc = SE->getSCEV(Ptr);
if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
+ DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
Value *MemoryRuntimeCheck = nullptr;
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
- // No need to check if two readonly pointers intersect.
- if (!PtrRtCheck.IsWritePtr[i] && !PtrRtCheck.IsWritePtr[j])
- continue;
-
- // Only need to check pointers between two different dependency sets.
- if (PtrRtCheck.DependencySetId[i] == PtrRtCheck.DependencySetId[j])
- continue;
- // Only need to check pointers in the same alias set.
- if (PtrRtCheck.AliasSetId[i] != PtrRtCheck.AliasSetId[j])
+ if (!PtrRtCheck.needsChecking(i, j))
continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();