#define DEBUG_TYPE "loop-vectorize"
+static cl::opt<unsigned, true> // Second template argument `true`: the value lives in external storage.
+VectorizationFactor("force-vector-width", cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."),
+ cl::location(VectorizerParams::VectorizationFactor)); // Parsed value is written to this static member.
+unsigned VectorizerParams::VectorizationFactor = 0; // Backing storage for the option; 0 means autoselect.
+
+static cl::opt<unsigned, true> // Second template argument `true`: the value lives in external storage.
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
+ "Zero is autoselect."),
+ cl::location(
+ VectorizerParams::VectorizationInterleave)); // Parsed value is written to this static member.
+unsigned VectorizerParams::VectorizationInterleave = 0; // Backing storage for the option; 0 means autoselect.
+
+/// When performing memory disambiguation checks at runtime, do not make more
+/// than this number of comparisons.
+const unsigned VectorizerParams::RuntimeMemoryCheckThreshold = 8;
+
+/// Maximum SIMD width. NOTE(review): presumably counted in vector elements,
+/// matching the "SIMD width" set by -force-vector-width -- confirm.
+const unsigned VectorizerParams::MaxVectorWidth = 64;
+
+/// Reports whether the -force-vector-interleave option has been seen at least
+/// once, i.e. whether the interleave count was explicitly requested.
+bool VectorizerParams::isInterleaveForced() {
+ // The leading '::' selects the file-level cl::opt object rather than the
+ // static member of the same name.
+ const auto SeenCount = ::VectorizationInterleave.getNumOccurrences();
+ return SeenCount > 0;
+}
+
void VectorizationReport::emitAnalysis(VectorizationReport &Message,
const Function *TheFunction,
const Loop *TheLoop) {
AliasSetId.push_back(ASId);
}
+/// Decides whether the pointers at indices \p I and \p J have to be compared
+/// by a runtime check.
+///
+/// A check is needed only when all of the following hold:
+///  - at least one of the two pointers is written through (two read-only
+///    pointers never need to be checked against each other),
+///  - the pointers belong to different dependency sets,
+///  - the pointers belong to the same alias set.
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I,
+ unsigned J) const {
+ // Conditions are evaluated in the same order as the guards they describe;
+ // short-circuiting stops at the first one that rules the pair out.
+ return (IsWritePtr[I] || IsWritePtr[J]) &&
+ DependencySetId[I] != DependencySetId[J] &&
+ AliasSetId[I] == AliasSetId[J];
+}
+
+
namespace {
/// \brief Analyses memory accesses in a loop.
///
return true;
}
-bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
+void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
emitAnalysis(VectorizationReport(Ld)
<< "read with atomic ordering or volatile read");
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
- return false;
+ CanVecMem = false;
+ return;
}
NumLoads++;
Loads.push_back(Ld);
if (!St) {
emitAnalysis(VectorizationReport(it) <<
"instruction cannot be vectorized");
- return false;
+ CanVecMem = false;
+ return;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
emitAnalysis(VectorizationReport(St)
<< "write with atomic ordering or volatile write");
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
- return false;
+ CanVecMem = false;
+ return;
}
NumStores++;
Stores.push_back(St);
// care if the pointers are *restrict*.
if (!Stores.size()) {
DEBUG(dbgs() << "LV: Found a read-only loop!\n");
- return true;
+ CanVecMem = true;
+ return;
}
AccessAnalysis::DepCandidates DependentAccesses;
VectorizationReport(ST)
<< "write to a loop invariant address could not be vectorized");
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
- return false;
+ CanVecMem = false;
+ return;
}
// If we did *not* see this pointer before, insert it to the read-write
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
// need runtime pointer checks.
- if (blockNeedsPredication(ST->getParent()))
+ if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
Accesses.addStore(Loc);
DEBUG(dbgs()
<< "LV: A loop annotated parallel, ignore memory dependency "
<< "checks.\n");
- return true;
+ CanVecMem = true;
+ return;
}
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
// need runtime pointer checks.
- if (blockNeedsPredication(LD->getParent()))
+ if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
Accesses.addLoad(Loc, IsReadOnlyPtr);
// other reads in this loop then it is safe to vectorize.
if (NumReadWrites == 1 && NumReads == 0) {
DEBUG(dbgs() << "LV: Found a write-only loop!\n");
- return true;
+ CanVecMem = true;
+ return;
}
// Build dependence sets and check whether we need a runtime pointer bounds
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
"the array bounds.\n");
PtrRtCheck.reset();
- return false;
+ CanVecMem = false;
+ return;
}
PtrRtCheck.Need = NeedRTCheck;
- bool CanVecMem = true;
+ CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
DEBUG(dbgs() << "LV: Checking memory dependencies\n");
CanVecMem = DepChecker.areDepsSafe(
<< " dependent memory operations checked at runtime");
DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
PtrRtCheck.reset();
- return false;
+ CanVecMem = false;
+ return;
}
CanVecMem = true;
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
-
- return CanVecMem;
}
-bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB) {
+bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
+ DominatorTree *DT) {
assert(TheLoop->contains(BB) && "Unknown block used");
// Blocks that do not dominate the latch need predication.
}
/// Stores \p Message as the report of this analysis.
///
/// Asserts that Report is still unset (evaluates to false) beforehand, so
/// storing a second report is flagged in asserts-enabled builds.
void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) {
- assert(!Report && "Multiple report generated");
+ assert(!Report && "Multiple reports generated");
Report = Message;
}
Value *MemoryRuntimeCheck = nullptr;
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
- // No need to check if two readonly pointers intersect.
- if (!PtrRtCheck.IsWritePtr[i] && !PtrRtCheck.IsWritePtr[j])
- continue;
-
- // Only need to check pointers between two different dependency sets.
- if (PtrRtCheck.DependencySetId[i] == PtrRtCheck.DependencySetId[j])
- continue;
- // Only need to check pointers in the same alias set.
- if (PtrRtCheck.AliasSetId[i] != PtrRtCheck.AliasSetId[j])
+ if (!PtrRtCheck.needsChecking(i, j))
continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();