#include "llvm/Transforms/Utils/VectorUtils.h"
using namespace llvm;
-#define DEBUG_TYPE "loop-accesses"
+#define DEBUG_TYPE "loop-vectorize"
+
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."),
+ cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor = 0;
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
+ "Zero is autoselect."),
+ cl::location(
+ VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave = 0;
+
+/// When performing memory disambiguation checks at runtime do not make more
+/// than this number of comparisons.
+const unsigned VectorizerParams::RuntimeMemoryCheckThreshold = 8;
+
+/// Maximum SIMD width.
+const unsigned VectorizerParams::MaxVectorWidth = 64;
+
+bool VectorizerParams::isInterleaveForced() {
+ return ::VectorizationInterleave.getNumOccurrences() > 0;
+}
-void VectorizationReport::emitAnalysis(const VectorizationReport &Message,
+void VectorizationReport::emitAnalysis(VectorizationReport &Message,
const Function *TheFunction,
- const Loop *TheLoop,
- const char *PassName) {
+ const Loop *TheLoop) {
DebugLoc DL = TheLoop->getStartLoc();
- if (const Instruction *I = Message.getInstr())
+ if (Instruction *I = Message.getInstr())
DL = I->getDebugLoc();
- emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
*TheFunction, DL, Message.str());
}
const SCEV *ByOne =
SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
- DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+ DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
<< "\n");
return ByOne;
}
RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
- DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
} else {
CanDoRT = false;
}
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
if (ASi != ASj) {
- DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
+ DEBUG(dbgs() << "LV: Runtime check would require comparison between"
" different address spaces\n");
return false;
}
// process read-only pointers. This allows us to skip dependence tests for
// read-only pointers.
- DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
+ DEBUG(dbgs() << "LV: Processing memory accesses...\n");
DEBUG(dbgs() << " AST: "; AST.dump());
- DEBUG(dbgs() << "LAA: Accesses:\n");
+ DEBUG(dbgs() << "LV: Accesses:\n");
DEBUG({
for (auto A : Accesses)
dbgs() << "\t" << *A.getPointer() << " (" <<
// Make sure that the pointer does not point to aggregate types.
const PointerType *PtrTy = cast<PointerType>(Ty);
if (PtrTy->getElementType()->isAggregateType()) {
- DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
- << *Ptr << "\n");
+ DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
+ "\n");
return 0;
}
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR) {
- DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
+ DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
<< *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
// The accesss function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
- DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
+ DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
*Ptr << " SCEV: " << *PtrScev << "\n");
}
bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
- DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+ DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
<< *Ptr << " SCEV: " << *PtrScev << "\n");
return 0;
}
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
- DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
+ DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
" SCEV: " << *PtrScev << "\n");
return 0;
}
}
if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
- DEBUG(dbgs() << "LAA: Distance " << Distance <<
+ DEBUG(dbgs() << "LV: Distance " << Distance <<
" that could cause a store-load forwarding conflict\n");
return true;
}
const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
- DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
+ DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
<< "(Induction step: " << StrideAPtr << ")\n");
- DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
+ DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
<< *InstMap[BIdx] << ": " << *Dist << "\n");
// Need consecutive accesses. We don't want to vectorize
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
- DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
+ DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n");
ShouldRetryWithRuntimeCheck = true;
return true;
}
ATy != BTy))
return true;
- DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
+ DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");
return false;
}
if (Val == 0) {
if (ATy == BTy)
return false;
- DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
+ DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
return true;
}
// Positive distance bigger than max vectorization factor.
if (ATy != BTy) {
DEBUG(dbgs() <<
- "LAA: ReadWrite-Write positive dependency with different types\n");
+ "LV: ReadWrite-Write positive dependency with different types\n");
return false;
}
if (Distance < 2*TypeByteSize ||
2*TypeByteSize > MaxSafeDepDistBytes ||
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
- DEBUG(dbgs() << "LAA: Failure because of Positive distance "
+ DEBUG(dbgs() << "LV: Failure because of Positive distance "
<< Val.getSExtValue() << '\n');
return true;
}
couldPreventStoreLoadForward(Distance, TypeByteSize))
return true;
- DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
+ DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
" with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
return false;
return true;
}
-bool LoopAccessInfo::canAnalyzeLoop() {
- // We can only analyze innermost loops.
- if (!TheLoop->empty()) {
- emitAnalysis(VectorizationReport() << "loop is not the innermost loop");
- return false;
- }
-
- // We must have a single backedge.
- if (TheLoop->getNumBackEdges() != 1) {
- emitAnalysis(
- VectorizationReport() <<
- "loop control flow is not understood by analyzer");
- return false;
- }
-
- // We must have a single exiting block.
- if (!TheLoop->getExitingBlock()) {
- emitAnalysis(
- VectorizationReport() <<
- "loop control flow is not understood by analyzer");
- return false;
- }
-
- // We only handle bottom-tested loops, i.e. loop in which the condition is
- // checked at the end of each iteration. With that we can assume that all
- // instructions in the loop are executed the same number of times.
- if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
- emitAnalysis(
- VectorizationReport() <<
- "loop control flow is not understood by analyzer");
- return false;
- }
-
- // We need to have a loop header.
- DEBUG(dbgs() << "LAA: Found a loop: " <<
- TheLoop->getHeader()->getName() << '\n');
-
- // ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
- if (ExitCount == SE->getCouldNotCompute()) {
- emitAnalysis(VectorizationReport() <<
- "could not determine number of loop iterations");
- DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
- return false;
- }
-
- return true;
-}
-
void LoopAccessInfo::analyzeLoop(ValueToValueMap &Strides) {
typedef SmallVector<Value*, 16> ValueVector;
if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
emitAnalysis(VectorizationReport(Ld)
<< "read with atomic ordering or volatile read");
- DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
+ DEBUG(dbgs() << "LV: Found a non-simple load.\n");
CanVecMem = false;
return;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
emitAnalysis(VectorizationReport(St)
<< "write with atomic ordering or volatile write");
- DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
+ DEBUG(dbgs() << "LV: Found a non-simple store.\n");
CanVecMem = false;
return;
}
// Check if we see any stores. If there are no stores, then we don't
// care if the pointers are *restrict*.
if (!Stores.size()) {
- DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
+ DEBUG(dbgs() << "LV: Found a read-only loop!\n");
CanVecMem = true;
return;
}
emitAnalysis(
VectorizationReport(ST)
<< "write to a loop invariant address could not be vectorized");
- DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
CanVecMem = false;
return;
}
if (IsAnnotatedParallel) {
DEBUG(dbgs()
- << "LAA: A loop annotated parallel, ignore memory dependency "
+ << "LV: A loop annotated parallel, ignore memory dependency "
<< "checks.\n");
CanVecMem = true;
return;
// If we write (or read-write) to a single destination and there are no
// other reads in this loop then is it safe to vectorize.
if (NumReadWrites == 1 && NumReads == 0) {
- DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
+ DEBUG(dbgs() << "LV: Found a write-only loop!\n");
CanVecMem = true;
return;
}
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
Strides);
- DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
+ DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
" pointer comparisons.\n");
// If we only have one set of dependences to check pointers among we don't
}
if (CanDoRT) {
- DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+ DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
}
if (NeedRTCheck && !CanDoRT) {
emitAnalysis(VectorizationReport() << "cannot identify array bounds");
- DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
+ DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
"the array bounds.\n");
PtrRtCheck.reset();
CanVecMem = false;
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
- DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
+ DEBUG(dbgs() << "LV: Checking memory dependencies\n");
CanVecMem = DepChecker.areDepsSafe(
DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
- DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
+ DEBUG(dbgs() << "LV: Retrying with memory checks\n");
NeedRTCheck = true;
// Clear the dependency checks. We assume they are not needed.
<< NumComparisons << " exceeds limit of "
<< VectorizerParams::RuntimeMemoryCheckThreshold
<< " dependent memory operations checked at runtime");
- DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
+ DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
PtrRtCheck.reset();
CanVecMem = false;
return;
emitAnalysis(VectorizationReport() <<
"unsafe dependent memory operations in loop");
- DEBUG(dbgs() << "LAA: We" << (NeedRTCheck ? "" : " don't") <<
+ DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
}
}
void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) {
- assert(!Report && "Multiple report generated");
+ assert(!Report && "Multiple reports generated");
Report = Message;
}
const SCEV *Sc = SE->getSCEV(Ptr);
if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
+ DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
*Ptr <<"\n");
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
- DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
unsigned AS = Ptr->getType()->getPointerAddressSpace();
// Use this type for pointer arithmetic.
FirstInst = getFirstInst(FirstInst, Check, Loc);
return std::make_pair(FirstInst, Check);
}
-
-LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI, AliasAnalysis *AA,
- DominatorTree *DT, ValueToValueMap &Strides)
- : TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0),
- NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false) {
- if (canAnalyzeLoop())
- analyzeLoop(Strides);
-}
-
-LoopAccessInfo &LoopAccessAnalysis::getInfo(Loop *L, ValueToValueMap &Strides) {
- auto &LAI = LoopAccessInfoMap[L];
-
-#ifndef NDEBUG
- assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
- "Symbolic strides changed for loop");
-#endif
-
- if (!LAI) {
- LAI = make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, Strides);
-#ifndef NDEBUG
- LAI->NumSymbolicStrides = Strides.size();
-#endif
- }
- return *LAI.get();
-}
-
-bool LoopAccessAnalysis::runOnFunction(Function &F) {
- SE = &getAnalysis<ScalarEvolution>();
- DL = F.getParent()->getDataLayout();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TLI = TLIP ? &TLIP->getTLI() : nullptr;
- AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- return false;
-}
-
-void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTreeWrapperPass>();
-
- AU.setPreservesAll();
-}
-
-char LoopAccessAnalysis::ID = 0;
-static const char laa_name[] = "Loop Access Analysis";
-#define LAA_NAME "loop-accesses"
-
-INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
-
-namespace llvm {
- Pass *createLAAPass() {
- return new LoopAccessAnalysis();
- }
-}