X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FVectorize%2FLoopVectorize.cpp;h=41a22f9bf4665492a4aa57b1569d609e5f576a37;hb=51c292a3605a0cf7be24e4b7dc40c2b8a740006c;hp=a8e1f4579c7f4242acd65c4c0583774576902d66;hpb=1386692ef64d3151da8986589eadf0c58aba5c50;p=oota-llvm.git diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a8e1f4579c7..41a22f9bf46 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -80,6 +80,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -118,11 +119,11 @@ static const unsigned TinyTripCountUnrollThreshold = 128; /// than this number of comparisons. static const unsigned RuntimeMemoryCheckThreshold = 8; -/// We use a metadata with this name to indicate that a scalar loop was -/// vectorized and that we don't need to re-vectorize it if we run into it -/// again. -static const char* -AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized"; +/// Maximum simd width. +static const unsigned MaxVectorWidth = 64; + +/// Maximum vectorization unroll count. +static const unsigned MaxUnrollFactor = 16; namespace { @@ -320,11 +321,11 @@ private: /// \brief Check if conditionally executed loads are hoistable. /// -/// This class has two functions. isHoistableLoad and canHoistAllLoads. +/// This class has two functions: isHoistableLoad and canHoistAllLoads. /// isHoistableLoad should be called on all load instructions that are executed /// conditionally. After all conditional loads are processed, the client should -/// call canHoistAllLoads to determine if all of the conditional execute loads -/// have an unconditional memory access in the loop. +/// call canHoistAllLoads to determine if all of the conditional executed loads +/// have an unconditional memory access to the same memory address in the loop. class LoadHoisting { typedef SmallPtrSet MemorySet; @@ -354,24 +355,10 @@ bool LoadHoisting::isHoistableLoad(Instruction *L) { static void addMemAccesses(BasicBlock *BB, SmallPtrSet &Set) { for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { - Instruction *I = &*BI; - Value *Addr = 0; - - // Try a load. - LoadInst *LI = dyn_cast(I); - if (LI) { - Addr = LI->getPointerOperand(); - Set.insert(Addr); - continue; - } - - // Try a store. - StoreInst *SI = dyn_cast(I); - if (!SI) - continue; - - Addr = SI->getPointerOperand(); - Set.insert(Addr); + if (LoadInst *LI = dyn_cast(BI)) // Try a load. + Set.insert(LI->getPointerOperand()); + else if (StoreInst *SI = dyn_cast(BI)) // Try a store. + Set.insert(SI->getPointerOperand()); } } @@ -472,7 +459,7 @@ public: // The starting value of the reduction. // It does not have to be zero! - Value *StartValue; + TrackingVH StartValue; // The instruction who's value is used outside the loop. Instruction *LoopExitInstr; // The kind of the reduction. @@ -517,7 +504,7 @@ public: /// This flag indicates if we need to add the runtime check. bool Need; /// Holds the pointers that we need to check. - SmallVector Pointers; + SmallVector, 2> Pointers; /// Holds the pointer value at the beginning of the loop. SmallVector Starts; /// Holds the pointer value at the end of the loop. @@ -531,7 +518,7 @@ public: InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} InductionInfo() : StartValue(0), IK(IK_NoInduction) {} /// Start value. - Value *StartValue; + TrackingVH StartValue; /// Induction kind. InductionKind IK; }; @@ -781,6 +768,127 @@ private: const TargetLibraryInfo *TLI; }; +/// Utility class for getting and setting loop vectorizer hints in the form +/// of loop metadata. +struct LoopVectorizeHints { + /// Vectorization width. + unsigned Width; + /// Vectorization unroll factor. + unsigned Unroll; + + LoopVectorizeHints(const Loop *L) + : Width(VectorizationFactor) + , Unroll(VectorizationUnroll) + , LoopID(L->getLoopID()) { + getHints(L); + // The command line options override any loop metadata except for when + // width == 1 which is used to indicate the loop is already vectorized. + if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1) + Width = VectorizationFactor; + if (VectorizationUnroll.getNumOccurrences() > 0) + Unroll = VectorizationUnroll; + } + + /// Return the loop vectorizer metadata prefix. + static StringRef Prefix() { return "llvm.vectorizer."; } + + MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) { + SmallVector Vals; + Vals.push_back(MDString::get(Context, Name)); + Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V)); + return MDNode::get(Context, Vals); + } + + /// Mark the loop L as already vectorized by setting the width to 1. + void setAlreadyVectorized(Loop *L) { + LLVMContext &Context = L->getHeader()->getContext(); + + Width = 1; + + // Create a new loop id with one more operand for the already_vectorized + // hint. If the loop already has a loop id then copy the existing operands. + SmallVector Vals(1); + if (LoopID) + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) + Vals.push_back(LoopID->getOperand(i)); + + Twine Name = Prefix() + "width"; + Vals.push_back(createHint(Context, Name.str(), Width)); + + MDNode *NewLoopID = MDNode::get(Context, Vals); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + + L->setLoopID(NewLoopID); + if (LoopID) + LoopID->replaceAllUsesWith(NewLoopID); + + LoopID = NewLoopID; + } + +private: + MDNode *LoopID; + + /// Find hints specified in the loop metadata. + void getHints(const Loop *L) { + if (!LoopID) + return; + + // First operand should refer to the loop id itself. + assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); + + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + const MDString *S = 0; + SmallVector Args; + + // The expected hint is either a MDString or a MDNode with the first + // operand a MDString. + if (const MDNode *MD = dyn_cast(LoopID->getOperand(i))) { + if (!MD || MD->getNumOperands() == 0) + continue; + S = dyn_cast(MD->getOperand(0)); + for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) + Args.push_back(MD->getOperand(i)); + } else { + S = dyn_cast(LoopID->getOperand(i)); + assert(Args.size() == 0 && "too many arguments for MDString"); + } + + if (!S) + continue; + + // Check if the hint starts with the vectorizer prefix. + StringRef Hint = S->getString(); + if (!Hint.startswith(Prefix())) + continue; + // Remove the prefix. + Hint = Hint.substr(Prefix().size(), StringRef::npos); + + if (Args.size() == 1) + getHint(Hint, Args[0]); + } + } + + // Check string hint with one operand. + void getHint(StringRef Hint, Value *Arg) { + const ConstantInt *C = dyn_cast(Arg); + if (!C) return; + unsigned Val = C->getZExtValue(); + + if (Hint == "width") { + assert(isPowerOf2_32(Val) && Val <= MaxVectorWidth && + "Invalid width metadata"); + Width = Val; + } else if (Hint == "unroll") { + assert(isPowerOf2_32(Val) && Val <= MaxUnrollFactor && + "Invalid unroll metadata"); + Unroll = Val; + } else + DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint); + } +}; + /// The LoopVectorize Pass. struct LoopVectorize : public LoopPass { /// Pass identification, replacement for typeid @@ -819,6 +927,13 @@ struct LoopVectorize : public LoopPass { DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); + LoopVectorizeHints Hints(L); + + if (Hints.Width == 1) { + DEBUG(dbgs() << "LV: Not vectorizing.\n"); + return false; + } + // Check if it is legal to vectorize the loop. LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI); if (!LVL.canVectorize()) { @@ -846,10 +961,10 @@ struct LoopVectorize : public LoopPass { // Select the optimal vectorization factor. LoopVectorizationCostModel::VectorizationFactor VF; - VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor); + VF = CM.selectVectorizationFactor(OptForSize, Hints.Width); // Select the unroll factor. - unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll, - VF.Width, VF.Cost); + unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width, + VF.Cost); if (VF.Width == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); @@ -864,6 +979,9 @@ struct LoopVectorize : public LoopPass { InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); LB.vectorize(&LVL); + // Mark the loop as already vectorized to avoid vectorizing again. + Hints.setAlreadyVectorized(L); + DEBUG(verifyFunction(*L->getHeader()->getParent())); return true; } @@ -1331,11 +1449,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); - // Mark the old scalar loop with metadata that tells us not to vectorize this - // loop again if we run into it. - MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None); - OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD); - // Some loops have a single integer induction variable, while other loops // don't. One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we @@ -1916,7 +2029,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch); VectorParts &Val = getVectorValue(LoopVal); for (unsigned part = 0; part < UF; ++part) { - // Make sure to add the reduction stat value only to the + // Make sure to add the reduction stat value only to the // first unroll part. Value *StartVal = (part == 0) ? VectorStart : Identity; cast(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader); @@ -2119,7 +2232,6 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // optimizations will clean it up. unsigned NumIncoming = P->getNumIncomingValues(); - assert(NumIncoming > 1 && "Invalid PHI"); // Generate a sequence of selects of the form: // SELECT(Mask3, In3, @@ -2131,10 +2243,11 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); for (unsigned part = 0; part < UF; ++part) { - // We don't need to 'select' the first PHI operand because it is - // the default value if all of the other masks don't match. + // We might have single edge PHIs (blocks) - use an identity + // 'select' for the first PHI operand. if (In == 0) - Entry[part] = In0[part]; + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], + In0[part]); else // Select between the current value and the previous incoming edge // based on the incoming mask. @@ -2432,12 +2545,19 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { return false; } + // Check that we can actually speculate the hoistable loads. + if (!LoadSpeculation.canHoistAllLoads()) + return false; + // We can if-convert this loop. return true; } bool LoopVectorizationLegality::canVectorize() { - assert(TheLoop->getLoopPreheader() && "No preheader!!"); + // We must have a loop in canonical form. Loops with indirectbr in them cannot + // be canonicalized. + if (!TheLoop->getLoopPreheader()) + return false; // We can only vectorize innermost loops. if (TheLoop->getSubLoopsVector().size()) @@ -2522,13 +2642,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); - // If we marked the scalar loop as "already vectorized" then no need - // to vectorize it again. - if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { - DEBUG(dbgs() << "LV: This loop was vectorized before\n"); - return false; - } - // Look for the attribute signaling the absence of NaNs. Function &F = *Header->getParent(); if (F.hasFnAttribute("no-nans-fp-math")) @@ -2704,9 +2817,7 @@ void LoopVectorizationLegality::collectLoopUniforms() { Uniforms.insert(I); // Insert all operands. - for (int i = 0, Op = I->getNumOperands(); i < Op; ++i) { - Worklist.push_back(I->getOperand(i)); - } + Worklist.insert(Worklist.end(), I->op_begin(), I->op_end()); } } @@ -3354,7 +3465,7 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it)) return false; - // We predicate stores at the moment. + // We don't predicate stores at the moment. if (it->mayWriteToMemory() || it->mayThrow()) return false; @@ -3369,10 +3480,6 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) { } } - // Check that we can actually speculate the hoistable loads. - if (!LoadSpeculation.canHoistAllLoads()) - return false; - return true; }