X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FVectorize%2FSLPVectorizer.cpp;h=c9b8e7b3c00ba80ee73ead15e7157f9e0e762d60;hb=551dac1f62026ef32ad294d8c1cc5b545b05935a;hp=3629eeec173909b9d78d1292af39c47ee484397a;hpb=dfacdd04cd2dd3b474fcabc5497255548f5506d5;p=oota-llvm.git diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3629eeec173..c9b8e7b3c00 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -58,8 +58,13 @@ static const unsigned RecursionMaxDepth = 12; /// RAII pattern to save the insertion point of the IR builder. class BuilderLocGuard { public: - BuilderLocGuard(IRBuilder<> &B) : Builder(B), Loc(B.GetInsertPoint()) {} - ~BuilderLocGuard() { if (Loc) Builder.SetInsertPoint(Loc); } + BuilderLocGuard(IRBuilder<> &B) : Builder(B), Loc(B.GetInsertPoint()), + DbgLoc(B.getCurrentDebugLocation()) {} + ~BuilderLocGuard() { + Builder.SetCurrentDebugLocation(DbgLoc); + if (Loc) + Builder.SetInsertPoint(Loc); + } private: // Prevent copying. @@ -67,10 +72,11 @@ private: BuilderLocGuard &operator=(const BuilderLocGuard &); IRBuilder<> &Builder; AssertingVH Loc; + DebugLoc DbgLoc; }; -/// A helper class for numbering instructions in multible blocks. -/// Numbers starts at zero for each basic block. +/// A helper class for numbering instructions in multiple blocks. +/// Numbers start at zero for each basic block. struct BlockNumbering { BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {} @@ -115,7 +121,7 @@ private: /// Maps instructions to numbers and back. SmallDenseMap InstrIdx; /// Maps integers to Instructions. - std::vector InstrVec; + SmallVector InstrVec; }; /// \returns the parent basic block if all of the instructions in \p VL @@ -264,12 +270,16 @@ private: /// This is the recursive part of buildTree. void buildTree_rec(ArrayRef Roots, unsigned Depth); - /// Vectorizer a single entry in the tree. + /// Vectorize a single entry in the tree. Value *vectorizeTree(TreeEntry *E); - /// Vectorizer a single entry in the tree, starting in \p VL. + /// Vectorize a single entry in the tree, starting in \p VL. Value *vectorizeTree(ArrayRef VL); + /// \returns the pointer to the vectorized value if \p VL is already + /// vectorized, or NULL. They may happen in cycles. + Value *alreadyVectorized(ArrayRef VL); + /// \brief Take the pointer operand from the Load/Store instruction. /// \returns NULL if this is not a valid Load/Store instruction. static Value *getPointerOperand(Value *I); @@ -298,15 +308,9 @@ private: /// \returns the index of the last instrucion in the BB from \p VL. int getLastIndex(ArrayRef VL); - /// \returns the Instrucion in the bundle \p VL. + /// \returns the Instruction in the bundle \p VL. Instruction *getLastInstruction(ArrayRef VL); - /// \returns the Instruction at index \p Index which is in Block \p BB. - Instruction *getInstructionForIndex(unsigned Index, BasicBlock *BB); - - /// \returns the index of the first User of \p VL. - int getFirstUserIndex(ArrayRef VL); - /// \returns a vector from a collection of scalars in \p VL. Value *Gather(ArrayRef VL, VectorType *Ty); @@ -391,7 +395,7 @@ private: SetVector GatherSeq; /// Numbers instructions in different blocks. - std::map BlocksNumbers; + DenseMap BlocksNumbers; // Analysis and block reference. Function *F; @@ -900,8 +904,13 @@ int BoUpSLP::getTreeCost() { DEBUG(dbgs() << "SLP: Calculating cost for tree of size " << VectorizableTree.size() << ".\n"); - if (!VectorizableTree.size()) { - assert(!ExternalUses.size() && "We should not have any external users"); + // Don't vectorize tiny trees. Small load/store chains or consecutive stores + // of constants will be vectoried in SelectionDAG in MergeConsecutiveStores. + // The SelectionDAG vectorizer can only handle pairs (trees of height = 2). + if (VectorizableTree.size() < 3) { + if (!VectorizableTree.size()) { + assert(!ExternalUses.size() && "We should not have any external users"); + } return 0; } @@ -994,19 +1003,27 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { Type *Ty = cast(PtrA->getType())->getElementType(); int64_t Sz = DL->getTypeStoreSize(Ty); - // If both pointers are GEPs: - if (GepA && GepB) { - // Check that they have the same base pointer. - if (GepA->getPointerOperand() != GepB->getPointerOperand()) - return false; + // Check if PtrA is the base and PtrB is a constant offset. + if (GepB && GepB->getPointerOperand() == PtrA) { + APInt Offset(BW, 0); + if (GepB->accumulateConstantOffset(*DL, Offset)) + return Offset.getSExtValue() == Sz; + return false; + } - // Check if the geps use a constant offset. - APInt OffsetA(BW, 0) ,OffsetB(BW, 0); - if (GepA->accumulateConstantOffset(*DL, OffsetA) && - GepB->accumulateConstantOffset(*DL, OffsetB)) - return ((OffsetB.getSExtValue() - OffsetA.getSExtValue()) == Sz); + // Check if PtrB is the base and PtrA is a constant offset. + if (GepA && GepA->getPointerOperand() == PtrB) { + APInt Offset(BW, 0); + if (GepA->accumulateConstantOffset(*DL, Offset)) + return Offset.getSExtValue() == -Sz; + return false; + } - if (GepA->getNumIndices() != GepB->getNumIndices()) + // If both pointers are GEPs: + if (GepA && GepB) { + // Check that they have the same base pointer and number of indices. + if (GepA->getPointerOperand() != GepB->getPointerOperand() || + GepA->getNumIndices() != GepB->getNumIndices()) return false; // Try to strip the geps. This makes SCEV faster. @@ -1022,17 +1039,12 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { Sz = 1; } - // Check if PtrA is the base and PtrB is a constant offset. - if (GepB && GepB->getPointerOperand() == PtrA) { - APInt Offset(BW, 0); - if (GepB->accumulateConstantOffset(*DL, Offset)) - return Offset.getZExtValue() == DL->getTypeStoreSize(Ty); + ConstantInt *CA = dyn_cast(PtrA); + ConstantInt *CB = dyn_cast(PtrB); + if (CA && CB) { + return (CA->getSExtValue() + Sz == CB->getSExtValue()); } - // GepA can't use PtrB as a base pointer. - if (GepA && GepA->getPointerOperand() == PtrB) - return false; - // Calculate the distance. const SCEV *PtrSCEVA = SE->getSCEV(PtrA); const SCEV *PtrSCEVB = SE->getSCEV(PtrB); @@ -1090,32 +1102,6 @@ Instruction *BoUpSLP::getLastInstruction(ArrayRef VL) { return I; } -Instruction *BoUpSLP::getInstructionForIndex(unsigned Index, BasicBlock *BB) { - BlockNumbering &BN = BlocksNumbers[BB]; - return BN.getInstruction(Index); -} - -int BoUpSLP::getFirstUserIndex(ArrayRef VL) { - BasicBlock *BB = getSameBlock(VL); - assert(BB && "All instructions must come from the same block"); - BlockNumbering &BN = BlocksNumbers[BB]; - - // Find the first user of the values. - int FirstUser = BN.getIndex(BB->getTerminator()); - for (unsigned i = 0, e = VL.size(); i < e; ++i) { - for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end(); - U != UE; ++U) { - Instruction *Instr = dyn_cast(*U); - - if (!Instr || Instr->getParent() != BB) - continue; - - FirstUser = std::min(FirstUser, BN.getIndex(Instr)); - } - } - return FirstUser; -} - Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { Value *Vec = UndefValue::get(Ty); // Generate the 'InsertElement' instruction. @@ -1146,6 +1132,16 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { return Vec; } +Value *BoUpSLP::alreadyVectorized(ArrayRef VL) { + if (ScalarToTreeEntry.count(VL[0])) { + int Idx = ScalarToTreeEntry[VL[0]]; + TreeEntry *En = &VectorizableTree[Idx]; + if (En->isSame(VL) && En->VectorizedValue) + return En->VectorizedValue; + } + return 0; +} + Value *BoUpSLP::vectorizeTree(ArrayRef VL) { if (ScalarToTreeEntry.count(VL[0])) { int Idx = ScalarToTreeEntry[VL[0]]; @@ -1187,19 +1183,32 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::PHI: { PHINode *PH = dyn_cast(VL0); Builder.SetInsertPoint(PH->getParent()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(PH->getDebugLoc()); PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); E->VectorizedValue = NewPhi; + // PHINodes may have multiple entries from the same block. We want to + // visit every block once. + SmallSet VisitedBBs; + for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { ValueList Operands; BasicBlock *IBB = PH->getIncomingBlock(i); + if (VisitedBBs.count(IBB)) { + NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB); + continue; + } + + VisitedBBs.insert(IBB); + // Prepare the operand vector. for (unsigned j = 0; j < E->Scalars.size(); ++j) Operands.push_back(cast(E->Scalars[j])-> getIncomingValueForBlock(IBB)); Builder.SetInsertPoint(IBB->getTerminator()); + Builder.SetCurrentDebugLocation(PH->getDebugLoc()); Value *Vec = vectorizeTree(Operands); NewPhi->addIncoming(Vec, IBB); } @@ -1234,7 +1243,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { INVL.push_back(cast(E->Scalars[i])->getOperand(0)); Builder.SetInsertPoint(getLastInstruction(E->Scalars)); + Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + Value *InVec = vectorizeTree(INVL); + + if (Value *V = alreadyVectorized(E->Scalars)) + return V; + CastInst *CI = dyn_cast(VL0); Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); E->VectorizedValue = V; @@ -1249,11 +1264,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Builder.SetInsertPoint(getLastInstruction(E->Scalars)); + Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + Value *L = vectorizeTree(LHSV); Value *R = vectorizeTree(RHSV); - Value *V; + + if (Value *V = alreadyVectorized(E->Scalars)) + return V; CmpInst::Predicate P0 = dyn_cast(VL0)->getPredicate(); + Value *V; if (Opcode == Instruction::FCmp) V = Builder.CreateFCmp(P0, L, R); else @@ -1271,9 +1291,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Builder.SetInsertPoint(getLastInstruction(E->Scalars)); + Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + Value *Cond = vectorizeTree(CondVec); Value *True = vectorizeTree(TrueVec); Value *False = vectorizeTree(FalseVec); + + if (Value *V = alreadyVectorized(E->Scalars)) + return V; + Value *V = Builder.CreateSelect(Cond, True, False); E->VectorizedValue = V; return V; @@ -1303,6 +1329,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Builder.SetInsertPoint(getLastInstruction(E->Scalars)); + Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); @@ -1310,6 +1338,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order"); } + if (Value *V = alreadyVectorized(E->Scalars)) + return V; + BinaryOperator *BinOp = cast(VL0); Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS); E->VectorizedValue = V; @@ -1319,6 +1350,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // Loads are inserted at the head of the tree because we don't want to // sink them all the way down past store instructions. Builder.SetInsertPoint(getLastInstruction(E->Scalars)); + Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + LoadInst *LI = cast(VL0); Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo()); @@ -1337,6 +1370,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { ValueOp.push_back(cast(E->Scalars[i])->getValueOperand()); Builder.SetInsertPoint(getLastInstruction(E->Scalars)); + Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + Value *VecValue = vectorizeTree(ValueOp); Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo()); @@ -1377,30 +1412,33 @@ void BoUpSLP::vectorizeTree() { Value *Vec = E->VectorizedValue; assert(Vec && "Can't find vectorizable value"); + Value *Lane = Builder.getInt32(it->Lane); // Generate extracts for out-of-tree users. // Find the insertion point for the extractelement lane. - Instruction *Loc = 0; if (PHINode *PN = dyn_cast(Vec)) { - Loc = PN->getParent()->getFirstInsertionPt(); + Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt()); + Value *Ex = Builder.CreateExtractElement(Vec, Lane); + User->replaceUsesOfWith(Scalar, Ex); } else if (isa(Vec)){ if (PHINode *PH = dyn_cast(User)) { for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) { if (PH->getIncomingValue(i) == Scalar) { - Loc = PH->getIncomingBlock(i)->getTerminator(); - break; + Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); + Value *Ex = Builder.CreateExtractElement(Vec, Lane); + PH->setOperand(i, Ex); } } - assert(Loc && "Unable to find incoming value for the PHI"); } else { - Loc = cast(User); + Builder.SetInsertPoint(cast(User)); + Value *Ex = Builder.CreateExtractElement(Vec, Lane); + User->replaceUsesOfWith(Scalar, Ex); } } else { - Loc = F->getEntryBlock().begin(); + Builder.SetInsertPoint(F->getEntryBlock().begin()); + Value *Ex = Builder.CreateExtractElement(Vec, Lane); + User->replaceUsesOfWith(Scalar, Ex); } - Builder.SetInsertPoint(Loc); - Value *Ex = Builder.CreateExtractElement(Vec, Builder.getInt32(it->Lane)); - User->replaceUsesOfWith(Scalar, Ex); DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); } @@ -1420,8 +1458,8 @@ void BoUpSLP::vectorizeTree() { Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { - for (Value::use_iterator User = Scalar->use_begin(), UE = Scalar->use_end(); - User != UE; ++User) { + for (Value::use_iterator User = Scalar->use_begin(), + UE = Scalar->use_end(); User != UE; ++User) { DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n"); assert(!MustGather.count(*User) && "Replacing gathered value with undef"); @@ -1553,6 +1591,10 @@ struct SLPVectorizer : public FunctionPass { if (!DL) return false; + // Don't vectorize when the attribute NoImplicitFloat is used. + if (F.hasFnAttribute(Attribute::NoImplicitFloat)) + return false; + DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); // Use the bollom up slp vectorizer to construct chains that start with @@ -1665,23 +1707,7 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef Chain, } } - if (Changed || ChainLen > VF) return Changed; - - // Handle short chains. This helps us catch types such as <3 x float> that - // are smaller than vector size. - R.buildTree(Chain); - - int Cost = R.getTreeCost(); - - if (Cost < CostThreshold) { - DEBUG(dbgs() << "SLP: Found store chain cost = " << Cost - << " for size = " << ChainLen << "\n"); - R.vectorizeTree(); - return true; - } - - return false; } bool SLPVectorizer::vectorizeStores(ArrayRef Stores, @@ -1697,10 +1723,8 @@ bool SLPVectorizer::vectorizeStores(ArrayRef Stores, // Do a quadratic search on all of the given stores and find // all of the pairs of stores that follow each other. for (unsigned i = 0, e = Stores.size(); i < e; ++i) { - if (Heads.count(Stores[i])) - continue; for (unsigned j = 0; j < e; ++j) { - if (i == j || Tails.count(Stores[j])) + if (i == j) continue; if (R.isConsecutiveAccess(Stores[i], Stores[j])) { @@ -1850,6 +1874,8 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool Changed = false; SmallVector Incoming; + SmallSet VisitedInstrs; + // Collect the incoming values from the PHIs. for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; ++instr) { @@ -1858,9 +1884,21 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!P) break; + // We may go through BB multiple times so skip the one we have checked. + if (VisitedInstrs.count(instr)) + continue; + VisitedInstrs.insert(instr); + // Stop constructing the list when you reach a different type. if (Incoming.size() && P->getType() != Incoming[0]->getType()) { - Changed |= tryToVectorizeList(Incoming, R); + if (tryToVectorizeList(Incoming, R)) { + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + Changed = true; + instr = BB->begin(); + ie = BB->end(); + } + Incoming.clear(); } @@ -1870,7 +1908,15 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (Incoming.size() > 1) Changed |= tryToVectorizeList(Incoming, R); - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + VisitedInstrs.clear(); + + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { + + // We may go through BB multiple times so skip the one we have checked. + if (VisitedInstrs.count(it)) + continue; + VisitedInstrs.insert(it); + if (isa(it)) continue; @@ -1892,20 +1938,38 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (Inst == P) Inst = BI->getOperand(1); - Changed |= tryToVectorize(dyn_cast(Inst), R); + if (tryToVectorize(dyn_cast(Inst), R)) { + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + Changed = true; + it = BB->begin(); + e = BB->end(); + } continue; } // Try to vectorize trees that start at compare instructions. if (CmpInst *CI = dyn_cast(it)) { if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { - Changed |= true; + Changed = true; + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + it = BB->begin(); + e = BB->end(); continue; } - for (int i = 0; i < 2; ++i) - if (BinaryOperator *BI = dyn_cast(CI->getOperand(i))) - Changed |= - tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); + + for (int i = 0; i < 2; ++i) { + if (BinaryOperator *BI = dyn_cast(CI->getOperand(i))) { + if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) { + Changed = true; + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + it = BB->begin(); + e = BB->end(); + } + } + } continue; } }