From: Arnold Schwaighofer Date: Mon, 31 Mar 2014 23:05:56 +0000 (+0000) Subject: Revert "SLPVectorizer: Ignore users that are insertelements we can reschedule them" X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=694365f9555138e9a1d11476d05f1dd5aeec38bf Revert "SLPVectorizer: Ignore users that are insertelements we can reschedule them" This reverts commit r205018. Conflicts: lib/Transforms/Vectorize/SLPVectorizer.cpp test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll This is breaking libclc build. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205260 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index d2d4db33f69..f6b5b122742 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -365,13 +365,13 @@ public: /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots, ignoring users for - /// the purpose of scheduling and extraction in the \p UserIgnoreLst. - void buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst = None); + /// Construct a vectorizable tree that starts at \p Roots and is possibly + /// used by a reduction of \p RdxOps. + void buildTree(ArrayRef Roots, ValueSet *RdxOps = 0); /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { + RdxOps = 0; VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); @@ -527,8 +527,8 @@ private: /// Numbers instructions in different blocks. DenseMap BlocksNumbers; - /// List of users to ignore during scheduling and that don't need extracting. - ArrayRef UserIgnoreList; + /// Reduction operators. + ValueSet *RdxOps; // Analysis and block reference. Function *F; @@ -542,10 +542,9 @@ private: IRBuilder<> Builder; }; -void BoUpSLP::buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst) { +void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { deleteTree(); - UserIgnoreList = UserIgnoreLst; + RdxOps = Rdx; if (!getSameType(Roots)) return; buildTree_rec(Roots, 0); @@ -577,9 +576,8 @@ void BoUpSLP::buildTree(ArrayRef Roots, if (!UserInst) continue; - // Ignore users in the user ignore list. - if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) != - UserIgnoreList.end()) + // Ignore uses that are part of the reduction. + if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end()) continue; DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " << @@ -710,9 +708,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { continue; } - // Ignore users in the user ignore list. - if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) != - UserIgnoreList.end()) + // This user is part of the reduction. + if (RdxOps && RdxOps->count(UI)) continue; // Make sure that we can schedule this unknown user. @@ -1749,9 +1746,8 @@ Value *BoUpSLP::vectorizeTree() { DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); assert((ScalarToTreeEntry.count(U) || - // It is legal to replace users in the ignorelist by undef. - (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) != - UserIgnoreList.end())) && + // It is legal to replace the reduction users by undef. + (RdxOps && RdxOps->count(U))) && "Replacing out-of-tree value with undef"); } #endif @@ -1955,11 +1951,8 @@ private: bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); /// \brief Try to vectorize a list of operands. - /// \@param BuildVector A list of users to ignore for the purpose of - /// scheduling and that don't need extracting. /// \returns true if a value was vectorized. - bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R, - ArrayRef BuildVector = None); + bool tryToVectorizeList(ArrayRef VL, BoUpSLP &R); /// \brief Try to vectorize a chain that may start at the operands of \V; bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); @@ -2132,8 +2125,7 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { return tryToVectorizeList(VL, R); } -bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, - ArrayRef BuildVector) { +bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) { if (VL.size() < 2) return false; @@ -2183,33 +2175,13 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, << "\n"); ArrayRef Ops = VL.slice(i, OpsWidth); - ArrayRef BuildVectorSlice; - if (!BuildVector.empty()) - BuildVectorSlice = BuildVector.slice(i, OpsWidth); - - R.buildTree(Ops, BuildVectorSlice); + R.buildTree(Ops); int Cost = R.getTreeCost(); if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); - Value *VectorizedRoot = R.vectorizeTree(); - - // Reconstruct the build vector by extracting the vectorized root. This - // way we handle the case where some elements of the vector are undefined. - // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) - if (!BuildVectorSlice.empty()) { - Instruction *InsertAfter = cast(VectorizedRoot); - for (auto &V : BuildVectorSlice) { - InsertElementInst *IE = cast(V); - IRBuilder<> Builder(++BasicBlock::iterator(InsertAfter)); - Instruction *Extract = cast( - Builder.CreateExtractElement(VectorizedRoot, IE->getOperand(2))); - IE->setOperand(1, Extract); - IE->removeFromParent(); - IE->insertAfter(Extract); - InsertAfter = IE; - } - } + R.vectorizeTree(); + // Move to the next bundle. i += VF - 1; Changed = true; @@ -2318,7 +2290,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, /// *p = /// class HorizontalReduction { - SmallVector ReductionOps; + SmallPtrSet ReductionOps; SmallVector ReducedVals; BinaryOperator *ReductionRoot; @@ -2412,7 +2384,7 @@ public: // We need to be able to reassociate the adds. if (!TreeN->isAssociative()) return false; - ReductionOps.push_back(TreeN); + ReductionOps.insert(TreeN); } // Retract. Stack.pop_back(); @@ -2449,7 +2421,7 @@ public: for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth); - V.buildTree(ValsToReduce, ReductionOps); + V.buildTree(ValsToReduce, &ReductionOps); // Estimate cost. int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); @@ -2568,16 +2540,13 @@ private: /// /// Returns true if it matches /// -static bool findBuildVector(InsertElementInst *FirstInsertElem, - SmallVectorImpl &BuildVector, - SmallVectorImpl &BuildVectorOpds) { - if (!isa(FirstInsertElem->getOperand(0))) +static bool findBuildVector(InsertElementInst *IE, + SmallVectorImpl &Ops) { + if (!isa(IE->getOperand(0))) return false; - InsertElementInst *IE = FirstInsertElem; while (true) { - BuildVector.push_back(IE); - BuildVectorOpds.push_back(IE->getOperand(1)); + Ops.push_back(IE->getOperand(1)); if (IE->use_empty()) return false; @@ -2747,16 +2716,12 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } // Try to vectorize trees that start at insertelement instructions. - if (InsertElementInst *FirstInsertElem = dyn_cast(it)) { - SmallVector BuildVector; - SmallVector BuildVectorOpds; - if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds)) + if (InsertElementInst *IE = dyn_cast(it)) { + SmallVector Ops; + if (!findBuildVector(IE, Ops)) continue; - // Vectorize starting with the build vector operands ignoring the - // BuildVector instructions for the purpose of scheduling and user - // extraction. - if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) { + if (tryToVectorizeList(Ops, R)) { Changed = true; it = BB->begin(); e = BB->end(); diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 620292bdaf3..7537ea3b053 100644 --- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -195,30 +195,6 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, ret <4 x float> %rb } -; Make sure that vectorization happens even if insertelements operations -; must be rescheduled. The case here is from compiling Julia. -define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: @reschedule_extract( -; CHECK: %1 = fadd <4 x float> %a, %b - %a0 = extractelement <4 x float> %a, i32 0 - %b0 = extractelement <4 x float> %b, i32 0 - %c0 = fadd float %a0, %b0 - %v0 = insertelement <4 x float> undef, float %c0, i32 0 - %a1 = extractelement <4 x float> %a, i32 1 - %b1 = extractelement <4 x float> %b, i32 1 - %c1 = fadd float %a1, %b1 - %v1 = insertelement <4 x float> %v0, float %c1, i32 1 - %a2 = extractelement <4 x float> %a, i32 2 - %b2 = extractelement <4 x float> %b, i32 2 - %c2 = fadd float %a2, %b2 - %v2 = insertelement <4 x float> %v1, float %c2, i32 2 - %a3 = extractelement <4 x float> %a, i32 3 - %b3 = extractelement <4 x float> %b, i32 3 - %c3 = fadd float %a3, %b3 - %v3 = insertelement <4 x float> %v2, float %c3, i32 3 - ret <4 x float> %v3 -} - ; Check that cost model for vectorization takes credit for ; instructions that are erased. define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {