From 8f35c13842187d23e7d10ec2e5f416d0363abf50 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 20 Jan 2012 09:29:03 +0000 Subject: [PATCH] Improve 256-bit shuffle splitting to allow 2 sources in each 128-bit lane. As long as only a single lane of the source is used in the lane in the destination. This makes the splitting match much closer to what happens with 256-bit shuffles when AVX is disabled and only 128-bit XMM is allowed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 172 ++++++++++++++--------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 808daffc695..dcd96ae47f2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3162,17 +3162,6 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) { return (Val < 0) || (Val >= Low && Val < Hi); } -/// isUndefOrInRange - Return true if every element in Mask, begining -/// from position Pos and ending in Pos+Size, falls within the specified -/// range (L, L+Pos]. or is undef. -static bool isUndefOrInRange(ArrayRef Mask, - int Pos, int Size, int Low, int Hi) { - for (int i = Pos, e = Pos+Size; i != e; ++i) - if (!isUndefOrInRange(Mask[i], Low, Hi)) - return false; - return true; -} - /// isUndefOrEqual - Val is either less than zero (undef) or equal to the /// specified value. static bool isUndefOrEqual(int Val, int CmpVal) { @@ -5948,95 +5937,106 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, OpVT, SrcOp))); } -/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector -/// shuffle node referes to only one lane in the sources. 
-static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
-  EVT VT = SVOp->getValueType(0);
-  int NumElems = VT.getVectorNumElements();
-  int HalfSize = NumElems/2;
-  ArrayRef<int> M = SVOp->getMask();
-  bool MatchA = false, MatchB = false;
-
-  for (int l = 0; l < NumElems*2; l += HalfSize) {
-    if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
-      MatchA = true;
-      break;
-    }
-  }
-
-  for (int l = 0; l < NumElems*2; l += HalfSize) {
-    if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
-      MatchB = true;
-      break;
-    }
-  }
-
-  return MatchA && MatchB;
-}
-
 /// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles
 /// which could not be matched by any known target speficic shuffle
 static SDValue LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp,
                                        SelectionDAG &DAG) {
-  if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
-    // If each half of a vector shuffle node referes to only one lane in the
-    // source vectors, extract each used 128-bit lane and shuffle them using
-    // 128-bit shuffles. Then, concatenate the results. Otherwise leave
-    // the work to the legalizer.
-    DebugLoc dl = SVOp->getDebugLoc();
-    EVT VT = SVOp->getValueType(0);
-    int NumElems = VT.getVectorNumElements();
-    int HalfSize = NumElems/2;
-
-    // Extract the reference for each half
-    int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
-    int FstVecOpNum = 0, SndVecOpNum = 0;
-    for (int i = 0; i < HalfSize; ++i) {
-      int Elt = SVOp->getMaskElt(i);
-      if (SVOp->getMaskElt(i) < 0)
+  EVT VT = SVOp->getValueType(0);
+
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned NumLaneElems = NumElems / 2;
+
+  int MinRange[2][2] = { { static_cast<int>(NumElems),
+                           static_cast<int>(NumElems) },
+                         { static_cast<int>(NumElems),
+                           static_cast<int>(NumElems) } };
+  int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } };
+
+  // Collect the [Min, Max] element range each lane reads from each source
+  for (unsigned l = 0; l < 2; ++l) {
+    unsigned LaneStart = l*NumLaneElems;
+    for (unsigned i = 0; i != NumLaneElems; ++i) {
+      int Idx = SVOp->getMaskElt(i+LaneStart);
+      if (Idx < 0)
         continue;
-      FstVecOpNum = Elt/NumElems;
-      FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
-      break;
+
+      int Input = 0;
+      if (Idx >= (int)NumElems) {
+        Idx -= NumElems;
+        Input = 1;
+      }
+
+      if (Idx > MaxRange[l][Input])
+        MaxRange[l][Input] = Idx;
+      if (Idx < MinRange[l][Input])
+        MinRange[l][Input] = Idx;
     }
-    for (int i = HalfSize; i < NumElems; ++i) {
-      int Elt = SVOp->getMaskElt(i);
-      if (SVOp->getMaskElt(i) < 0)
+  }
+
+  // Make sure each used range [Min, Max] lies within one 128-bit source lane
+  int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } };
+  for (unsigned l = 0; l < 2; ++l) {
+    for (unsigned Input = 0; Input < 2; ++Input) {
+      if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0)
         continue;
-      SndVecOpNum = Elt/NumElems;
-      SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
-      break;
+
+      if (MinRange[l][Input] >= 0 && MaxRange[l][Input] < (int)NumLaneElems)
+        ExtractIdx[l][Input] = 0;
+      else if (MinRange[l][Input] >= (int)NumLaneElems &&
+               MaxRange[l][Input] < (int)NumElems)
+        ExtractIdx[l][Input] = NumLaneElems;
+      else
+        return SDValue();
     }
+  }
 
-    // Extract the subvectors
-    SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
-                     DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
-    SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
-                     DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
+  DebugLoc dl = SVOp->getDebugLoc();
+  MVT EltVT = VT.getVectorElementType().getSimpleVT();
+  EVT NVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+  SDValue Ops[2][2];
+  for (unsigned l = 0; l < 2; ++l) {
+    for (unsigned Input = 0; Input < 2; ++Input) {
+      if (ExtractIdx[l][Input] >= 0)
+        Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input),
+                                DAG.getConstant(ExtractIdx[l][Input], MVT::i32),
+                                            DAG, dl);
+      else
+        Ops[l][Input] = DAG.getUNDEF(NVT);
+    }
+  }
 
-    // Generate 128-bit shuffles
-    SmallVector<int, 16> MaskV1, MaskV2;
-    for (int i = 0; i < HalfSize; ++i) {
-      int Elt = SVOp->getMaskElt(i);
-      MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+  // Generate 128-bit shuffles; second-source indices are offset by NumLaneElems
+  SmallVector<int, 16> Mask1, Mask2;
+  for (unsigned i = 0; i != NumLaneElems; ++i) {
+    int Elt = SVOp->getMaskElt(i);
+    if (Elt >= (int)NumElems) {
+      Elt %= NumLaneElems;
+      Elt += NumLaneElems;
+    } else if (Elt >= 0) {
+      Elt %= NumLaneElems;
     }
-    for (int i = HalfSize; i < NumElems; ++i) {
-      int Elt = SVOp->getMaskElt(i);
-      MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+    Mask1.push_back(Elt);
+  }
+  for (unsigned i = NumLaneElems; i != NumElems; ++i) {
+    int Elt = SVOp->getMaskElt(i);
+    if (Elt >= (int)NumElems) {
+      Elt %= NumLaneElems;
+      Elt += NumLaneElems;
+    } else if (Elt >= 0) {
+      Elt %= NumLaneElems;
     }
-
-    EVT NVT = V1.getValueType();
-    V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
-    V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
-
-    // Concatenate the result back
-    SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
-                                   DAG.getConstant(0, MVT::i32), DAG, dl);
-    return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
-                              DAG, dl);
+    Mask2.push_back(Elt);
   }
 
-  return SDValue();
+  SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]);
+  SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]);
+
+  // Concatenate the result back
+  SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1,
+                                 DAG.getConstant(0, MVT::i32), DAG, dl);
+  return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32),
+                            DAG, dl);
 }
 
 /// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
-- 
2.34.1