X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FUtils%2FX86ShuffleDecode.cpp;h=a3729769901f74c514db10305d003c3273ddbe4d;hp=a3f45233454761f88f6a56a1c914707b551ef87e;hb=5608047173702fc4e71c7b80c660282e30a24353;hpb=bdecfeb7237dc66d0a1977617fab627c5afc2ed3 diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index a3f45233454..a3729769901 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -35,7 +35,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { unsigned CountS = (Imm >> 6) & 3; // CountS selects which input element to use. - unsigned InVal = 4+CountS; + unsigned InVal = 4 + CountS; // CountD specifies which element of destination to update. ShuffleMask[CountD] = InVal; // ZMask zaps values, potentially overriding the CountD elt. @@ -47,20 +47,20 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { // <3,1> or <6,7,2,3> void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask) { - for (unsigned i = NElts/2; i != NElts; ++i) - ShuffleMask.push_back(NElts+i); + for (unsigned i = NElts / 2; i != NElts; ++i) + ShuffleMask.push_back(NElts + i); - for (unsigned i = NElts/2; i != NElts; ++i) + for (unsigned i = NElts / 2; i != NElts; ++i) ShuffleMask.push_back(i); } // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) + for (unsigned i = 0; i != NElts / 2; ++i) ShuffleMask.push_back(i); - for (unsigned i = 0; i != NElts/2; ++i) - ShuffleMask.push_back(NElts+i); + for (unsigned i = 0; i != NElts / 2; ++i) + ShuffleMask.push_back(NElts + i); } void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl &ShuffleMask) { @@ -79,6 +79,49 @@ void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl &ShuffleMask) { } } +void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl &ShuffleMask) { + unsigned VectorSizeInBits = VT.getSizeInBits(); + unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumLanes = VectorSizeInBits / 128; + unsigned NumLaneElts = NumElts / NumLanes; + unsigned NumLaneSubElts = 64 / ScalarSizeInBits; + + for (unsigned l = 0; l < NumElts; l += NumLaneElts) + for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts) + for (unsigned s = 0; s != NumLaneSubElts; s++) + ShuffleMask.push_back(l + s); +} + +void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { + unsigned VectorSizeInBits = VT.getSizeInBits(); + unsigned NumElts = VectorSizeInBits / 8; + unsigned NumLanes = VectorSizeInBits / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned l = 0; l < NumElts; l += NumLaneElts) + for (unsigned i = 0; i < NumLaneElts; ++i) { + int M = SM_SentinelZero; + if (i >= Imm) M = i - Imm + l; + ShuffleMask.push_back(M); + } +} + +void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { + unsigned VectorSizeInBits = VT.getSizeInBits(); + unsigned NumElts = VectorSizeInBits / 8; + unsigned NumLanes = VectorSizeInBits / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned l = 0; l < NumElts; l += NumLaneElts) + for (unsigned i = 0; i < NumLaneElts; ++i) { + unsigned Base = i + Imm; + int M = Base + l; + if (Base >= NumLaneElts) M = SM_SentinelZero; + ShuffleMask.push_back(M); + } +} + void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); @@ -97,13 +140,14 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm, } } -/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. +/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits() / 128; + if (NumLanes == 0) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; unsigned NewImm = Imm; @@ -148,6 +192,16 @@ void DecodePSHUFLWMask(MVT VT, unsigned Imm, } } +void DecodePSWAPMask(MVT VT, SmallVectorImpl &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumHalfElts = NumElts / 2; + + for (unsigned l = 0; l != NumHalfElts; ++l) + ShuffleMask.push_back(l + NumHalfElts); + for (unsigned h = 0; h != NumHalfElts; ++h) + ShuffleMask.push_back(h); +} + /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. @@ -160,8 +214,8 @@ void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { unsigned NewImm = Imm; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { // each half of a lane comes from different source - for (unsigned s = 0; s != NumElts*2; s += NumElts) { - for (unsigned i = 0; i != NumLaneElts/2; ++i) { + for (unsigned s = 0; s != NumElts * 2; s += NumElts) { + for (unsigned i = 0; i != NumLaneElts / 2; ++i) { ShuffleMask.push_back(NewImm % NumLaneElts + s + l); NewImm /= NumLaneElts; } @@ -179,13 +233,13 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl &ShuffleMask) { // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate // independently on 128-bit lanes. unsigned NumLanes = VT.getSizeInBits() / 128; - if (NumLanes == 0 ) NumLanes = 1; // Handle MMX + if (NumLanes == 0) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { - ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumElts); // Reads from src/src2 + for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i + NumElts); // Reads from src/src2 } } } @@ -203,66 +257,74 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask) { unsigned NumLaneElts = NumElts / NumLanes; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { - ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumElts); // Reads from src/src2 + for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i + NumElts); // Reads from src/src2 } } } +/// \brief Decode a shuffle packed values at 128-bit granularity +/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) +/// immediate mask into a shuffle mask. +void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits(); + unsigned ControlBitsMask = NumLanes - 1; + unsigned NumControlBits = NumLanes / 2; + + for (unsigned l = 0; l != NumLanes; ++l) { + unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; + // We actually need the other source. + if (l >= NumLanes / 2) + LaneMask += NumLanes; + for (unsigned i = 0; i != NumElementsInLane; ++i) + ShuffleMask.push_back(LaneMask * NumElementsInLane + i); + } +} + void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { - if (Imm & 0x88) - return; // Not a shuffle - - unsigned HalfSize = VT.getVectorNumElements()/2; + unsigned HalfSize = VT.getVectorNumElements() / 2; for (unsigned l = 0; l != 2; ++l) { - unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; - for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) - ShuffleMask.push_back(i); + unsigned HalfMask = Imm >> (l * 4); + unsigned HalfBegin = (HalfMask & 0x3) * HalfSize; + for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) + ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i); } } void DecodePSHUFBMask(const Constant *C, SmallVectorImpl &ShuffleMask) { Type *MaskTy = C->getType(); - assert(MaskTy->isVectorTy() && "Expected a vector constant mask!"); - assert(MaskTy->getVectorElementType()->isIntegerTy(8) && - "Expected i8 constant mask elements!"); - int NumElements = MaskTy->getVectorNumElements(); - // FIXME: Add support for AVX-512. - assert((NumElements == 16 || NumElements == 32) && - "Only 128-bit and 256-bit vectors supported!"); - ShuffleMask.reserve(NumElements); - - if (auto *CDS = dyn_cast(C)) { - assert((unsigned)NumElements == CDS->getNumElements() && - "Constant mask has a different number of elements!"); - - for (int i = 0; i < NumElements; ++i) { - // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte - // lane of the vector we're inside. - int Base = i < 16 ? 0 : 16; - uint64_t Element = CDS->getElementAsInteger(i); - // If the high bit (7) of the byte is set, the element is zeroed. - if (Element & (1 << 7)) - ShuffleMask.push_back(SM_SentinelZero); - else { - // Only the least significant 4 bits of the byte are used. - int Index = Base + (Element & 0xf); - ShuffleMask.push_back(Index); - } - } - } else if (auto *CV = dyn_cast(C)) { - assert((unsigned)NumElements == CV->getNumOperands() && - "Constant mask has a different number of elements!"); + // It is not an error for the PSHUFB mask to not be a vector of i8 because the + // constant pool uniques constants by their bit representation. + // e.g. the following take up the same space in the constant pool: + // i128 -170141183420855150465331762880109871104 + // + // <2 x i64> + // + // <4 x i32> + + unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); + assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512); + + // This is a straightforward byte vector. + if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) { + int NumElements = MaskTy->getVectorNumElements(); + ShuffleMask.reserve(NumElements); for (int i = 0; i < NumElements; ++i) { // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte // lane of the vector we're inside. - int Base = i < 16 ? 0 : 16; - Constant *COp = CV->getOperand(i); - if (isa(COp)) { + int Base = i & ~0xf; + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa(COp)) { ShuffleMask.push_back(SM_SentinelUndef); continue; } @@ -277,6 +339,7 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl &ShuffleMask) { } } } + // TODO: Handle funny-looking vectors too. } void DecodePSHUFBMask(ArrayRef RawMask, @@ -319,48 +382,246 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { /// No VT provided since it only works on 256-bit, 4 element vectors. void DecodeVPERMMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back((Imm >> (2*i)) & 3); + ShuffleMask.push_back((Imm >> (2 * i)) & 3); } } -void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl &ShuffleMask) { +void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, + SmallVectorImpl &ShuffleMask) { Type *MaskTy = C->getType(); - assert(MaskTy->isVectorTy() && "Expected a vector constant mask!"); - assert(MaskTy->getVectorElementType()->isIntegerTy() && - "Expected integer constant mask elements!"); - int ElementBits = MaskTy->getScalarSizeInBits(); - int NumElements = MaskTy->getVectorNumElements(); + // It is not an error for the PSHUFB mask to not be a vector of i8 because the + // constant pool uniques constants by their bit representation. + // e.g. the following take up the same space in the constant pool: + // i128 -170141183420855150465331762880109871104 + // + // <2 x i64> + // + // <4 x i32> + + unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); + + if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512. + return; + + // Only support vector types. + if (!MaskTy->isVectorTy()) + return; + + // Make sure its an integer type. + Type *VecEltTy = MaskTy->getVectorElementType(); + if (!VecEltTy->isIntegerTy()) + return; + + // Support any element type from byte up to element size. + // This is necesary primarily because 64-bit elements get split to 32-bit + // in the constant pool on 32-bit target. + unsigned EltTySize = VecEltTy->getIntegerBitWidth(); + if (EltTySize < 8 || EltTySize > ElSize) + return; + + unsigned NumElements = MaskTySize / ElSize; assert((NumElements == 2 || NumElements == 4 || NumElements == 8) && "Unexpected number of vector elements."); ShuffleMask.reserve(NumElements); - if (auto *CDS = dyn_cast(C)) { - assert((unsigned)NumElements == CDS->getNumElements() && - "Constant mask has a different number of elements!"); + unsigned NumElementsPerLane = 128 / ElSize; + unsigned Factor = ElSize / EltTySize; + + for (unsigned i = 0; i < NumElements; ++i) { + Constant *COp = C->getAggregateElement(i * Factor); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa(COp)) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + int Index = i & ~(NumElementsPerLane - 1); + uint64_t Element = cast(COp)->getZExtValue(); + if (ElSize == 64) + Index += (Element >> 1) & 0x1; + else + Index += Element & 0x3; + ShuffleMask.push_back(Index); + } - for (int i = 0; i < NumElements; ++i) { - int Base = (i * ElementBits / 128) * (128 / ElementBits); - uint64_t Element = CDS->getElementAsInteger(i); - // Only the least significant 2 bits of the integer are used. - int Index = Base + (Element & 0x3); - ShuffleMask.push_back(Index); + // TODO: Handle funny-looking vectors too. +} + +void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl &Mask) { + unsigned NumDstElts = DstVT.getVectorNumElements(); + unsigned SrcScalarBits = SrcVT.getScalarSizeInBits(); + unsigned DstScalarBits = DstVT.getScalarSizeInBits(); + unsigned Scale = DstScalarBits / SrcScalarBits; + assert(SrcScalarBits < DstScalarBits && + "Expected zero extension mask to increase scalar size"); + assert(SrcVT.getVectorNumElements() >= NumDstElts && + "Too many zero extension lanes"); + + for (unsigned i = 0; i != NumDstElts; i++) { + Mask.push_back(i); + for (unsigned j = 1; j != Scale; j++) + Mask.push_back(SM_SentinelZero); + } +} + +void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + ShuffleMask.push_back(0); + for (unsigned i = 1; i < NumElts; i++) + ShuffleMask.push_back(SM_SentinelZero); +} + +void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl &Mask) { + // First element comes from the first element of second source. + // Remaining elements: Load zero extends / Move copies from first source. + unsigned NumElts = VT.getVectorNumElements(); + Mask.push_back(NumElts); + for (unsigned i = 1; i < NumElts; i++) + Mask.push_back(IsLoad ? static_cast(SM_SentinelZero) : i); +} + +void DecodeEXTRQIMask(int Len, int Idx, + SmallVectorImpl &ShuffleMask) { + // Only the bottom 6 bits are valid for each immediate. + Len &= 0x3F; + Idx &= 0x3F; + + // We can only decode this bit extraction instruction as a shuffle if both the + // length and index work with whole bytes. + if (0 != (Len % 8) || 0 != (Idx % 8)) + return; + + // A length of zero is equivalent to a bit length of 64. + if (Len == 0) + Len = 64; + + // If the length + index exceeds the bottom 64 bits the result is undefined. + if ((Len + Idx) > 64) { + ShuffleMask.append(16, SM_SentinelUndef); + return; + } + + // Convert index and index to work with bytes. + Len /= 8; + Idx /= 8; + + // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes + // of the lower 64-bits. The upper 64-bits are undefined. + for (int i = 0; i != Len; ++i) + ShuffleMask.push_back(i + Idx); + for (int i = Len; i != 8; ++i) + ShuffleMask.push_back(SM_SentinelZero); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(SM_SentinelUndef); +} + +void DecodeINSERTQIMask(int Len, int Idx, + SmallVectorImpl &ShuffleMask) { + // Only the bottom 6 bits are valid for each immediate. + Len &= 0x3F; + Idx &= 0x3F; + + // We can only decode this bit insertion instruction as a shuffle if both the + // length and index work with whole bytes. + if (0 != (Len % 8) || 0 != (Idx % 8)) + return; + + // A length of zero is equivalent to a bit length of 64. + if (Len == 0) + Len = 64; + + // If the length + index exceeds the bottom 64 bits the result is undefined. + if ((Len + Idx) > 64) { + ShuffleMask.append(16, SM_SentinelUndef); + return; + } + + // Convert index and index to work with bytes. + Len /= 8; + Idx /= 8; + + // INSERTQ: Extract lowest Len bytes from lower half of second source and + // insert over first source starting at Idx byte. The upper 64-bits are + // undefined. + for (int i = 0; i != Idx; ++i) + ShuffleMask.push_back(i); + for (int i = 0; i != Len; ++i) + ShuffleMask.push_back(i + 16); + for (int i = Idx + Len; i != 8; ++i) + ShuffleMask.push_back(i); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(SM_SentinelUndef); +} + +void DecodeVPERMVMask(ArrayRef RawMask, + SmallVectorImpl &ShuffleMask) { + for (int i = 0, e = RawMask.size(); i < e; ++i) { + uint64_t M = RawMask[i]; + ShuffleMask.push_back((int)M); + } +} + +void DecodeVPERMV3Mask(ArrayRef RawMask, + SmallVectorImpl &ShuffleMask) { + for (int i = 0, e = RawMask.size(); i < e; ++i) { + uint64_t M = RawMask[i]; + ShuffleMask.push_back((int)M); + } +} + +void DecodeVPERMVMask(const Constant *C, MVT VT, + SmallVectorImpl &ShuffleMask) { + Type *MaskTy = C->getType(); + if (MaskTy->isVectorTy()) { + unsigned NumElements = MaskTy->getVectorNumElements(); + if (NumElements == VT.getVectorNumElements()) { + for (unsigned i = 0; i < NumElements; ++i) { + Constant *COp = C->getAggregateElement(i); + if (!COp || (!isa(COp) && !isa(COp))) { + ShuffleMask.clear(); + return; + } + if (isa(COp)) + ShuffleMask.push_back(SM_SentinelUndef); + else { + uint64_t Element = cast(COp)->getZExtValue(); + Element &= (1 << NumElements) - 1; + ShuffleMask.push_back(Element); + } + } } - } else if (auto *CV = dyn_cast(C)) { - assert((unsigned)NumElements == C->getNumOperands() && - "Constant mask has a different number of elements!"); + return; + } + // Scalar value; just broadcast it + if (!isa(C)) + return; + uint64_t Element = cast(C)->getZExtValue(); + int NumElements = VT.getVectorNumElements(); + Element &= (1 << NumElements) - 1; + for (int i = 0; i < NumElements; ++i) + ShuffleMask.push_back(Element); +} - for (int i = 0; i < NumElements; ++i) { - int Base = (i * ElementBits / 128) * (128 / ElementBits); - Constant *COp = CV->getOperand(i); - if (isa(COp)) { +void DecodeVPERMV3Mask(const Constant *C, MVT VT, + SmallVectorImpl &ShuffleMask) { + Type *MaskTy = C->getType(); + unsigned NumElements = MaskTy->getVectorNumElements(); + if (NumElements == VT.getVectorNumElements()) { + for (unsigned i = 0; i < NumElements; ++i) { + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } + if (isa(COp)) ShuffleMask.push_back(SM_SentinelUndef); - continue; + else { + uint64_t Element = cast(COp)->getZExtValue(); + Element &= (1 << NumElements*2) - 1; + ShuffleMask.push_back(Element); } - uint64_t Element = cast(COp)->getZExtValue(); - // Only the least significant 2 bits of the integer are used. - int Index = Base + (Element & 0x3); - ShuffleMask.push_back(Index); } } } - } // llvm namespace