X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FUtils%2FX86ShuffleDecode.cpp;h=04eb9c4e5037f0529347eec57de6161fe9c61e3c;hp=e66c0ff11ebb4807d1c06866315ab1ba9181b588;hb=f657b6395ac5c9fa57a3d4c871d7f32c565b11fe;hpb=fe1cc51fd12dfe817d2539af9e01f851d0757036 diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index e66c0ff11eb..04eb9c4e503 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -264,6 +264,26 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask) { } } +/// \brief Decode a shuffle packed values at 128-bit granularity +/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) +/// immediate mask into a shuffle mask. +void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits(); + unsigned ControlBitsMask = NumLanes - 1; + unsigned NumControlBits = NumLanes / 2; + + for (unsigned l = 0; l != NumLanes; ++l) { + unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; + // We actually need the other source. + if (l >= NumLanes / 2) + LaneMask += NumLanes; + for (unsigned i = 0; i != NumElementsInLane; ++i) + ShuffleMask.push_back(LaneMask * NumElementsInLane + i); + } +} + void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { unsigned HalfSize = VT.getVectorNumElements() / 2;