lib/Target/X86/Utils/X86ShuffleDecode.cpp

   1 //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Define several functions to decode x86 specific shuffle semantics into a
  11 // generic vector mask.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "X86ShuffleDecode.h"
  16
  17 //===----------------------------------------------------------------------===//
  18 //  Vector Mask Decoding
  19 //===----------------------------------------------------------------------===//
  20
  21 namespace llvm {
  22
  23 void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
  24   // Defaults the copying the dest value.
  25   ShuffleMask.push_back(0);
  26   ShuffleMask.push_back(1);
  27   ShuffleMask.push_back(2);
  28   ShuffleMask.push_back(3);
  29
  30   // Decode the immediate.
  31   unsigned ZMask = Imm & 15;
  32   unsigned CountD = (Imm >> 4) & 3;
  33   unsigned CountS = (Imm >> 6) & 3;
  34
  35   // CountS selects which input element to use.
  36   unsigned InVal = 4+CountS;
  37   // CountD specifies which element of destination to update.
  38   ShuffleMask[CountD] = InVal;
  39   // ZMask zaps values, potentially overriding the CountD elt.
  40   if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
  41   if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
  42   if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
  43   if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
  44 }
  45
  46 // <3,1> or <6,7,2,3>
  47 void DecodeMOVHLPSMask(unsigned NElts,
  48                        SmallVectorImpl<unsigned> &ShuffleMask) {
  49   for (unsigned i = NElts/2; i != NElts; ++i)
  50     ShuffleMask.push_back(NElts+i);
  51
  52   for (unsigned i = NElts/2; i != NElts; ++i)
  53     ShuffleMask.push_back(i);
  54 }
  55
  56 // <0,2> or <0,1,4,5>
  57 void DecodeMOVLHPSMask(unsigned NElts,
  58                        SmallVectorImpl<unsigned> &ShuffleMask) {
  59   for (unsigned i = 0; i != NElts/2; ++i)
  60     ShuffleMask.push_back(i);
  61
  62   for (unsigned i = 0; i != NElts/2; ++i)
  63     ShuffleMask.push_back(NElts+i);
  64 }
  65
  66 void DecodePSHUFMask(unsigned NElts, unsigned Imm,
  67                      SmallVectorImpl<unsigned> &ShuffleMask) {
  68   for (unsigned i = 0; i != NElts; ++i) {
  69     ShuffleMask.push_back(Imm % NElts);
  70     Imm /= NElts;
  71   }
  72 }
  73
  74 void DecodePSHUFHWMask(unsigned Imm,
  75                        SmallVectorImpl<unsigned> &ShuffleMask) {
  76   ShuffleMask.push_back(0);
  77   ShuffleMask.push_back(1);
  78   ShuffleMask.push_back(2);
  79   ShuffleMask.push_back(3);
  80   for (unsigned i = 0; i != 4; ++i) {
  81     ShuffleMask.push_back(4+(Imm & 3));
  82     Imm >>= 2;
  83   }
  84 }
  85
  86 void DecodePSHUFLWMask(unsigned Imm,
  87                        SmallVectorImpl<unsigned> &ShuffleMask) {
  88   for (unsigned i = 0; i != 4; ++i) {
  89     ShuffleMask.push_back((Imm & 3));
  90     Imm >>= 2;
  91   }
  92   ShuffleMask.push_back(4);
  93   ShuffleMask.push_back(5);
  94   ShuffleMask.push_back(6);
  95   ShuffleMask.push_back(7);
  96 }
  97
  98 void DecodeSHUFPMask(EVT VT, unsigned Imm,
  99                      SmallVectorImpl<unsigned> &ShuffleMask) {
 100   unsigned NumElts = VT.getVectorNumElements();
 101
 102   unsigned NumLanes = VT.getSizeInBits() / 128;
 103   unsigned NumLaneElts = NumElts / NumLanes;
 104
 105   int NewImm = Imm;
 106   for (unsigned l = 0; l < NumLanes; ++l) {
 107     unsigned LaneStart = l * NumLaneElts;
 108     // Part that reads from dest.
 109     for (unsigned i = 0; i != NumLaneElts/2; ++i) {
 110       ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
 111       NewImm /= NumLaneElts;
 112     }
 113     // Part that reads from src.
 114     for (unsigned i = 0; i != NumLaneElts/2; ++i) {
 115       ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
 116       NewImm /= NumLaneElts;
 117     }
 118     if (NumLaneElts == 4) NewImm = Imm; // reload imm
 119   }
 120 }
 121
 122 void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
 123   unsigned NumElts = VT.getVectorNumElements();
 124
 125   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
 126   // independently on 128-bit lanes.
 127   unsigned NumLanes = VT.getSizeInBits() / 128;
 128   if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
 129   unsigned NumLaneElts = NumElts / NumLanes;
 130
 131   for (unsigned s = 0; s < NumLanes; ++s) {
 132     unsigned Start = s * NumLaneElts + NumLaneElts/2;
 133     unsigned End   = s * NumLaneElts + NumLaneElts;
 134     for (unsigned i = Start; i != End; ++i) {
 135       ShuffleMask.push_back(i);          // Reads from dest/src1
 136       ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
 137     }
 138   }
 139 }
 140
 141 /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
 142 /// etc.  VT indicates the type of the vector allowing it to handle different
 143 /// datatypes and vector widths.
 144 void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
 145   unsigned NumElts = VT.getVectorNumElements();
 146
 147   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
 148   // independently on 128-bit lanes.
 149   unsigned NumLanes = VT.getSizeInBits() / 128;
 150   if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
 151   unsigned NumLaneElts = NumElts / NumLanes;
 152
 153   for (unsigned s = 0; s < NumLanes; ++s) {
 154     unsigned Start = s * NumLaneElts;
 155     unsigned End   = s * NumLaneElts + NumLaneElts/2;
 156     for (unsigned i = Start; i != End; ++i) {
 157       ShuffleMask.push_back(i);          // Reads from dest/src1
 158       ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
 159     }
 160   }
 161 }
 162
 163 // DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit
 164 // 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128
 165 // lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of
 166 // the first lane must be the same of the second.
 167 void DecodeVPERMILPMask(EVT VT, unsigned Imm,
 168                         SmallVectorImpl<unsigned> &ShuffleMask) {
 169   unsigned NumElts = VT.getVectorNumElements();
 170
 171   unsigned NumLanes = VT.getSizeInBits() / 128;
 172   unsigned NumLaneElts = NumElts / NumLanes;
 173
 174   for (unsigned l = 0; l != NumLanes; ++l) {
 175     unsigned LaneStart = l*NumLaneElts;
 176     for (unsigned i = 0; i != NumLaneElts; ++i) {
 177       unsigned Idx = NumLaneElts == 4 ? (Imm >> (i*2)) & 0x3
 178                                       : (Imm >> (i+LaneStart)) & 0x1;
 179       ShuffleMask.push_back(Idx+LaneStart);
 180     }
 181   }
 182 }
 183
 184 void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
 185                           SmallVectorImpl<unsigned> &ShuffleMask) {
 186   unsigned HalfSize = VT.getVectorNumElements()/2;
 187   unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
 188   unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
 189
 190   for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
 191     ShuffleMask.push_back(i);
 192   for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
 193     ShuffleMask.push_back(i);
 194 }
 195
 196 void DecodeVPERM2F128Mask(unsigned Imm,
 197                           SmallVectorImpl<unsigned> &ShuffleMask) {
 198   // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
 199   // has information about the instruction and not the types. So for
 200   // instruction comments purpose, assume the 256-bit vector is v4i64.
 201   return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
 202 }
 203
 204 } // llvm namespace