- // If this is an index into a build_vector node (which has the same number
- // of elements), dig out the input value and use it.
- SDValue V = M < Size ? V1 : V2;
- if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
- continue;
-
- SDValue Input = V.getOperand(M % Size);
- // The UNDEF opcode check really should be dead code here, but not quite
- // worth asserting on (it isn't invalid, just unexpected).
- if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
- Zeroable[i] = true;
- }
-
- return Zeroable;
-}
-
- /// \brief Try to lower a shuffle as a single AND with a constant bitmask.
- ///
- /// This applies when every element that is not zeroable stays at its own
- /// position and all such elements come from the same input. The surviving
- /// input is ANDed with a build_vector that holds all-ones at the positions
- /// that pass through and zero at the zeroable positions. An empty SDValue is
- /// returned when the mask moves an element to a different position, needs
- /// both inputs, or contains no non-zeroable element at all.
- static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
-                                            SDValue V2, ArrayRef<int> Mask,
-                                            SelectionDAG &DAG) {
-   MVT ScalarVT = VT.getScalarType();
-   int ScalarBits = ScalarVT.getSizeInBits();
-   MVT IntScalarVT = MVT::getIntegerVT(ScalarBits);
-
-   // The two per-element constants are created as integers and bitcast to the
-   // element type when that type is floating point.
-   SDValue ClearElt = DAG.getConstant(0, DL, IntScalarVT);
-   SDValue KeepElt =
-       DAG.getConstant(APInt::getAllOnesValue(ScalarBits), DL, IntScalarVT);
-   if (ScalarVT.isFloatingPoint()) {
-     ClearElt = DAG.getBitcast(ScalarVT, ClearElt);
-     KeepElt = DAG.getBitcast(ScalarVT, KeepElt);
-   }
-
-   // Every position starts out cleared; positions that pass an input element
-   // through are switched to all-ones below.
-   SmallVector<SDValue, 16> EltConsts(Mask.size(), ClearElt);
-   SmallBitVector ZeroableElts = computeZeroableShuffleElements(Mask, V1, V2);
-   SDValue Source;
-   int NumElts = Mask.size();
-   for (int Idx = 0; Idx < NumElts; ++Idx) {
-     if (ZeroableElts[Idx])
-       continue;
-
-     int M = Mask[Idx];
-     if (M % NumElts != Idx)
-       return SDValue(); // Not a blend.
-
-     // The first kept element picks the input; later ones must agree with it.
-     SDValue Candidate = M < NumElts ? V1 : V2;
-     if (!Source)
-       Source = Candidate;
-     if (Source != Candidate)
-       return SDValue(); // Can only let one input through the mask.
-
-     EltConsts[Idx] = KeepElt;
-   }
-   if (!Source)
-     return SDValue(); // No non-zeroable elements!
-
-   SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, EltConsts);
-   // Floating point vector types use the X86-specific FAND node.
-   unsigned AndOpc = VT.isFloatingPoint() ? static_cast<unsigned>(X86ISD::FAND)
-                                          : static_cast<unsigned>(ISD::AND);
-   return DAG.getNode(AndOpc, DL, VT, Source, VMask);
- }
-
-/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
-///
-/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
-/// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function
-/// matches elements from one of the input vectors shuffled to the left or
-/// right with zeroable elements 'shifted in'. It handles both the strictly
-/// bit-wise element shifts and the byte shift across an entire 128-bit double
-/// quad word lane.
-///
- /// PSLL : (little-endian) left bit shift.
-/// [ zz, 0, zz, 2 ]
-/// [ -1, 4, zz, -1 ]
-/// PSRL : (little-endian) right bit shift.
-/// [ 1, zz, 3, zz]
-/// [ -1, -1, 7, zz]
-/// PSLLDQ : (little-endian) left byte shift
-/// [ zz, 0, 1, 2, 3, 4, 5, 6]
-/// [ zz, zz, -1, -1, 2, 3, 4, -1]
-/// [ zz, zz, zz, zz, zz, zz, -1, 1]
-/// PSRLDQ : (little-endian) right byte shift
-/// [ 5, 6, 7, zz, zz, zz, zz, zz]
-/// [ -1, 5, 6, 7, zz, zz, zz, zz]
-/// [ 1, 2, -1, -1, -1, -1, zz, zz]
-static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
-
- int Size = Mask.size();
- assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
-
- auto CheckZeros = [&](int Shift, int Scale, bool Left) {
- for (int i = 0; i < Size; i += Scale)
- for (int j = 0; j < Shift; ++j)
- if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
- return false;
-
- return true;
- };
-
- auto MatchShift = [&](int Shift, int Scale, bool Left, SDValue V) {
- for (int i = 0; i != Size; i += Scale) {
- unsigned Pos = Left ? i + Shift : i;
- unsigned Low = Left ? i : i + Shift;
- unsigned Len = Scale - Shift;
- if (!isSequentialOrUndefInRange(Mask, Pos, Len,
- Low + (V == V1 ? 0 : Size)))
- return SDValue();
- }
-
- int ShiftEltBits = VT.getScalarSizeInBits() * Scale;
- bool ByteShift = ShiftEltBits > 64;
- unsigned OpCode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
- : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
- int ShiftAmt = Shift * VT.getScalarSizeInBits() / (ByteShift ? 8 : 1);