-/// \brief Blend two v8i16 vectors using a naive unpack strategy.
-///
-/// This strategy only works when the inputs from each vector fit into a single
-/// half of that vector, and generally there are not so many inputs as to leave
-/// the in-place shuffles required highly constrained (and thus expensive). It
-/// shifts all the inputs into a single side of both input vectors and then
-/// uses an unpack to interleave these inputs in a single vector. At that
-/// point, we will fall back on the generic single input shuffle lowering.
-static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1,
- SDValue V2,
- MutableArrayRef<int> Mask,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
- assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
- SmallVector<int, 3> LoV1Inputs, HiV1Inputs, LoV2Inputs, HiV2Inputs;
- for (int i = 0; i < 8; ++i)
- if (Mask[i] >= 0 && Mask[i] < 4)
- LoV1Inputs.push_back(i);
- else if (Mask[i] >= 4 && Mask[i] < 8)
- HiV1Inputs.push_back(i);
- else if (Mask[i] >= 8 && Mask[i] < 12)
- LoV2Inputs.push_back(i);
- else if (Mask[i] >= 12)
- HiV2Inputs.push_back(i);
-
- int NumV1Inputs = LoV1Inputs.size() + HiV1Inputs.size();
- int NumV2Inputs = LoV2Inputs.size() + HiV2Inputs.size();
- (void)NumV1Inputs;
- (void)NumV2Inputs;
- assert(NumV1Inputs > 0 && NumV1Inputs <= 3 && "At most 3 inputs supported");
- assert(NumV2Inputs > 0 && NumV2Inputs <= 3 && "At most 3 inputs supported");
- assert(NumV1Inputs + NumV2Inputs <= 4 && "At most 4 combined inputs");
-
- bool MergeFromLo = LoV1Inputs.size() + LoV2Inputs.size() >=
- HiV1Inputs.size() + HiV2Inputs.size();
-
- auto moveInputsToHalf = [&](SDValue V, ArrayRef<int> LoInputs,
- ArrayRef<int> HiInputs, bool MoveToLo,
- int MaskOffset) {
- ArrayRef<int> GoodInputs = MoveToLo ? LoInputs : HiInputs;
- ArrayRef<int> BadInputs = MoveToLo ? HiInputs : LoInputs;
- if (BadInputs.empty())
- return V;
-
- int MoveMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int MoveOffset = MoveToLo ? 0 : 4;
+/// \brief Helper to form a PSHUFB-based shuffle+blend.
+static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG, bool &V1InUse,
+ bool &V2InUse) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ SDValue V1Mask[16];
+ SDValue V2Mask[16];
+ V1InUse = false;
+ V2InUse = false;