/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
///
/// See its documentation for details.
-bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
+bool isShuffleEquivalentImpl(SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ ArrayRef<const int *> Args) {
if (Mask.size() != Args.size())
return false;
+
+ // If the values are build vectors, we can look through them to find
+ // equivalent inputs that make the shuffles equivalent.
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
+
for (int i = 0, e = Mask.size(); i < e; ++i) {
assert(*Args[i] >= 0 && "Arguments must be positive integers!");
- if (Mask[i] != -1 && Mask[i] != *Args[i])
- return false;
+ if (Mask[i] != -1 && Mask[i] != *Args[i]) {
+ auto *MaskBV = Mask[i] < e ? BV1 : BV2;
+ auto *ArgsBV = *Args[i] < e ? BV1 : BV2;
+ if (!MaskBV || !ArgsBV ||
+ MaskBV->getOperand(Mask[i] % e) != ArgsBV->getOperand(*Args[i] % e))
+ return false;
+ }
}
return true;
}
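// A minimal standalone sketch of the lookthrough above, using plain
// std::vector<int> in place of BuildVectorSDNode so it can be compiled and
// tested in isolation. As in the shuffle-mask convention used here, indices
// below Size select from the first input, indices >= Size from the second,
// and -1 means undef.
#include <cassert>
#include <vector>

static bool isEquivalentLookthrough(const std::vector<int> &BV1,
                                    const std::vector<int> &BV2,
                                    const std::vector<int> &Mask,
                                    const std::vector<int> &Ref) {
  if (Mask.size() != Ref.size())
    return false;
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(Ref[i] >= 0 && "Reference mask must not contain undef!");
    if (Mask[i] == -1 || Mask[i] == Ref[i])
      continue;
    // Differing indices are still equivalent if they select equal scalars.
    const std::vector<int> &MaskV = Mask[i] < Size ? BV1 : BV2;
    const std::vector<int> &RefV = Ref[i] < Size ? BV1 : BV2;
    if (MaskV[Mask[i] % Size] != RefV[Ref[i] % Size])
      return false;
  }
  return true;
}
// With V1 = {7, 8} and V2 = {7, 9}, the mask {2, 1} is equivalent to the
// reference {0, 1}: element 0 of V2 holds the same scalar as element 0 of V1,
// so looking through the build vectors proves the two shuffles are equal.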
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
-static const VariadicFunction1<
- bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
+static const VariadicFunction3<bool, SDValue, SDValue, ArrayRef<int>, int,
+ isShuffleEquivalentImpl> isShuffleEquivalent =
+ {};
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
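// A standalone sketch, under the assumption that the helper documented above
// packs one 2-bit lane selector per result element into an 8-bit immediate
// (the PSHUFD/SHUFPS encoding), lowest lane in the lowest bits. How the real
// helper treats undef (-1) entries is not shown here, so mapping them to lane
// 0 is an assumption.
static unsigned shuffleImm8Model(const int Mask[4]) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i) {
    int M = Mask[i] < 0 ? 0 : (Mask[i] & 3); // 2 bits per lane selector
    Imm |= (unsigned)M << (2 * i);
  }
  return Imm;
}
// Example: the identity mask {0, 1, 2, 3} packs to 0xE4 (binary 11 10 01 00).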
ExtVT, V1, V2);
}
+ // This lowering only works for the low element with floating point vectors.
+ if (VT.isFloatingPoint() && V2Index != 0)
+ return SDValue();
+
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
if (isSingleInputShuffleMask(Mask)) {
// Use low duplicate instructions for masks that match their pattern.
if (Subtarget->hasSSE3())
- if (isShuffleEquivalent(Mask, 0, 0))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);
// Straight shuffle of a single input vector. Simulate this by using the
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 2))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 3))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
-
// If we have a single input, insert that into V1 if we can do so cheaply.
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
// Try to use one of the special instruction patterns to handle two common
// blend patterns if a zero-blend above didn't work.
- if (isShuffleEquivalent(Mask, 0, 3) || isShuffleEquivalent(Mask, 1, 3))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 3) ||
+     isShuffleEquivalent(V1, V2, Mask, 1, 3))
if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
// We can either use a special instruction to load over the low double or
// to move just the low double.
Subtarget, DAG))
return Blend;
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, MVT::i8));
return Insertion;
}
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 2))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 3))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
-
if (Subtarget->hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG))
return Blend;
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+
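// A standalone sketch of what UNPCKL/UNPCKH expect: the unpack-low form
// interleaves the low halves of the two inputs and the unpack-high form
// interleaves the high halves, which is exactly the shape the
// isShuffleEquivalent checks in these lowerings test for.
#include <vector>

static std::vector<int> unpackLoMask(int NumElts) {
  std::vector<int> Mask;
  for (int i = 0; i < NumElts / 2; ++i) {
    Mask.push_back(i);           // element i of V1
    Mask.push_back(NumElts + i); // element i of V2
  }
  return Mask;
}

static std::vector<int> unpackHiMask(int NumElts) {
  std::vector<int> Mask;
  for (int i = NumElts / 2; i < NumElts; ++i) {
    Mask.push_back(i);
    Mask.push_back(NumElts + i);
  }
  return Mask;
}
// unpackLoMask(2) == {0, 2} and unpackLoMask(4) == {0, 4, 1, 5}, matching the
// v2i64 and v4f32 patterns tested here; unpackHiMask(4) == {2, 6, 3, 7}.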
// Try to use byte rotation instructions.
  // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget->hasSSSE3())
// To lower with a single SHUFPS we need to have the low half and high half
// each requiring a single input.
- if (Mask[0] != -1 && Mask[1] != -1 && (Mask[0] < 4 != Mask[1] < 4))
+ if (Mask[0] != -1 && Mask[1] != -1 && (Mask[0] < 4) != (Mask[1] < 4))
return false;
- if (Mask[2] != -1 && Mask[3] != -1 && (Mask[2] < 4 != Mask[3] < 4))
+ if (Mask[2] != -1 && Mask[3] != -1 && (Mask[2] < 4) != (Mask[3] < 4))
return false;
return true;
// Use even/odd duplicate instructions for masks that match their pattern.
if (Subtarget->hasSSE3()) {
- if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
- if (isShuffleEquivalent(Mask, 1, 1, 3, 3))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 1, 3, 3))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
}
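// A small model of why those masks match: MOVSLDUP duplicates each
// even-indexed float into the odd slot above it, and MOVSHDUP duplicates each
// odd-indexed float downward, giving lane patterns {0, 0, 2, 2} and
// {1, 1, 3, 3} (and their 8-wide forms on wider vectors).
static void movsldupModel(const float Src[4], float Dst[4]) {
  for (int i = 0; i < 4; ++i)
    Dst[i] = Src[i & ~1]; // lanes {0, 0, 2, 2}
}
static void movshdupModel(const float Src[4], float Dst[4]) {
  for (int i = 0; i < 4; ++i)
    Dst[i] = Src[i | 1]; // lanes {1, 1, 3, 3}
}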
getV4X86ShuffleImm8ForMask(Mask, DAG));
}
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
-
// There are special ways we can lower some single-element blends. However, we
// have custom ways we can lower more complex single-element blends below that
// we defer to if both this and BLENDPS fail to match, so restrict this to
return BlendPerm;
}
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(Mask, 0, 0, 1, 1))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 1, 1))
Mask = UnpackLoMask;
- else if (isShuffleEquivalent(Mask, 2, 2, 3, 3))
+ else if (isShuffleEquivalent(V1, V2, Mask, 2, 2, 3, 3))
Mask = UnpackHiMask;
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 1, 5))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
// Try to use byte rotation instructions.
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
+ if (isShuffleEquivalent(V, V, Mask, 0, 0, 1, 1, 2, 2, 3, 3))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
- if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
+ if (isShuffleEquivalent(V, V, Mask, 4, 4, 5, 5, 6, 6, 7, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);
// Try to use byte rotation instructions.
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 2, 10, 3, 11))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 2, 10, 3, 11))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
- if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 4, 12, 5, 13, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
// Try to use byte rotation instructions.
MVT ScalarVT = VT.getScalarType();
MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
- SDValue LoV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
- DAG.getIntPtrConstant(SplitNumElements));
- SDValue LoV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
- DAG.getIntPtrConstant(0));
- SDValue HiV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
- DAG.getIntPtrConstant(SplitNumElements));
+ // Rather than splitting build-vectors, just build two narrower build
+ // vectors. This helps shuffling with splats and zeros.
+ auto SplitVector = [&](SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V->getOperand(0);
+
+ MVT OrigVT = V.getSimpleValueType();
+ int OrigNumElements = OrigVT.getVectorNumElements();
+ int OrigSplitNumElements = OrigNumElements / 2;
+ MVT OrigScalarVT = OrigVT.getScalarType();
+ MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2);
+
+ SDValue LoV, HiV;
+
+ auto *BV = dyn_cast<BuildVectorSDNode>(V);
+ if (!BV) {
+ LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
+ DAG.getIntPtrConstant(0));
+ HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
+ DAG.getIntPtrConstant(OrigSplitNumElements));
+ } else {
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (int i = 0; i < OrigSplitNumElements; ++i) {
+ LoOps.push_back(BV->getOperand(i));
+ HiOps.push_back(BV->getOperand(i + OrigSplitNumElements));
+ }
+ LoV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, LoOps);
+ HiV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, HiOps);
+ }
+ return std::make_pair(DAG.getNode(ISD::BITCAST, DL, SplitVT, LoV),
+ DAG.getNode(ISD::BITCAST, DL, SplitVT, HiV));
+ };
+
+ SDValue LoV1, HiV1, LoV2, HiV2;
+ std::tie(LoV1, HiV1) = SplitVector(V1);
+ std::tie(LoV2, HiV2) = SplitVector(V2);
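// A small standalone illustration of the indexing in the build-vector path
// above: splitting the operand list directly keeps properties such as an
// all-zero or splatted half visible, which an opaque EXTRACT_SUBVECTOR of the
// original value would hide.
#include <utility>
#include <vector>

static std::pair<std::vector<int>, std::vector<int>>
splitOperands(const std::vector<int> &Ops) {
  int Half = Ops.size() / 2;
  std::vector<int> Lo(Ops.begin(), Ops.begin() + Half);
  std::vector<int> Hi(Ops.begin() + Half, Ops.end());
  return {std::move(Lo), std::move(Hi)};
}
// splitOperands({0, 0, 0, 0, 1, 2, 3, 4}) yields an obviously all-zero low
// half and a generic high half; the DAG code does the same with SDValues.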
// Now create two 4-way blends of these half-width vectors.
auto HalfBlend = [&](ArrayRef<int> HalfMask) {
VT.getVectorNumElements() / 2);
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
- if (isShuffleEquivalent(Mask, 0, 1, 0, 1) ||
- isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 1, 0, 1) ||
+ isShuffleEquivalent(V1, V2, Mask, 0, 1, 4, 5)) {
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
- if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 1, 6, 7)) {
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
return Broadcast;
// Use low duplicate instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 2, 6))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
// If we have a single input to the zero element, insert that into V1 if we
}
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 2, 6))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
}
"Repeated masks must be half the mask width!");
// Use even/odd duplicate instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 0, 2, 2, 4, 4, 6, 6))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2, 4, 4, 6, 6))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
- if (isShuffleEquivalent(Mask, 1, 1, 3, 3, 5, 5, 7, 7))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 1, 3, 3, 5, 5, 7, 7))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
if (isSingleInputShuffleMask(Mask))
getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 4, 12, 5, 13))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 4, 12, 5, 13))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
}
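// A standalone sketch of the wider unpack masks: on 256-bit (and 512-bit)
// types the unpack instructions operate within each 128-bit lane
// independently, so the expected masks are built lane by lane rather than
// across the whole vector. LaneElts is the number of elements per 128-bit
// lane.
#include <vector>

static std::vector<int> perLaneUnpackLoMask(int NumElts, int LaneElts) {
  std::vector<int> Mask;
  for (int Lane = 0; Lane < NumElts / LaneElts; ++Lane)
    for (int i = 0; i < LaneElts / 2; ++i) {
      Mask.push_back(Lane * LaneElts + i);           // from V1
      Mask.push_back(NumElts + Lane * LaneElts + i); // from V2
    }
  return Mask;
}
// perLaneUnpackLoMask(8, 4) == {0, 8, 1, 9, 4, 12, 5, 13}, the v8f32/v8i32
// pattern above; perLaneUnpackLoMask(4, 2) == {0, 4, 2, 6}, the v4f64 one.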
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
// First 128-bit lane:
0, 16, 1, 17, 2, 18, 3, 19,
// Second 128-bit lane:
8, 24, 9, 25, 10, 26, 11, 27))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
// First 128-bit lane:
4, 20, 5, 21, 6, 22, 7, 23,
// Second 128-bit lane:
// Note that these are repeated 128-bit lane unpacks, not unpacks across all
// 256-bit lanes.
if (isShuffleEquivalent(
- Mask,
+ V1, V2, Mask,
// First 128-bit lane:
0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
// Second 128-bit lane:
16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
if (isShuffleEquivalent(
- Mask,
+ V1, V2, Mask,
// First 128-bit lane:
8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
// Second 128-bit lane:
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 2, 10, 4, 12, 6, 14))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 9, 3, 11, 5, 13, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
// FIXME: Implement direct support for this type!
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
0, 16, 1, 17, 4, 20, 5, 21,
8, 24, 9, 25, 12, 28, 13, 29))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
2, 18, 3, 19, 6, 22, 7, 23,
10, 26, 11, 27, 14, 30, 15, 31))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 2, 10, 4, 12, 6, 14))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 9, 3, 11, 5, 13, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
// FIXME: Implement direct support for this type!
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
0, 16, 1, 17, 4, 20, 5, 21,
8, 24, 9, 25, 12, 28, 13, 29))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
2, 18, 3, 19, 6, 22, 7, 23,
10, 26, 11, 27, 14, 30, 15, 31))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
}
+ // We actually see shuffles that are entirely re-arrangements of a set of
+ // zero inputs. This mostly happens while decomposing complex shuffles into
+ // simple ones. Directly lower these as a buildvector of zeros.
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ if (Zeroable.all())
+ return getZeroVector(VT, Subtarget, DAG, dl);
+
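// A simplified model of the "everything is zeroable" test. It assumes the
// caller already knows, per input, which elements are constant zero; in the
// real code that information comes from computeZeroableShuffleElements, which
// also treats undef mask entries as zeroable.
#include <vector>

static bool allResultElementsZero(const std::vector<int> &Mask,
                                  const std::vector<bool> &V1Zero,
                                  const std::vector<bool> &V2Zero) {
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] == -1)
      continue; // undef lanes can take any value, including zero
    bool Zero = Mask[i] < Size ? V1Zero[Mask[i]] : V2Zero[Mask[i] - Size];
    if (!Zero)
      return false;
  }
  return true;
}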
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits, but it might be interesting to form i128
// We're looking for blends between FADD and FSUB nodes. We insist on these
// nodes being lined up in a specific expected pattern.
- if (!(isShuffleEquivalent(Mask, 0, 3) ||
- isShuffleEquivalent(Mask, 0, 5, 2, 7) ||
- isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
+ if (!(isShuffleEquivalent(V1, V2, Mask, 0, 3) ||
+ isShuffleEquivalent(V1, V2, Mask, 0, 5, 2, 7) ||
+ isShuffleEquivalent(V1, V2, Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
return SDValue();
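// A standalone sketch of the mask family accepted above: even result lanes
// come from the first input and odd result lanes come from the matching lane
// of the second input. Which of the two inputs carries the FADD and which the
// FSUB is established by the surrounding code, not by this mask shape.
#include <vector>

static std::vector<int> addsubBlendMask(int NumElts) {
  std::vector<int> Mask(NumElts);
  for (int i = 0; i < NumElts; ++i)
    Mask[i] = (i % 2 == 0) ? i : NumElts + i;
  return Mask;
}
// addsubBlendMask(2) == {0, 3}, addsubBlendMask(4) == {0, 5, 2, 7}, and
// addsubBlendMask(8) == {0, 9, 2, 11, 4, 13, 6, 15}, the three masks tested.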
// Only specific types are legal at this point, assert so we notice if and