return true;
}
-// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
-// 2013 will allow us to use it as a non-type template parameter.
-namespace {
-
-/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
-///
-/// See its documentation for details.
-bool isShuffleEquivalentImpl(SDValue V1, SDValue V2, ArrayRef<int> Mask,
- ArrayRef<const int *> Args) {
- if (Mask.size() != Args.size())
- return false;
-
- // If the values are build vectors, we can look through them to find
- // equivalent inputs that make the shuffles equivalent.
- auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
- auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
-
- for (int i = 0, e = Mask.size(); i < e; ++i) {
- assert(*Args[i] >= 0 && "Arguments must be positive integers!");
- if (Mask[i] != -1 && Mask[i] != *Args[i]) {
- auto *MaskBV = Mask[i] < e ? BV1 : BV2;
- auto *ArgsBV = *Args[i] < e ? BV1 : BV2;
- if (!MaskBV || !ArgsBV ||
- MaskBV->getOperand(Mask[i] % e) != ArgsBV->getOperand(*Args[i] % e))
- return false;
- }
+/// \brief Base case helper for testing a single mask element.
+static bool isShuffleEquivalentImpl(SDValue V1, SDValue V2,
+ BuildVectorSDNode *BV1,
+ BuildVectorSDNode *BV2, ArrayRef<int> Mask,
+ int i, int Arg) {
+ int Size = Mask.size();
+ if (Mask[i] != -1 && Mask[i] != Arg) {
+ auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
+ auto *ArgsBV = Arg < Size ? BV1 : BV2;
+ if (!MaskBV || !ArgsBV ||
+ MaskBV->getOperand(Mask[i] % Size) != ArgsBV->getOperand(Arg % Size))
+ return false;
}
return true;
}
-} // namespace
+/// \brief Recursive helper to peel off and test each mask element.
+template <typename... Ts>
+static bool isShuffleEquivalentImpl(SDValue V1, SDValue V2,
+ BuildVectorSDNode *BV1,
+ BuildVectorSDNode *BV2, ArrayRef<int> Mask,
+ int i, int Arg, Ts... Args) {
+ if (!isShuffleEquivalentImpl(V1, V2, BV1, BV2, Mask, i, Arg))
+ return false;
+
+ return isShuffleEquivalentImpl(V1, V2, BV1, BV2, Mask, i + 1, Args...);
+}
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
-static const VariadicFunction3<bool, SDValue, SDValue, ArrayRef<int>, int,
- isShuffleEquivalentImpl> isShuffleEquivalent =
- {};
+template <typename... Ts>
+static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ Ts... Args) {
+ if (Mask.size() != sizeof...(Args))
+ return false;
+
+ // If the values are build vectors, we can look through them to find
+ // equivalent inputs that make the shuffles equivalent.
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
+
+ // Recursively peel off arguments and test them against the mask.
+ return isShuffleEquivalentImpl(V1, V2, BV1, BV2, Mask, 0, Args...);
+}
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
-///
-/// Note that this only handles 128-bit vector widths currently.
static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+ int NumElts = Mask.size();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumLaneElts = NumElts / NumLanes;
+
// We need to detect various ways of spelling a rotation:
// [11, 12, 13, 14, 15, 0, 1, 2]
// [-1, 12, 13, 14, -1, -1, 1, -1]
// [-1, 4, 5, 6, -1, -1, -1, -1]
int Rotation = 0;
SDValue Lo, Hi;
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- if (Mask[i] == -1)
- continue;
- assert(Mask[i] >= 0 && "Only -1 is a valid negative mask element!");
+ for (int l = 0; l < NumElts; l += NumLaneElts) {
+ for (int i = 0; i < NumLaneElts; ++i) {
+ if (Mask[l + i] == -1)
+ continue;
+ assert(Mask[l + i] >= 0 && "Only -1 is a valid negative mask element!");
- // Based on the mod-Size value of this mask element determine where
- // a rotated vector would have started.
- int StartIdx = i - (Mask[i] % Size);
- if (StartIdx == 0)
- // The identity rotation isn't interesting, stop.
- return SDValue();
+ // Get the mod-Size index and lane correct it.
+ int LaneIdx = (Mask[l + i] % NumElts) - l;
+ // Make sure it was in this lane.
+ if (LaneIdx < 0 || LaneIdx >= NumLaneElts)
+ return SDValue();
- // If we found the tail of a vector the rotation must be the missing
- // front. If we found the head of a vector, it must be how much of the head.
- int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+ // Determine where a rotated vector would have started.
+ int StartIdx = i - LaneIdx;
+ if (StartIdx == 0)
+ // The identity rotation isn't interesting, stop.
+ return SDValue();
- if (Rotation == 0)
- Rotation = CandidateRotation;
- else if (Rotation != CandidateRotation)
- // The rotations don't match, so we can't match this mask.
- return SDValue();
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, it must be how much of the
+ // head.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : NumLaneElts - StartIdx;
- // Compute which value this mask is pointing at.
- SDValue MaskV = Mask[i] < Size ? V1 : V2;
-
- // Compute which of the two target values this index should be assigned to.
- // This reflects whether the high elements are remaining or the low elements
- // are remaining.
- SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
-
- // Either set up this value if we've not encountered it before, or check
- // that it remains consistent.
- if (!TargetV)
- TargetV = MaskV;
- else if (TargetV != MaskV)
- // This may be a rotation, but it pulls from the inputs in some
- // unsupported interleaving.
- return SDValue();
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ // The rotations don't match, so we can't match this mask.
+ return SDValue();
+
+ // Compute which value this mask is pointing at.
+ SDValue MaskV = Mask[l + i] < NumElts ? V1 : V2;
+
+ // Compute which of the two target values this index should be assigned
+ // to. This reflects whether the high elements are remaining or the low
+ // elements are remaining.
+ SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (!TargetV)
+ TargetV = MaskV;
+ else if (TargetV != MaskV)
+ // This may be a rotation, but it pulls from the inputs in some
+ // unsupported interleaving.
+ return SDValue();
+ }
}
// Check that we successfully analyzed the mask, and normalize the results.
else if (!Hi)
Hi = Lo;
- assert(VT.getSizeInBits() == 128 &&
- "Rotate-based lowering only supports 128-bit lowering!");
- assert(Mask.size() <= 16 &&
- "Can shuffle at most 16 bytes in a 128-bit vector!");
-
// The actual rotate instruction rotates bytes, so we need to scale the
- // rotation based on how many bytes are in the vector.
- int Scale = 16 / Mask.size();
+ // rotation based on how many bytes are in the vector lane.
+ int Scale = 16 / NumLaneElts;
- // SSSE3 targets can use the palignr instruction
+ // SSSE3 targets can use the palignr instruction.
if (Subtarget->hasSSSE3()) {
- // Cast the inputs to v16i8 to match PALIGNR.
- Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Lo);
- Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Hi);
+ // Cast the inputs to i8 vector of correct length to match PALIGNR.
+ MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
+ Lo = DAG.getNode(ISD::BITCAST, DL, AlignVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, DL, AlignVT, Hi);
return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(X86ISD::PALIGNR, DL, MVT::v16i8, Hi, Lo,
+ DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
DAG.getConstant(Rotation * Scale, MVT::i8)));
}
+ assert(VT.getSizeInBits() == 128 &&
+ "Rotate-based lowering only supports 128-bit lowering!");
+ assert(Mask.size() <= 16 &&
+ "Can shuffle at most 16 bytes in a 128-bit vector!");
+
// Default SSE2 implementation
int LoByteShift = 16 - Rotation * Scale;
int HiByteShift = Rotation * Scale;
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
}
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v8i32, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v8i32, V1, V2, Mask, DAG))
+ return Shift;
+
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
// If the shuffle patterns aren't repeated but it is a single input, directly
// generate a cross-lane VPERMD instruction.
if (isSingleInputShuffleMask(Mask)) {
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
}
- // Try to use bit shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsBitShift(
- DL, MVT::v8i32, V1, V2, Mask, DAG))
- return Shift;
-
- // Try to use byte shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsByteShift(
- DL, MVT::v8i32, V1, V2, Mask, DAG))
- return Shift;
-
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
12, 28, 13, 29, 14, 30, 15, 31))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2);
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v16i16, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v16i16, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
if (isSingleInputShuffleMask(Mask)) {
// There are no generalized cross-lane shuffle operations available on i16
// element types.
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
}
- // Try to use bit shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsBitShift(
- DL, MVT::v16i16, V1, V2, Mask, DAG))
- return Shift;
-
- // Try to use byte shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsByteShift(
- DL, MVT::v16i16, V1, V2, Mask, DAG))
- return Shift;
-
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2);
+ // Try to use bit shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsBitShift(
+ DL, MVT::v32i8, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v32i8, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
if (isSingleInputShuffleMask(Mask)) {
// There are no generalized cross-lane shuffle operations available on i8
// element types.
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask));
}
- // Try to use bit shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsBitShift(
- DL, MVT::v32i8, V1, V2, Mask, DAG))
- return Shift;
-
- // Try to use byte shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsByteShift(
- DL, MVT::v32i8, V1, V2, Mask, DAG))
- return Shift;
-
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(