[x86] Teach the new AVX v4f64 shuffle lowering to use UNPCK instructions

author Chandler Carruth <chandlerc@gmail.com>

Fri, 15 Aug 2014 17:42:00 +0000 (17:42 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Fri, 15 Aug 2014 17:42:00 +0000 (17:42 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Fri, 15 Aug 2014 17:42:00 +0000 (17:42 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Fri, 15 Aug 2014 17:42:00 +0000 (17:42 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 327cc296fe602019c2f461839cbacbdb3bebb4a1..b31aa44699888d88bbcabbd2ea97df9946feaad5 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7055,6 +7055,35 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
    return true;
  }
  
+/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
+/// Returns true iff \p Mask and \p Args have the same size and every element of
+/// \p Mask is either -1 (undef) or equal to the integer its \p Args entry names.
+static bool isShuffleEquivalentImpl(ArrayRef<int> Mask,
+                                    ArrayRef<const int *> Args) {
+  if (Mask.size() != Args.size())
+    return false;
+  for (int i = 0, e = Mask.size(); i < e; ++i) {
+    assert(*Args[i] >= 0 && "Arguments must be non-negative integers!");
+    assert(*Args[i] < (int)Args.size() * 2 &&
+           "Argument outside the range of possible shuffle inputs!");
+    if (Mask[i] != -1 && Mask[i] != *Args[i])
+      return false;
+  }
+  return true;
+}
+/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
+/// arguments.
+///
+/// This is a fast way to test a shuffle mask against a fixed pattern:
+///
+///   if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... }
+///
+/// It returns true if the mask is exactly as wide as the argument list, and
+/// each element of the mask is either -1 (signifying undef) or the value given
+/// in the argument. Each argument is asserted to be in [0, 2 * argument count).
+static const VariadicFunction1<
+    bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
+
  /// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
  ///
  /// This helper function produces an 8-bit shuffle immediate corresponding to
@@ -8440,6 +8469,19 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                         DAG.getConstant(VPERMILPMask, MVT::i8));
    }
  
+  // X86 has dedicated unpack instructions that can handle specific blend
+  // operations: UNPCKH and UNPCKL.
+  if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
+  if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
+  // FIXME: It would be nice to find a way to get canonicalization to commute
+  // these patterns.
+  if (isShuffleEquivalent(Mask, 4, 0, 6, 2))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
+  if (isShuffleEquivalent(Mask, 5, 1, 7, 3))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
+
    // Check if the blend happens to exactly fit that of SHUFPD.
    if (Mask[0] < 4 && (Mask[1] == -1 || Mask[1] >= 4) &&
        Mask[2] < 4 && (Mask[3] == -1 || Mask[3] >= 4)) {
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll

index b7047724e40166ead19bbef2237bb2be1c728afe..7051888302ee0eee5a819cddc249bf71fadb7155 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -239,11 +239,35 @@ define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
  define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
  ; AVX1-LABEL: @shuffle_v4f64_0426
  ; AVX1:       # BB#0:
-; AVX1-NEXT:    vshufpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT:    vunpcklpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
  ; AVX1-NEXT:    retq
    %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    ret <4 x double> %shuffle
  }
+define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: @shuffle_v4f64_1537
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhpd {{.*}} # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX1-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x double> %shuffle
+}
+define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: @shuffle_v4f64_4062
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklpd {{.*}} # ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; AVX1-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
+  ret <4 x double> %shuffle
+}
+define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: @shuffle_v4f64_5173
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhpd {{.*}} # ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; AVX1-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
+  ret <4 x double> %shuffle
+}
  define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  ; AVX1-LABEL: @shuffle_v4f64_5163
  ; AVX1:       # BB#0:
author	Chandler Carruth <chandlerc@gmail.com>
	Fri, 15 Aug 2014 17:42:00 +0000 (17:42 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Fri, 15 Aug 2014 17:42:00 +0000 (17:42 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v4.ll		patch \| blob \| history