[x86] Teach the x86 DAG combiner to form UNPCKLPS and UNPCKHPS

author Chandler Carruth <chandlerc@gmail.com>

Mon, 15 Sep 2014 11:26:25 +0000 (11:26 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Mon, 15 Sep 2014 11:26:25 +0000 (11:26 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Mon, 15 Sep 2014 11:26:25 +0000 (11:26 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Mon, 15 Sep 2014 11:26:25 +0000 (11:26 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 04f1fafa2e7e4088b86056cd52af0f654f6cf631..da3ec8b35eb94091bed0bd00391cd98156fd9082 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19413,6 +19413,20 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
                      /*AddTo*/ true);
        return true;
      }
+    if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
+      bool Lo = Mask.equals(0, 0, 1, 1);
+      unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+      MVT ShuffleVT = MVT::v4f32;
+      if (Depth == 1 && Root->getOpcode() == Shuffle)
+        return false; // Nothing to do!
+      Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+      DCI.AddToWorklist(Op.getNode());
+      Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+      DCI.AddToWorklist(Op.getNode());
+      DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+                    /*AddTo*/ true);
+      return true;
+    }
    }
  
    // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll

index dcd8ab166cfe569f946ab8cca9b7b8da6e40bb5d..601db25748a2fa100b5dd0b9701e3b25bc3df9e2 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -119,6 +119,20 @@ define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
    %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    ret <4 x float> %shuffle
  }
+define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: @shuffle_v4f32_0011
+; ALL:         unpcklps {{.*}} # xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: @shuffle_v4f32_2233
+; ALL:         unpckhps {{.*}} # xmm0 = xmm0[2,2,3,3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+  ret <4 x float> %shuffle
+}
  define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
  ; SSE2-LABEL: @shuffle_v4f32_0022
  ; SSE2:         shufps {{.*}} # xmm0 = xmm0[0,0,2,2]
author	Chandler Carruth <chandlerc@gmail.com>
	Mon, 15 Sep 2014 11:26:25 +0000 (11:26 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Mon, 15 Sep 2014 11:26:25 +0000 (11:26 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/vector-shuffle-128-v4.ll		patch | blob | history