[x86] Teach the new vector shuffle lowering to also use pmovzx for v4i32

author Chandler Carruth <chandlerc@gmail.com>

Fri, 19 Sep 2014 08:37:44 +0000 (08:37 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Fri, 19 Sep 2014 08:37:44 +0000 (08:37 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Fri, 19 Sep 2014 08:37:44 +0000 (08:37 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Fri, 19 Sep 2014 08:37:44 +0000 (08:37 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index ccbe0af154709a04d06067c976adef7858b9f7ac..91ca1d6cc6efdec737ca3165e5a70c0c453630ed 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7841,6 +7841,13 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                         getV4X86ShuffleImm8ForMask(Mask, DAG));
    }
  
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative.
+  if (Subtarget->hasSSE41())
+    if (SDValue ZExt =
+            lowerVectorShuffleAsZeroExtend(DL, MVT::v4i32, V1, V2, Mask, DAG))
+      return ZExt;
+
    // Use dedicated unpack instructions for masks that match their pattern.
    if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
      return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
@@ -8517,7 +8524,6 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                                        OrigMask, DAG))
        return ZExt;
  
-
    auto isV1 = [](int M) { return M >= 0 && M < 8; };
    auto isV2 = [](int M) { return M >= 8; };
  
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll

index 077780416dc3c18686d55588c8a962a79c7cf12b..21d74928ebf479187768730468be3b871aa95c04 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -765,3 +765,47 @@ define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
    %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
    ret <4 x i32> %shuffle
  }
+
+define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: @shuffle_v4i32_0u1u
+; ALL:       # BB#0:
+; ALL-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
+; SSE2-LABEL: @shuffle_v4i32_0z1z
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: @shuffle_v4i32_0z1z
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSE3-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSE3-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_0z1z
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSSE3-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSSE3-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: @shuffle_v4i32_0z1z
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: @shuffle_v4i32_0z1z
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpmovzxdq %xmm0, %xmm0
+; AVX1-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  ret <4 x i32> %shuffle
+}
author	Chandler Carruth <chandlerc@gmail.com>
	Fri, 19 Sep 2014 08:37:44 +0000 (08:37 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Fri, 19 Sep 2014 08:37:44 +0000 (08:37 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-128-v4.ll		patch \| blob \| history