[x86] Fix PR21139, one of the last remaining regressions found in the ... [subject truncated]

author Chandler Carruth <chandlerc@gmail.com>

Sun, 5 Oct 2014 12:07:34 +0000 (12:07 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Sun, 5 Oct 2014 12:07:34 +0000 (12:07 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Sun, 5 Oct 2014 12:07:34 +0000 (12:07 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Sun, 5 Oct 2014 12:07:34 +0000 (12:07 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 1bfacfe65a2f4492ee5b376134d6d810069fc1c7..64e0db1e70f867d34088f864b6e7ef322231adbf 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9278,21 +9278,29 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
    //
    // FIXME: We need to handle other interleaving widths (i16, i32, ...).
    if (shouldLowerAsInterleaving(Mask)) {
-    // FIXME: Figure out whether we should pack these into the low or high
-    // halves.
-
-    int EMask[16], OMask[16];
+    int NumLoHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+      return (M >= 0 && M < 8) || (M >= 16 && M < 24);
+    });
+    int NumHiHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+      return (M >= 8 && M < 16) || M >= 24;
+    });
+    int EMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+                     -1, -1, -1, -1, -1, -1, -1, -1};
+    int OMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+                     -1, -1, -1, -1, -1, -1, -1, -1};
+    bool UnpackLo = NumLoHalf >= NumHiHalf;
+    MutableArrayRef<int> TargetEMask(UnpackLo ? EMask : EMask + 8, 8);
+    MutableArrayRef<int> TargetOMask(UnpackLo ? OMask : OMask + 8, 8);
      for (int i = 0; i < 8; ++i) {
-      EMask[i] = Mask[2*i];
-      OMask[i] = Mask[2*i + 1];
-      EMask[i + 8] = -1;
-      OMask[i + 8] = -1;
+      TargetEMask[i] = Mask[2 * i];
+      TargetOMask[i] = Mask[2 * i + 1];
      }
  
      SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
      SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
  
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds);
+    return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+                       MVT::v16i8, Evens, Odds);
    }
  
    // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll

index e8613be02724eaa7609fe37447317cf3e978e9d7..acb6d50312730726322557d4bdbb801fb6972e94 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -232,6 +232,20 @@ define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(
    ret <16 x i8> %shuffle
  }
  
+define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle
+}
+
  define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
  ; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
  ; SSE:       # BB#0:
@@ -373,7 +387,7 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(
  define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
  ; SSE2-LABEL: trunc_v4i32_shuffle:
  ; SSE2:       # BB#0:
-; SSE2-NEXT:    pand .LCPI13_0(%rip), %xmm0
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
  ; SSE2-NEXT:    packuswb %xmm0, %xmm0
  ; SSE2-NEXT:    packuswb %xmm0, %xmm0
  ; SSE2-NEXT:    retq
@@ -444,7 +458,7 @@ entry:
  define <16 x i8> @PR20540(<8 x i8> %a) {
  ; SSE2-LABEL: PR20540:
  ; SSE2:       # BB#0:
-; SSE2-NEXT:    pand .LCPI16_0(%rip), %xmm0
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
  ; SSE2-NEXT:    packuswb %xmm0, %xmm0
  ; SSE2-NEXT:    pxor %xmm1, %xmm1
  ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
@@ -767,7 +781,7 @@ define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(
  ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
  ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
  ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,7]
-; SSE2-NEXT:    pand .LCPI23_0(%rip), %xmm0
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
  ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
  ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,4]
  ; SSE2-NEXT:    movdqa %xmm1, %xmm3
@@ -821,7 +835,7 @@ define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(
  ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
  ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
  ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]
-; SSE2-NEXT:    pand .LCPI24_0(%rip), %xmm1
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
  ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
  ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,4]
  ; SSE2-NEXT:    movdqa %xmm0, %xmm3
diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll

index 0ce3ecf5d612473e978453de89bfb805c7541dc8..53d7235a3d136f8adb8e296226002f0cbc3850c0 100644 (file)
--- a/test/CodeGen/X86/vector-zext.ll
+++ b/test/CodeGen/X86/vector-zext.ll
@@ -191,9 +191,8 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
  ;
  ; AVX1-LABEL: zext_16i8_to_16i16:
  ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
  ; AVX1-NEXT:    vpmovzxbw %xmm0, %xmm0
  ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
  ; AVX1-NEXT:    retq
author	Chandler Carruth <chandlerc@gmail.com>
	Sun, 5 Oct 2014 12:07:34 +0000 (12:07 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Sun, 5 Oct 2014 12:07:34 +0000 (12:07 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/vector-shuffle-128-v16.ll		patch | blob | history
test/CodeGen/X86/vector-zext.ll		patch | blob | history