Simplify handling of v16i8 shuffles and fix a missed optimization.

author Craig Topper <craig.topper@gmail.com>
Fri, 18 May 2012 06:42:06 +0000 (06:42 +0000)
committer Craig Topper <craig.topper@gmail.com>
Fri, 18 May 2012 06:42:06 +0000 (06:42 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 75e05883b6b18dca9d67f268e7e25f9975a71f92..e1f777b402aed161ae285b3138ecd08be6bff523 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5770,21 +5770,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
    DebugLoc dl = SVOp->getDebugLoc();
    ArrayRef<int> MaskVals = SVOp->getMask();
  
+  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+
    // If we have SSSE3, case 1 is generated when all result bytes come from
    // one of  the inputs.  Otherwise, case 2 is generated.  If no SSSE3 is
    // present, fall back to case 3.
-  // FIXME: kill V2Only once shuffles are canonizalized by getNode.
-  bool V1Only = true;
-  bool V2Only = true;
-  for (unsigned i = 0; i < 16; ++i) {
-    int EltIdx = MaskVals[i];
-    if (EltIdx < 0)
-      continue;
-    if (EltIdx < 16)
-      V2Only = false;
-    else
-      V1Only = false;
-  }
  
    // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
    if (TLI.getSubtarget()->hasSSSE3()) {
@@ -5796,23 +5786,16 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
      // Otherwise, we have elements from both input vectors, and must zero out
      // elements that come from V2 in the first mask, and V1 in the second mask
      // so that we can OR them together.
-    bool TwoInputs = !(V1Only || V2Only);
      for (unsigned i = 0; i != 16; ++i) {
        int EltIdx = MaskVals[i];
-      if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
-        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
-        continue;
-      }
+      if (EltIdx < 0 || EltIdx >= 16)
+        EltIdx = 0x80;
        pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
      }
-    // If all the elements are from V2, assign it to V1 and return after
-    // building the first pshufb.
-    if (V2Only)
-      V1 = V2;
      V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
                       DAG.getNode(ISD::BUILD_VECTOR, dl,
                                   MVT::v16i8, &pshufbMask[0], 16));
-    if (!TwoInputs)
+    if (V2IsUndef)
        return V1;
  
      // Calculate the shuffle mask for the second input, shuffle it, and
@@ -5820,10 +5803,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
      pshufbMask.clear();
      for (unsigned i = 0; i != 16; ++i) {
        int EltIdx = MaskVals[i];
-      if (EltIdx < 16) {
-        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
-        continue;
-      }
+      EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16;
        pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
      }
      V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
@@ -5837,7 +5817,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
    // the 16 different words that comprise the two doublequadword input vectors.
    V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
    V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
-  SDValue NewV = V2Only ? V2 : V1;
+  SDValue NewV = V1;
    for (int i = 0; i != 8; ++i) {
      int Elt0 = MaskVals[i*2];
      int Elt1 = MaskVals[i*2+1];
@@ -5847,9 +5827,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
        continue;
  
      // This word of the result is already in the correct place, skip it.
-    if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
-      continue;
-    if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
+    if ((Elt0 == i*2) && (Elt1 == i*2+1))
        continue;
  
      SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll

index 5ea1b4dff1c1587cb0b1bd00c23c271b9ddb5eec..48638b3b696c67bd6f1bebfd44343557b4d187cd 100644 (file)
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -249,9 +249,10 @@ entry:
  ; X64:         t16:
  ; X64:                 pextrw  $8, %xmm0, %eax
  ; X64:                 pslldq  $2, %xmm0
-; X64:                 movd    %xmm0, %ecx
-; X64:                 pextrw  $1, %xmm0, %edx
-; X64:                 pinsrw  $0, %ecx, %xmm0
+; X64:                 pextrw  $1, %xmm0, %ecx
+; X64:                 movzbl  %cl, %ecx
+; X64:                 orl     %eax, %ecx
+; X64:                 pinsrw  $1, %ecx, %xmm0
  ; X64:                 ret
  }
  
diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll

index 7f0fcb5969e4f48361b456a0e4eff8f2bbb40cb0..f5083b4b8011a0ec98a20d75b02e157b6abf2662 100644 (file)
--- a/test/CodeGen/X86/vec_shuffle-35.ll
+++ b/test/CodeGen/X86/vec_shuffle-35.ll
@@ -1,6 +1,6 @@
  ; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
-; RUN: grep pextrw %t | count 13
-; RUN: grep pinsrw %t | count 14
+; RUN: grep pextrw %t | count 12
+; RUN: grep pinsrw %t | count 13
  ; RUN: grep rolw %t | count 13
  ; RUN: not grep esp %t
  ; RUN: not grep ebp %t
author	Craig Topper <craig.topper@gmail.com>
	Fri, 18 May 2012 06:42:06 +0000 (06:42 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Fri, 18 May 2012 06:42:06 +0000 (06:42 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/sse3.ll		patch | blob | history
test/CodeGen/X86/vec_shuffle-35.ll		patch | blob | history