Add a dag combine to xform 256-bit shuffles into simple vector

author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Thu, 11 Aug 2011 21:50:44 +0000 (21:50 +0000)

committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Thu, 11 Aug 2011 21:50:44 +0000 (21:50 +0000)
author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Thu, 11 Aug 2011 21:50:44 +0000 (21:50 +0000)
committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Thu, 11 Aug 2011 21:50:44 +0000 (21:50 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 039901769bdb7acf4efbcddcb160327a5465df85..10ab70750e3fe610bd0e80c07bf22d29ea829181 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11548,6 +11548,38 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
    return TargetLowering::isGAPlusOffset(N, GA, Offset);
  }
  
+/// isShuffleHigh128VectorInsertLow - Returns true when the shuffle behaves
+/// like pulling the upper 128-bit half out of a 256-bit vector and placing it
+/// in the lower half of a fresh 256-bit vector whose upper half is undefined.
+static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
+  EVT ResVT = SVOp->getValueType(0);
+  int HalfElems = ResVT.getVectorNumElements() / 2;
+
+  // Accepted masks: <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+  for (int Lo = 0; Lo < HalfElems; ++Lo) {
+    int Hi = Lo + HalfElems;
+    if (SVOp->getMaskElt(Hi) >= 0 || !isUndefOrEqual(SVOp->getMaskElt(Lo), Hi))
+      return false;
+  }
+  return true;
+}
+
+/// isShuffleLow128VectorInsertHigh - Returns true when the shuffle behaves
+/// like pulling the lower 128-bit half out of a 256-bit vector and placing it
+/// in the upper half of a fresh 256-bit vector whose lower half is undefined.
+static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
+  int NumElems = SVOp->getValueType(0).getVectorNumElements();
+
+  // Accepted masks: <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+  for (int Src = 0, Dst = NumElems/2; Dst < NumElems; ++Src, ++Dst) {
+    if (SVOp->getMaskElt(Src) >= 0 ||
+        !isUndefOrEqual(SVOp->getMaskElt(Dst), Src))
+      return false;
+  }
+
+  return true;
+}
+
  /// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
  static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
                                          TargetLowering::DAGCombinerInfo &DCI) {
@@ -11556,6 +11588,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
    SDValue V1 = SVOp->getOperand(0);
    SDValue V2 = SVOp->getOperand(1);
    EVT VT = SVOp->getValueType(0);
+  int NumElems = VT.getVectorNumElements();
  
    if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
        V2.getOpcode() == ISD::CONCAT_VECTORS) {
@@ -11580,7 +11613,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
      // To match the shuffle mask, the first half of the mask should
      // be exactly the first vector, and all the rest a splat with the
      // first element of the second one.
-    int NumElems = VT.getVectorNumElements();
      for (int i = 0; i < NumElems/2; ++i)
        if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
            !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
@@ -11594,6 +11626,27 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
      return DCI.CombineTo(N, InsV);
    }
  
+  //===--------------------------------------------------------------------===//
+  // Combine some shuffles into subvector extracts and inserts:
+  //
+
+  // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+  if (isShuffleHigh128VectorInsertLow(SVOp)) {
+    SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32),
+                                    DAG, dl);
+    SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
+                                      V, DAG.getConstant(0, MVT::i32), DAG, dl);
+    return DCI.CombineTo(N, InsV);
+  }
+
+  // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+  if (isShuffleLow128VectorInsertHigh(SVOp)) {
+    SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl);
+    SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT),
+                             V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+    return DCI.CombineTo(N, InsV);
+  }
+
    return SDValue();
  }
  
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll

new file mode 100644 (file)

index 0000000..dccf901
--- /dev/null
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; The low half of the result takes elements 4-7 of %a; the remaining mask
+; entries (index 8) select from the undef second operand.
+; CHECK-NOT: vunpck
+; CHECK: vextractf128 $1
+define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8>
+  ret <8 x float> %shuffle
+}
+
+; The low half of the result takes elements 2-3 of %a; the remaining mask
+; entries (index 4) select from the undef second operand.
+; CHECK-NOT: vunpck
+; CHECK: vextractf128 $1
+define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 4>
+  ret <4 x double> %shuffle
+}
+
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll

new file mode 100644 (file)

index 0000000..b54b57b
--- /dev/null
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; The high half of the result takes elements 0-3 of %a; the leading mask
+; entries (index 8) select from the undef second operand.
+; CHECK-NOT: vunpck
+; CHECK: vinsertf128 $1
+define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x float> %shuffle
+}
+
+; The high half of the result takes elements 0-1 of %a; the leading mask
+; entries (index 4) select from the undef second operand.
+; CHECK-NOT: vunpck
+; CHECK: vinsertf128 $1
+define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 1>
+  ret <4 x double> %shuffle
+}
+
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Thu, 11 Aug 2011 21:50:44 +0000 (21:50 +0000)
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Thu, 11 Aug 2011 21:50:44 +0000 (21:50 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/avx-vextractf128.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/avx-vinsertf128.ll	[new file with mode: 0644]	patch \| blob