Add a DAGCombine for transforming 128->256 casts into a simple vxorps + vinsertf128 pair of instructions.

author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Fri, 22 Jul 2011 00:15:00 +0000 (00:15 +0000)

committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Fri, 22 Jul 2011 00:15:00 +0000 (00:15 +0000)
author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Fri, 22 Jul 2011 00:15:00 +0000 (00:15 +0000)
committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Fri, 22 Jul 2011 00:15:00 +0000 (00:15 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 85c6f4923510935175adc5258f3a35663cd924f3..55d568092c0688cb32074f66fbdc9db43f94f797 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11232,23 +11232,77 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
    return TargetLowering::isGAPlusOffset(N, GA, Offset);
  }
  
-/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
-/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
-/// if the load addresses are consecutive, non-overlapping, and in the right
-/// order.
+/// PerformShuffleCombine256 - Fold 256-bit shuffles; empty SDValue if no match.
+static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
+                                        TargetLowering::DAGCombinerInfo &DCI) {
+  DebugLoc dl = N->getDebugLoc();
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  EVT VT = SVOp->getValueType(0);
+
+  if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+      V2.getOpcode() == ISD::CONCAT_VECTORS) {
+    // Match a shuffle whose two inputs are CONCAT_VECTORS shaped like this:
+    //                   0,0,0,...
+    //                      \
+    //    V      UNDEF    BUILD_VECTOR    UNDEF
+    //     \      /           \           /
+    //  CONCAT_VECTOR         CONCAT_VECTOR
+    //         \                  /
+    //          \                /
+    //          RESULT: V + zero extended
+    // Any other operand shape is rejected by returning an empty SDValue.
+    if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
+        V2.getOperand(1).getOpcode() != ISD::UNDEF ||
+        V1.getOperand(1).getOpcode() != ISD::UNDEF)
+      return SDValue();
+
+    if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
+      return SDValue();
+
+    // Via isUndefOrEqual, require mask element i to be i (low half, taken
+    // from V1) and mask element i+NumElems/2 to be NumElems (a splat of
+    // the first element of V2) for every i in the low half.
+    int NumElems = VT.getVectorNumElements();
+    for (int i = 0; i < NumElems/2; ++i)
+      if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
+          !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
+        return SDValue();
+
+    // Build a zero vector of type VT and insert V1's first operand into
+    // it at index 0 (the first half of the result).
+    SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl);
+    SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
+                         DAG.getConstant(0, MVT::i32), DAG, dl);
+    return DCI.CombineTo(N, InsV);
+  }
+
+  return SDValue();
+}
+
+/// PerformShuffleCombine - Performs several different shuffle combines.
  static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI) {
    DebugLoc dl = N->getDebugLoc();
    EVT VT = N->getValueType(0);
  
-  if (VT.getSizeInBits() != 128)
-    return SDValue();
-
    // Don't create instructions with illegal types after legalize types has run.
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
      return SDValue();
  
+  // Only handle pure VECTOR_SHUFFLE nodes.
+  if (VT.getSizeInBits() == 256 && N->getOpcode() == ISD::VECTOR_SHUFFLE)
+    return PerformShuffleCombine256(N, DAG, DCI);
+
+  // Only handle 128 wide vector from here on.
+  if (VT.getSizeInBits() != 128)
+    return SDValue();
+
+  // Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
+  // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
+  // consecutive, non-overlapping, and in the right order.
    SmallVector<SDValue, 16> Elts;
    for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
      Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll

new file mode 100644 (file)

index 0000000..bb742f8
--- /dev/null
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vxorps
+; CHECK-NEXT: vinsertf128 $0
+define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
+
+; CHECK: vxorps
+; CHECK-NEXT: vinsertf128 $0
+define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x double> %shuffle.i
+}
+
+; CHECK: vxorps
+; CHECK-NEXT: vinsertf128 $0
+define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x i64> %shuffle.i
+}
+
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Fri, 22 Jul 2011 00:15:00 +0000 (00:15 +0000)
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Fri, 22 Jul 2011 00:15:00 +0000 (00:15 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/avx-cast.ll	[new file with mode: 0644]	patch \| blob