Improve DAG combine pass on certain IR vector patterns

author Mehdi Amini <mehdi.amini@apple.com>

Sat, 17 Jan 2015 01:35:56 +0000 (01:35 +0000)

committer Mehdi Amini <mehdi.amini@apple.com>

Sat, 17 Jan 2015 01:35:56 +0000 (01:35 +0000)
author Mehdi Amini <mehdi.amini@apple.com>
Sat, 17 Jan 2015 01:35:56 +0000 (01:35 +0000)
committer Mehdi Amini <mehdi.amini@apple.com>
Sat, 17 Jan 2015 01:35:56 +0000 (01:35 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 5145731f6231f3922f25bfaf5d97a5c5288dac35..3bde991879396a03a32d18934e6f7b6e89c75242 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11347,7 +11347,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
    return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
  }
  
-// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
+// or turn a shuffle of a single concat into simpler shuffle then concat.
  static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
    EVT VT = N->getValueType(0);
    unsigned NumElts = VT.getVectorNumElements();
@@ -11361,6 +11362,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
    unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
    unsigned NumConcats = NumElts / NumElemsPerConcat;
  
+  // Special case: shuffle(concat(A,B)) can be more efficiently represented
+  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
+  // half vector elements.
+  if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
+      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
+                  SVN->getMask().end(), [](int i) { return i == -1; })) {
+    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
+                              ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
+    N1 = DAG.getUNDEF(ConcatVT);
+    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
+  }
+
    // Look at every vector that's inserted. We're looking for exact
    // subvector-sized copies from a concatenated vector
    for (unsigned I = 0; I != NumConcats; ++I) {
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll

index 77903da355836607fe40441fc7b2d902d500dfef..e4bd4c4f817644915d1e36e6bd28b5cbefde00ef 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -1849,3 +1849,45 @@ define <8 x float> @splat_v8f32(<4 x float> %r) {
    %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
    ret <8 x float> %1
  }
+
+define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
+; ALL-LABEL: concat_v2f32_1:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    vmovq (%rdi), %xmm0
+; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %tmp74 = load <2 x float>* %tmp65, align 8
+  %tmp72 = load <2 x float>* %tmp64, align 8
+  %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %tmp76
+}
+
+define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
+; ALL-LABEL: concat_v2f32_2:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    vmovq (%rdi), %xmm0
+; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %tmp74 = load <2 x float>* %tmp65, align 8
+  %tmp72 = load <2 x float>* %tmp64, align 8
+  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %tmp76
+}
+
+define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
+; ALL-LABEL: concat_v2f32_3:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    vmovq (%rdi), %xmm0
+; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
+; ALL-NEXT:    retq
+entry:
+  %tmp74 = load <2 x float>* %tmp65, align 8
+  %tmp72 = load <2 x float>* %tmp64, align 8
+  %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %res
+}
author	Mehdi Amini <mehdi.amini@apple.com>
	Sat, 17 Jan 2015 01:35:56 +0000 (01:35 +0000)
committer	Mehdi Amini <mehdi.amini@apple.com>
	Sat, 17 Jan 2015 01:35:56 +0000 (01:35 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v8.ll		patch \| blob \| history