[DagCombine] Improve DAGCombiner BUILD_VECTOR when it has two sources of elements

author Michael Kuperstein <michael.m.kuperstein@intel.com>

Tue, 23 Dec 2014 08:59:45 +0000 (08:59 +0000)

committer Michael Kuperstein <michael.m.kuperstein@intel.com>

Tue, 23 Dec 2014 08:59:45 +0000 (08:59 +0000)
author Michael Kuperstein <michael.m.kuperstein@intel.com>
Tue, 23 Dec 2014 08:59:45 +0000 (08:59 +0000)
committer Michael Kuperstein <michael.m.kuperstein@intel.com>
Tue, 23 Dec 2014 08:59:45 +0000 (08:59 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index dc556fa63df125a003ca7ff3faad9a15bcc04c2b..7a91aae2489b5f46cf6453eea11f27199923eaa2 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10832,6 +10832,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  
    // If everything is good, we can make a shuffle operation.
    if (VecIn1.getNode()) {
+    unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != NumInScalars; ++i) {
        unsigned Opcode = N->getOperand(i).getOpcode();
@@ -10858,8 +10859,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
          continue;
        }
  
-      // Otherwise, use InIdx + VecSize
-      Mask.push_back(NumInScalars+ExtIndex);
+      // Otherwise, use InIdx + InputVecSize
+      Mask.push_back(InNumElements + ExtIndex);
      }
  
      // Avoid introducing illegal shuffles with zero.
@@ -10869,14 +10870,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      // We can't generate a shuffle node with mismatched input and output types.
      // Attempt to transform a single input vector to the correct type.
      if ((VT != VecIn1.getValueType())) {
-      // We don't support shuffeling between TWO values of different types.
-      if (VecIn2.getNode())
-        return SDValue();
-
        // If the input vector type has a different base type to the output
        // vector type, bail out.
-      if (VecIn1.getValueType().getVectorElementType() !=
-          VT.getVectorElementType())
+      EVT VTElemType = VT.getVectorElementType();
+      if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
+          (VecIn2.getNode() &&
+           (VecIn2.getValueType().getVectorElementType() != VTElemType)))
          return SDValue();
  
        // If the input vector is too small, widen it.
@@ -10884,11 +10883,22 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
        // output registers. For example XMM->YMM widening on X86 with AVX.
        EVT VecInT = VecIn1.getValueType();
        if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
-        // Widen the input vector by adding undef values.
-        VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
-                             VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+        // If we only have one small input, widen it by adding undef values.
+        if (!VecIn2.getNode())
+          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
+                               DAG.getUNDEF(VecIn1.getValueType()));
+        else if (VecIn1.getValueType() == VecIn2.getValueType()) {
+          // If we have two small inputs of the same type, try to concat them.
+          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
+          VecIn2 = SDValue(nullptr, 0);
+        } else
+          return SDValue();
        } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
          // If the input vector is too large, try to split it.
+        // We don't support having two input vectors that are too large.
+        if (VecIn2.getNode())
+          return SDValue();
+
          if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
            return SDValue();
          
@@ -10899,7 +10909,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
          VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(0, TLI.getVectorIdxTy()));
          UsesZeroVector = false;
-      } else 
+      } else
          return SDValue();
      }
  
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll

index e7bae3415bf91dfc118c3b8783509defdde0a6b1..897a69a5496909e2364484b87a2ae2f2a2d44780 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -1584,6 +1584,26 @@ define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
    ret <4 x i32> %2
  }
  
+define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) {
+; SSE-LABEL: combine_test22:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq    (%rdi), %xmm0
+; SSE-NEXT:    movhpd  (%rsi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_test22:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq    (%rdi), %xmm0
+; AVX1-NEXT:    vmovhpd  (%rsi), %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; Current AVX2 lowering of this is still awful, not adding a test case.
+  %1 = load <2 x float>* %a, align 8
+  %2 = load <2 x float>* %b, align 8
+  %3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %3
+}
+
  ; Check some negative cases.
  ; FIXME: Do any of these really make sense? Are they redundant with the above tests?
author	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Tue, 23 Dec 2014 08:59:45 +0000 (08:59 +0000)
committer	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Tue, 23 Dec 2014 08:59:45 +0000 (08:59 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
test/CodeGen/X86/vector-shuffle-combining.ll		patch | blob | history