[x86] Add the beginnings of a proper DAG combine to match ADDSUBPS and ADDSUBPD patterns.

author Chandler Carruth <chandlerc@gmail.com>

Tue, 16 Sep 2014 00:15:20 +0000 (00:15 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Tue, 16 Sep 2014 00:15:20 +0000 (00:15 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Tue, 16 Sep 2014 00:15:20 +0000 (00:15 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Tue, 16 Sep 2014 00:15:20 +0000 (00:15 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 166719b854f057c2e14a02c1344e930a0b3aebe5..b5bf866b22deb8fe42e56b552449483af7f3a9e1 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19987,6 +19987,55 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
    return SDValue();
  }
  
+/// \brief Try to combine a shuffle into a target-specific add-sub node.
+///
+/// We match on the abstract vector shuffle node so it is easier to match
+/// generically: a blend, equivalent to mask <0, 5, 2, 7>, of a single-use FSUB
+/// (first operand) and a single-use FADD (second operand) of the same two
+/// values becomes X86ISD::ADDSUB; otherwise an empty SDValue is returned. The
+/// dummy operand shuffles meant to mark unused lanes are not emitted yet.
+static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+
+  // We only handle target-independent (ISD::VECTOR_SHUFFLE) nodes here.
+  // FIXME: It would be easy and harmless to use the target shuffle mask
+  // extraction tool to support more.
+  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
+    return SDValue();
+
+  auto *SVN = cast<ShuffleVectorSDNode>(N);
+  ArrayRef<int> Mask = SVN->getMask();
+  SDValue V1 = N->getOperand(0);
+  SDValue V2 = N->getOperand(1);
+
+  // We require the first shuffle operand to be the FSUB node, and the second
+  // to be the FADD node.
+  // FIXME: We should support the commuted patterns.
+  if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD)
+    return SDValue();
+
+  // Bail out if either operation has other users; we can't fold them then.
+  if (!V1->hasOneUse() || !V2->hasOneUse())
+    return SDValue();
+
+  // Ensure that both operations have the same operands. The FSUB fixes the
+  // order (LHS, RHS); the FADD may match in either order since it commutes.
+  SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
+  if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
+      (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
+    return SDValue();
+
+  // We insist on the blend <0, 5, 2, 7>: for a 4-element vector that takes
+  // even lanes from V1 (the FSUB) and odd lanes from V2 (the FADD).
+  if (!isShuffleEquivalent(Mask, 0, 5, 2, 7))
+    return SDValue();
+
+  // FIXME: Munge the inputs through no-op shuffles that drop the undef lanes to
+  // allow nuking any instructions that feed only those lanes.
+
+  return DAG.getNode(X86ISD::ADDSUB, DL, N->getValueType(0), LHS, RHS);
+}
+
  /// PerformShuffleCombine - Performs several different shuffle combines.
  static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
@@ -20001,6 +20050,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
    if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
      return SDValue();
  
+  // If we have legalized the vector types, look for blends of FADD and FSUB
+  // nodes that we can fuse into an ADDSUB node.
+  if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
+    if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
+      return AddSub;
+
    // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
    if (Subtarget->hasFp256() && VT.is256BitVector() &&
        N->getOpcode() == ISD::VECTOR_SHUFFLE)
diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll

index 4852491e46a5693a44bd037c009ec6bf8b44185b..431588f90ab275bfd490f55b27520e220a75896f 100644 (file)
--- a/test/CodeGen/X86/sse3-avx-addsub.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK
  ; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX -check-prefix=CHECK
  
  ; Test ADDSUB ISel patterns.
author	Chandler Carruth <chandlerc@gmail.com>
	Tue, 16 Sep 2014 00:15:20 +0000 (00:15 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Tue, 16 Sep 2014 00:15:20 +0000 (00:15 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/sse3-avx-addsub.ll		patch \| blob \| history