transform fadd chains to increase parallelism

author Sanjay Patel <spatel@rotateright.com>

Tue, 28 Apr 2015 21:03:22 +0000 (21:03 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Tue, 28 Apr 2015 21:03:22 +0000 (21:03 +0000)
author Sanjay Patel <spatel@rotateright.com>
Tue, 28 Apr 2015 21:03:22 +0000 (21:03 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 28 Apr 2015 21:03:22 +0000 (21:03 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 629313c7120795c20545626d2fc5f1deaa5f03cd..66c1356152da93eca6940abbc9c4dc8fbeee7fb9 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7801,6 +7801,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                             N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
        }
      }
+
+    // Canonicalize chains of adds to LHS to simplify the following transform.
+    if (N0.getOpcode() != ISD::FADD && N1.getOpcode() == ISD::FADD)
+      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
+    
+    // Convert a chain of 3 dependent operations into 2 independent operations
+    // and 1 dependent operation:
+    //  (fadd N0: (fadd N00: (fadd z, w), N01: y), N1: x) ->
+    //  (fadd N00: (fadd z, w), (fadd N1: x, N01: y))
+    if (N0.getOpcode() == ISD::FADD &&  N0.hasOneUse() &&
+        N1.getOpcode() != ISD::FADD) {
+      SDValue N00 = N0.getOperand(0);
+      if (N00.getOpcode() == ISD::FADD) {
+        SDValue N01 = N0.getOperand(1);
+        SDValue NewAdd = DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N01);
+        return DAG.getNode(ISD::FADD, SDLoc(N), VT, N00, NewAdd);
+      }
+    }
    } // enable-unsafe-fp-math
  
    // FADD -> FMA combines:
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll

index 479f60d91f1d56331dbfdaef8bfbbb70590b3c0d..eb2ebd9119a90c355be3b1cfdcee43bbb9846fde 100644 (file)
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -113,3 +113,46 @@ define float @test11(float %a) {
    %t2 = fadd float %a, %t1
    ret float %t2
  }
+
+; Verify that the first two adds are independent for every operand order;
+; their destination registers are then the sources of the third add.
+
+define float @reassociate_adds1(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %add0 = fadd float %a, %b
+  %add1 = fadd float %add0, %c
+  %add2 = fadd float %add1, %d
+  ret float %add2
+}
+
+define float @reassociate_adds2(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %add0 = fadd float %a, %b
+  %add1 = fadd float %c, %add0
+  %add2 = fadd float %add1, %d
+  ret float %add2
+}
+
+define float @reassociate_adds3(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %add0 = fadd float %a, %b
+  %add1 = fadd float %add0, %c
+  %add2 = fadd float %d, %add1
+  ret float %add2
+}
+
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 28 Apr 2015 21:03:22 +0000 (21:03 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 28 Apr 2015 21:03:22 +0000 (21:03 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
test/CodeGen/X86/fp-fast.ll		patch | blob | history