From: Dan Gohman <gohman@apple.com>
Date: Wed, 11 Feb 2009 21:29:39 +0000 (+0000)
Subject: Don't use special heuristics for nodes with no data predecessors
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=c8db34cb07fea88c4b8f3e0f095fd8aed568b28e;p=oota-llvm.git

Don't use special heuristics for nodes with no data predecessors
unless they actually have data successors, and likewise for nodes
with no data successors unless they actually have data predecessors.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@64327 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 55780e40918..0b7f99f42ad 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -989,15 +989,16 @@ namespace {
         // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to
         // facilitate coalescing.
         return 0;
-      if (SU->NumSuccs == 0)
-        // If SU does not have a use, i.e. it doesn't produce a value that would
-        // be consumed (e.g. store), then it terminates a chain of computation.
-        // Give it a large SethiUllman number so it will be scheduled right
-        // before its predecessors that it doesn't lengthen their live ranges.
+      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+        // If SU does not have a register use, i.e. it doesn't produce a value
+        // that would be consumed (e.g. store), then it terminates a chain of
+        // computation.  Give it a large SethiUllman number so it will be
+        // scheduled right before its predecessors that it doesn't lengthen
+        // their live ranges.
         return 0xffff;
-      if (SU->NumPreds == 0)
-        // If SU does not have a def, schedule it close to its uses because it
-        // does not lengthen any live ranges.
+      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+        // If SU does not have a register def, schedule it close to its uses
+        // because it does not lengthen any live ranges.
         return 0;
       return SethiUllmanNumbers[SU->NodeNum];
     }
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
index 539fc15fcba..f78d52651de 100644
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 4
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 3
 ; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of dead spill slots removed}
 ; rdar://5761454
 
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
index ca42f2bc137..868c4958623 100644
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats -info-output-file - | not grep {Number of dead stores elided}
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats -info-output-file - | grep {Number of dead stores elided} | count 1
 ; PR2568
 
 @g_3 = external global i16		; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index b619411eaff..e00d1e50e49 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -stack-alignment=16 > %t
 ; RUN: grep pmul %t | count 12
-; RUN: grep mov %t | count 15
+; RUN: grep mov %t | count 12
 
 define <4 x i32> @a(<4 x i32> %i) nounwind  {
         %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
index 1100eb7c467..547763e4a79 100644
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -2,9 +2,10 @@
 ; RUN: llc -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
 ; RUN:   grep fail | count 1
 
-declare void @bar()
-declare <4 x float> @qux()
-declare <2 x double> @pin()
+declare float @test_f(float %f)
+declare double @test_d(double %f)
+declare <4 x float> @test_vf(<4 x float> %f)
+declare <2 x double> @test_vd(<2 x double> %f)
 declare float @llvm.sqrt.f32(float)
 declare double @llvm.sqrt.f64(double)
 
@@ -26,99 +27,99 @@ declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
 
 define float @foo(float %f) {
-  call void @bar()
+  %a = call float @test_f(float %f)
   %t = call float @llvm.sqrt.f32(float %f)
   ret float %t
 }
 define double @doo(double %f) {
-  call void @bar()
+  %a = call double @test_d(double %f)
   %t = call double @llvm.sqrt.f64(double %f)
   ret double %t
 }
 define <4 x float> @a0(<4 x float> %f) {
-  call void @bar()
+  %a = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
   ret <4 x float> %t
 }
 define <4 x float> @a1(<4 x float> %f) {
-  call void @bar()
+  %a = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
   ret <4 x float> %t
 }
 define <4 x float> @a2(<4 x float> %f) {
-  call void @bar()
+  %a = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
   ret <4 x float> %t
 }
 define <4 x float> @b3(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)
   ret <4 x float> %t
 }
 define <4 x float> @b4(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)
   ret <4 x float> %t
 }
 define <4 x float> @b5(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)
   ret <4 x float> %t
 }
 define <4 x float> @b6(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)
   ret <4 x float> %t
 }
 define <4 x float> @b7(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)
   ret <4 x float> %t
 }
 define <4 x float> @b8(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
   %t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)
   ret <4 x float> %t
 }
 define <2 x double> @c1(<2 x double> %f) {
-  call void @bar()
+  %a = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
   ret <2 x double> %t
 }
 define <2 x double> @d3(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)
   ret <2 x double> %t
 }
 define <2 x double> @d4(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)
   ret <2 x double> %t
 }
 define <2 x double> @d5(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)
   ret <2 x double> %t
 }
 define <2 x double> @d6(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)
   ret <2 x double> %t
 }
 define <2 x double> @d7(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)
   ret <2 x double> %t
 }
 define <2 x double> @d8(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)
   ret <2 x double> %t
 }
 
 ; This one should fail to fuse.
 define <2 x double> @z0(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
   %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
   ret <2 x double> %t
 }