Don't use special heuristics for nodes with no data predecessors

author Dan Gohman <gohman@apple.com>

Wed, 11 Feb 2009 21:29:39 +0000 (21:29 +0000)

committer Dan Gohman <gohman@apple.com>

Wed, 11 Feb 2009 21:29:39 +0000 (21:29 +0000)
author Dan Gohman <gohman@apple.com>
Wed, 11 Feb 2009 21:29:39 +0000 (21:29 +0000)
committer Dan Gohman <gohman@apple.com>
Wed, 11 Feb 2009 21:29:39 +0000 (21:29 +0000)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

index 55780e4091890c38e8c394873062a2f70f05429a..0b7f99f42ada53bc7856dabd4029fcfeedf18e2e 100644 (file)
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -989,15 +989,16 @@ namespace {
          // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to
          // facilitate coalescing.
          return 0;
-      if (SU->NumSuccs == 0)
-        // If SU does not have a use, i.e. it doesn't produce a value that would
-        // be consumed (e.g. store), then it terminates a chain of computation.
-        // Give it a large SethiUllman number so it will be scheduled right
-        // before its predecessors that it doesn't lengthen their live ranges.
+      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+        // If SU does not have a register use, i.e. it doesn't produce a value
+        // that would be consumed (e.g. store), then it terminates a chain of
+        // computation.  Give it a large SethiUllman number so it will be
+        // scheduled right before its predecessors so that it doesn't lengthen
+        // their live ranges.
          return 0xffff;
-      if (SU->NumPreds == 0)
-        // If SU does not have a def, schedule it close to its uses because it
-        // does not lengthen any live ranges.
+      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+        // If SU does not have a register def, schedule it close to its uses
+        // because it does not lengthen any live ranges.
          return 0;
        return SethiUllmanNumbers[SU->NodeNum];
      }
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll

index 539fc15fcba59e601fb271f8a9b28c2e30e4a475..f78d52651ded8d8e3535aff9b7e59b4fddc98899 100644 (file)
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 4
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of re-materialization} | grep 3
  ; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of dead spill slots removed}
  ; rdar://5761454
  
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll

index ca42f2bc137cdb5a20bed8ff749c4a49b0361c22..868c4958623e924d75de37f5ac0a549fee79c0b2 100644 (file)
--- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats -info-output-file - | not grep {Number of dead stores elided}
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats -info-output-file - | grep {Number of dead stores elided} | count 1
  ; PR2568
  
  @g_3 = external global i16             ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll

index b619411eaff2221a8745a0e3dceff1b00d513721..e00d1e50e49b2c34fd90ec721d02a0d2ef566391 100644 (file)
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,6 +1,6 @@
  ; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -stack-alignment=16 > %t
  ; RUN: grep pmul %t | count 12
-; RUN: grep mov %t | count 15
+; RUN: grep mov %t | count 12
  
  define <4 x i32> @a(<4 x i32> %i) nounwind  {
          %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll

index 1100eb7c4670308030fef022c0f63c28379a9f06..547763e4a7935850b055c12b2199ccf9c3518d8e 100644 (file)
--- a/test/CodeGen/X86/sse_reload_fold.ll
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -2,9 +2,10 @@
  ; RUN: llc -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
  ; RUN:   grep fail | count 1
  
-declare void @bar()
-declare <4 x float> @qux()
-declare <2 x double> @pin()
+declare float @test_f(float %f)
+declare double @test_d(double %f)
+declare <4 x float> @test_vf(<4 x float> %f)
+declare <2 x double> @test_vd(<2 x double> %f)
  declare float @llvm.sqrt.f32(float)
  declare double @llvm.sqrt.f64(double)
  
@@ -26,99 +27,99 @@ declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
  declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
  
  define float @foo(float %f) {
-  call void @bar()
+  %a = call float @test_f(float %f)
    %t = call float @llvm.sqrt.f32(float %f)
    ret float %t
  }
  define double @doo(double %f) {
-  call void @bar()
+  %a = call double @test_d(double %f)
    %t = call double @llvm.sqrt.f64(double %f)
    ret double %t
  }
  define <4 x float> @a0(<4 x float> %f) {
-  call void @bar()
+  %a = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
    ret <4 x float> %t
  }
  define <4 x float> @a1(<4 x float> %f) {
-  call void @bar()
+  %a = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
    ret <4 x float> %t
  }
  define <4 x float> @a2(<4 x float> %f) {
-  call void @bar()
+  %a = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
    ret <4 x float> %t
  }
  define <4 x float> @b3(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)
    ret <4 x float> %t
  }
  define <4 x float> @b4(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)
    ret <4 x float> %t
  }
  define <4 x float> @b5(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)
    ret <4 x float> %t
  }
  define <4 x float> @b6(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)
    ret <4 x float> %t
  }
  define <4 x float> @b7(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)
    ret <4 x float> %t
  }
  define <4 x float> @b8(<4 x float> %f) {
-  %y = call <4 x float> @qux()
+  %y = call <4 x float> @test_vf(<4 x float> %f)
    %t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)
    ret <4 x float> %t
  }
  define <2 x double> @c1(<2 x double> %f) {
-  call void @bar()
+  %a = call <2 x double> @test_vd(<2 x double> %f)
    %t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
    ret <2 x double> %t
  }
  define <2 x double> @d3(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
    %t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)
    ret <2 x double> %t
  }
  define <2 x double> @d4(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
    %t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)
    ret <2 x double> %t
  }
  define <2 x double> @d5(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
    %t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)
    ret <2 x double> %t
  }
  define <2 x double> @d6(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
    %t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)
    ret <2 x double> %t
  }
  define <2 x double> @d7(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
    %t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)
    ret <2 x double> %t
  }
  define <2 x double> @d8(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
    %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)
    ret <2 x double> %t
  }
  
  ; This one should fail to fuse.
  define <2 x double> @z0(<2 x double> %f) {
-  %y = call <2 x double> @pin()
+  %y = call <2 x double> @test_vd(<2 x double> %f)
    %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
    ret <2 x double> %t
  }
author	Dan Gohman <gohman@apple.com>
	Wed, 11 Feb 2009 21:29:39 +0000 (21:29 +0000)
committer	Dan Gohman <gohman@apple.com>
	Wed, 11 Feb 2009 21:29:39 +0000 (21:29 +0000)
lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp		patch | blob | history
test/CodeGen/X86/2008-02-22-ReMatBug.ll		patch | blob | history
test/CodeGen/X86/2008-08-05-SpillerBug.ll		patch | blob | history
test/CodeGen/X86/pmul.ll		patch | blob | history
test/CodeGen/X86/sse_reload_fold.ll		patch | blob | history