Enable ILP scheduling for all nodes by default on PPC.
author Hal Finkel <hfinkel@anl.gov>
Sun, 10 Jun 2012 19:32:29 +0000 (19:32 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Sun, 10 Jun 2012 19:32:29 +0000 (19:32 +0000)
Averaged over the entire test-suite, this has an insignificant (slightly
negative) performance impact, but it reduces some of the worst slowdowns
introduced by the anti-dependency change (r158294).

Largest speedups:
SingleSource/Benchmarks/Stanford/Quicksort - 28%
SingleSource/Benchmarks/Stanford/Towers - 24%
SingleSource/Benchmarks/Shootout-C++/matrix - 23%
MultiSource/Benchmarks/SciMark2-C/scimark2 - 19%
MultiSource/Benchmarks/MiBench/automotive-bitcount/automotive-bitcount - 15%
(matrix and automotive-bitcount were both in the top-5 slowdown list from the
anti-dep. change)

Largest slowdowns:
MultiSource/Benchmarks/McCat/03-testtrie/testtrie - 28%
MultiSource/Benchmarks/mediabench/gsm/toast/toast - 26%
MultiSource/Benchmarks/MiBench/automotive-susan/automotive-susan - 21%
SingleSource/Benchmarks/CoyoteBench/lpbench - 20%
MultiSource/Applications/d/make_dparser - 16%

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158296 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/PowerPC/PPCISelLowering.cpp
test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
test/CodeGen/PowerPC/vec_buildvector_loadstore.ll

index c0d75b237c7e5bb2a147bbe875c21cd1f006acc4..964d5a0d9472001c97a41731f37dcf550ce2030c 100644 (file)
@@ -54,6 +54,9 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 
+static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
+cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+
 static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
   if (TM.getSubtargetImpl()->isDarwin())
     return new TargetLoweringObjectFileMachO();
@@ -5871,10 +5874,9 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
 }
 
 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
-  unsigned Directive = PPCSubTarget.getDarwinDirective();
-  if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2)
-    return Sched::ILP;
+  if (DisableILPPref)
+    return TargetLowering::getSchedulingPreference(N);
 
-  return TargetLowering::getSchedulingPreference(N);
+  return Sched::ILP;
 }
 
index 6a3c440bc9e70d9deecdf885dc23e66e9a3dad26..84aa40c4b52ac99c81ecd41e91661a11af982d31 100644 (file)
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin10 -mcpu=g5 -disable-ppc-ilp-pref | FileCheck %s
 ; ModuleID = '<stdin>'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin10.0"
index 6161b55edee9824bbca4d4e37da142b5f1ba16d9..47d985c5f755e1ee609c9acb67becfa2bd3c936c 100644 (file)
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 -disable-ppc-ilp-pref | FileCheck %s
 
 ; ModuleID = 'tsc.c'
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
index 015c08605fead24b37da280424d7143f2b45d599..7e58ec0bdef48e6bb6424aa883879bd0c38c04ef 100644 (file)
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mattr=+altivec  | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mattr=+altivec -disable-ppc-ilp-pref  | FileCheck %s
 ; Formerly this did byte loads and word stores.
 @a = external global <16 x i8>
 @b = external global <16 x i8>