Use a bigger hammer to fix PR11314 by disabling the "forcing two-address" hack in the scheduler.

author Evan Cheng <evan.cheng@apple.com>

Thu, 10 Nov 2011 07:43:16 +0000 (07:43 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Thu, 10 Nov 2011 07:43:16 +0000 (07:43 +0000)
author Evan Cheng <evan.cheng@apple.com>
Thu, 10 Nov 2011 07:43:16 +0000 (07:43 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Thu, 10 Nov 2011 07:43:16 +0000 (07:43 +0000)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

index cab303dd5c386f3a5081a2c9eac2852ddb5d7351..7938a3750504f7dd8deab985481226f4dc1c3eb2 100644 (file)
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -89,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath(
  static cl::opt<bool> DisableSchedHeight(
    "disable-sched-height", cl::Hidden, cl::init(false),
    cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+  "disable-2addr-hack", cl::Hidden, cl::init(true),
+  cl::desc("Disable scheduler's two-address hack"));
  
  static cl::opt<int> MaxReorderWindow(
    "max-sched-reorder", cl::Hidden, cl::init(6),
@@ -2628,7 +2631,8 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
    SUnits = &sunits;
    // Add pseudo dependency edges for two-address nodes.
-  AddPseudoTwoAddrDeps();
+  if (!Disable2AddrHack)
+    AddPseudoTwoAddrDeps();
    // Reroute edges to nodes with multiple uses.
    if (!TracksRegPressure)
      PrescheduleNodesWithMultipleUses();
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll

index a871ea198cf9eb91a59e7de2370fec4290774ecb..8bb9b926a2f1ac03de1a64f6d785c475aa6419ad 100644 (file)
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN:     grep {asm-printer} | grep 34
+; RUN:     grep {asm-printer} | grep 35
  
  target datalayout = "e-p:32:32"
  define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll

index 620e0f3667405e6ccafda666e61c150b691f8ee5..e904b1c5cc5a3e00da48ba2e162e160789a96080 100644 (file)
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -5,7 +5,6 @@
  
  ; CHECK: pextrw $14
  ; CHECK-NEXT: shrl $8
-; CHECK-NEXT: (%ebp)
  ; CHECK-NEXT: pinsrw
  
  define void @update(i8** %args_list) nounwind {
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll

index 8b53ae2817c643f6310e0f6be4da818212d6c271..1c5c113a72324667296d412318d9f6a1b7f2bbe4 100644 (file)
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -3,6 +3,10 @@
  ; Nested LSR is required to optimize this case.
  ; We do not expect to see this form of IR without -enable-iv-rewrite.
  
+; xfailed for now because the scheduler two-address hack has been disabled.
+; Now it's generating a leal -1 rather than a decq.
+; XFAIL: *
+
  define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
  ; CHECK: borf:
  ; CHECK-NOT: inc
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll

index 647bbdb7f0fde37874f34741050174661b2267fe..1d315ffe359b3dcc065c3ee6fa35fbdd975d4865 100644 (file)
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
  ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
+; DISABLED: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
+
+; The i386 test is disabled for as long as the scheduler 2-addr hack is disabled.
  
  ; This testcase shouldn't need to spill the -1 value,
  ; so it should just use pcmpeqd to materialize an all-ones vector.
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll

index 8f79fb8cde27b1fb7933107d78a70c8f33878657..4a6f5316a68ee6bae949f528eb573878524d6a10 100644 (file)
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,9 +1,8 @@
  ; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
  ; RUN: not grep inc %t
  ; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 12
+; RUN: grep addq %t | count 10
  ; RUN: not grep addb %t
-; RUN: not grep leaq %t
  ; RUN: not grep leal %t
  ; RUN: not grep movq %t
  
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll

index 938023ffe03701cb6c5613763774fad999e25393..382b0e048399b6c84151283cb43b5e4fc4b36ffe 100644 (file)
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,6 +1,7 @@
  ; RUN: llc -march=x86-64 < %s | FileCheck %s
  
  ; CHECK: decq
+; CHECK-NEXT: movl (
  ; CHECK-NEXT: jne
  
  @Te0 = external global [256 x i32]             ; <[256 x i32]*> [#uses=5]
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll

index 1f87089f80e701138b943817eaa587fb5bd99a3d..5f5e0937a3b37e414ccac4b873071a984a87bb78 100644 (file)
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -4,13 +4,14 @@
  ; Full strength reduction wouldn't reduce register pressure, so LSR should
  ; stick with indexing here.
  
+; FIXME: The generated code is worse now that the scheduler 2-address hack is disabled.
  ; CHECK: movaps        (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
+; CHECK: leaq  4(%rax), %{{rcx|r9}}
  ; CHECK: cvtdq2ps
  ; CHECK: orps          {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]]
  ; CHECK: movaps        [[X4]], (%{{rdi|rcx}},%rax,4)
-; CHECK: addq  $4, %rax
-; CHECK: cmpl  %eax, (%{{rdx|r8}})
-; CHECK-NEXT: jg
+; CHECK: cmpl  %{{ecx|r9d}}, (%{{rdx|r8}})
+; CHECK: jg
  
  define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
  entry:
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll

index 0b4d73a683af9b3a54f810dfb129566ae181a267..3a4acb8167f077b94e5c0230087cd6744af301b2 100644 (file)
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -3,10 +3,10 @@
  ; RUN: not grep movz %t
  ; RUN: not grep sar %t
  ; RUN: not grep shl %t
-; RUN: grep add %t | count 2
+; RUN: grep add %t | count 1
  ; RUN: grep inc %t | count 4
  ; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 2
+; RUN: grep lea %t | count 3
  
  ; Optimize away zext-inreg and sext-inreg on the loop induction
  ; variable using trip-count information.
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll

index 51a06112aada26ae9d8a8dd4a74161529de610e5..4f7e28ace3cdf6496cc87d924d74ac745861439a 100644 (file)
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,6 +1,10 @@
  ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
  ; rdar://7236213
  
+; XFAILed: now that the scheduler 2-address hack is disabled, a lea is generated.
+; The code isn't any worse though.
+; XFAIL: *
+
  ; CodeGen shouldn't require any lea instructions inside the marked loop.
  ; It should properly set up post-increment uses and do coalescing for
  ; the induction variables.
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll

index 1d74af2ba36c1b21350e04c5cc12af1d8bd7292f..36a0fd91bd87458c94defd82f07369d40c68ea7f 100644 (file)
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
          %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
          ret <4 x float> %tmp27
  ; CHECK: test14:
-; CHECK:       addps   [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
-; CHECK:       subps   [[X1]], [[X2:%xmm[0-9]+]]
+; CHECK:       subps   [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
+; CHECK:       addps   [[X1]], [[X0:%xmm[0-9]+]]
  ; CHECK:       movlhps [[X2]], [[X0]]
  }
  
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll

index d05c45321ba61397f3e673201d840ca0fafaa242..291069d4625b16fb8571bb93b3f8561c2acd18e4 100644 (file)
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -226,15 +226,16 @@ entry:
  }
  
  
-
+; FIXME: The code for t15 is worse now that the scheduler 2-address hack is disabled.
  define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
  entry:
          %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
          ret <8 x i16> %tmp8
  ; X64:         t15:
-; X64:                 pextrw  $7, %xmm0, %eax
+; X64:          movdqa %xmm0, %xmm2
  ; X64:                 punpcklqdq      %xmm1, %xmm0
  ; X64:                 pshuflw $-128, %xmm0, %xmm0
+; X64:                 pextrw  $7, %xmm2, %eax
  ; X64:                 pinsrw  $2, %eax, %xmm0
  ; X64:                 ret
  }
@@ -247,12 +248,12 @@ entry:
          %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
          ret <16 x i8> %tmp9
  ; X64:         t16:
-; X64:                 movdqa  %xmm1, %xmm0
-; X64:                 pslldq  $2, %xmm0
-; X64:                 pextrw  $1, %xmm0, %eax
-; X64:                 movd    %xmm0, %ecx
-; X64:                 pinsrw  $0, %ecx, %xmm0
-; X64:                 pextrw  $8, %xmm1, %ecx
+; X64:                 movdqa  %xmm1, %xmm2
+; X64:                 pslldq  $2, %xmm2
+; X64:                 movd    %xmm2, %eax
+; X64:                 pinsrw  $0, %eax, %xmm0
+; X64:                 pextrw  $8, %xmm1, %eax
+; X64:                 pextrw  $1, %xmm2, %ecx
  ; X64:                 ret
  }
author	Evan Cheng <evan.cheng@apple.com>
	Thu, 10 Nov 2011 07:43:16 +0000 (07:43 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Thu, 10 Nov 2011 07:43:16 +0000 (07:43 +0000)
lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp		patch \| blob \| history
test/CodeGen/X86/2006-05-11-InstrSched.ll		patch \| blob \| history
test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll		patch \| blob \| history
test/CodeGen/X86/change-compare-stride-1.ll		patch \| blob \| history
test/CodeGen/X86/fold-pcmpeqd-0.ll		patch \| blob \| history
test/CodeGen/X86/iv-users-in-other-loops.ll		patch \| blob \| history
test/CodeGen/X86/lsr-loop-exit-cond.ll		patch \| blob \| history
test/CodeGen/X86/lsr-reuse-trunc.ll		patch \| blob \| history
test/CodeGen/X86/masked-iv-safe.ll		patch \| blob \| history
test/CodeGen/X86/multiple-loop-post-inc.ll		patch \| blob \| history
test/CodeGen/X86/sse2.ll		patch \| blob \| history
test/CodeGen/X86/sse3.ll		patch \| blob \| history