From d4d961615c67082bc24bfa5d372a93a1dcff3457 Mon Sep 17 00:00:00 2001
From: Preston Gurd <preston.gurd@intel.com>
Date: Wed, 18 Jul 2012 20:49:17 +0000
Subject: [PATCH] This patch fixes 8 out of 20 unexpected failures in "make
 check" when run on an Intel Atom processor. The failures have arisen due to
 changes elsewhere in the trunk over the past 8 weeks or so.

These failures were not detected by the Atom buildbot because the
CPU on the Atom buildbot was not being detected as an Atom CPU.
The fix for this problem is in Host.cpp and X86Subtarget.cpp, but
shall remain commented out until the current set of Atom test failures
are fixed.

Patch by Andy Zhang and Tyler Nowicki!


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160451 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Support/Host.cpp                        |  3 +++
 lib/Target/X86/X86Subtarget.cpp             |  3 ++-
 test/CodeGen/X86/atom-lea-sp.ll             | 26 ++++++++++-----------
 test/CodeGen/X86/full-lsr.ll                | 16 +++++++++----
 test/CodeGen/X86/phys-reg-local-regalloc.ll | 20 +++++++++++++---
 test/CodeGen/X86/v-binop-widen2.ll          |  9 ++++++-
 test/CodeGen/X86/vshift-1.ll                |  4 ++--
 test/CodeGen/X86/vshift-2.ll                |  6 ++---
 test/CodeGen/X86/vshift-3.ll                |  4 ++--
 test/CodeGen/X86/vshift-5.ll                |  8 +++----
 test/CodeGen/X86/widen_cast-1.ll            |  9 ++++++-
 11 files changed, 74 insertions(+), 34 deletions(-)

diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 550fa5765c5..367ab6bfd4e 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -249,6 +249,9 @@ std::string sys::getHostCPUName() {
       case 28: // Most 45 nm Intel Atom processors
       case 38: // 45 nm Atom Lincroft
       case 39: // 32 nm Atom Medfield
+      // re-enable when buildbot will pass all atom tests
+      //case 53: // 32 nm Atom Midview
+      //case 54: // 32 nm Atom Midview
         return "atom";
 
       default: return (Em64T) ? "x86-64" : "i686";
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 5be31267281..6ceacc898e2 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -254,7 +254,8 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
 
     // Set processor type. Currently only Atom is detected.
     if (Family == 6 &&
-        (Model == 28 || Model == 38 || Model == 39)) {
+        (Model == 28 || Model == 38 || Model == 39
+         /*|| Model == 53 || Model == 54*/)) {
       X86ProcFamily = IntelAtom;
 
       UseLeaForSP = true;
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
index 59427880a71..19482e13d8c 100644
--- a/test/CodeGen/X86/atom-lea-sp.ll
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=ATOM %s
 ; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
 
 declare void @use_arr(i8*)
 declare void @many_params(i32, i32, i32, i32, i32, i32)
 
 define void @test1() nounwind {
-; atom: test1:
-; atom: leal -1052(%esp), %esp
-; atom-NOT: sub
-; atom: call
-; atom: leal 1052(%esp), %esp
+; ATOM: test1:
+; ATOM: leal -1052(%esp), %esp
+; ATOM-NOT: sub
+; ATOM: call
+; ATOM: leal 1052(%esp), %esp
 
 ; CHECK: test1:
 ; CHECK: subl
@@ -22,10 +22,10 @@ define void @test1() nounwind {
 }
 
 define void @test2() nounwind {
-; atom: test2:
-; atom: leal -28(%esp), %esp
-; atom: call
-; atom: leal 28(%esp), %esp
+; ATOM: test2:
+; ATOM: leal -28(%esp), %esp
+; ATOM: call
+; ATOM: leal 28(%esp), %esp
 
 ; CHECK: test2:
 ; CHECK-NOT: lea
@@ -34,9 +34,9 @@ define void @test2() nounwind {
 }
 
 define void @test3() nounwind {
-; atom: test3:
-; atom: leal -8(%esp), %esp
-; atom: leal 8(%esp), %esp
+; ATOM: test3:
+; ATOM: leal -8(%esp), %esp
+; ATOM: leal 8(%esp), %esp
 
 ; CHECK: test3:
 ; CHECK-NOT: lea
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 655ab291278..1344cdcd432 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,9 +1,17 @@
-; RUN: llc < %s -march=x86 >%t
-
-; RUN: grep "addl	\$4," %t | count 3
-; RUN: not grep ",%" %t
+; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
 
 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
+; ATOM: foo
+; ATOM: addl
+; ATOM: leal
+; ATOM: leal
+
+; CHECK: foo
+; CHECK: addl
+; CHECK: addl
+; CEHCK: addl
+
 entry:
 	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
 	br i1 %0, label %bb, label %return
diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll
index c565684685e..37eca1ce0a7 100644
--- a/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast -optimize-regalloc=0 | FileCheck %s
-; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s
-; CHECKed instructions should be the same with or without -O0.
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 | FileCheck -check-prefix=ATOM %s
+; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling.
 
 @.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]
 
@@ -15,6 +16,19 @@ entry:
 ; CHECK: movl	%ebx, 40(%esp)
 ; CHECK-NOT: movl
 ; CHECK: addl %ebx, %eax
+
+; On Intel Atom the scheduler moves a movl instruction
+; used for the printf call to follow movl 24(%esp), %eax
+; ATOM: movl 24(%esp), %eax
+; ATOM: movl
+; ATOM: movl   %eax, 36(%esp)
+; ATOM-NOT: movl
+; ATOM: movl 28(%esp), %ebx
+; ATOM-NOT: movl
+; ATOM: movl   %ebx, 40(%esp)
+; ATOM-NOT: movl
+; ATOM: addl %ebx, %eax
+
   %retval = alloca i32                            ; <i32*> [#uses=2]
   %"%ebx" = alloca i32                            ; <i32*> [#uses=1]
   %"%eax" = alloca i32                            ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll
index ae3f55a316f..569586af498 100644
--- a/test/CodeGen/X86/v-binop-widen2.ll
+++ b/test/CodeGen/X86/v-binop-widen2.ll
@@ -1,9 +1,16 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
 
 %vec = type <6 x float>
 ; CHECK: divss
 ; CHECK: divss
 ; CHECK: divps
+
+; Scheduler causes a different instruction order to be produced on Intel Atom
+; ATOM: divps
+; ATOM: divss
+; ATOM: divss
+
 define %vec @vecdiv( %vec %p1, %vec %p2)
 {
   %result = fdiv %vec %p1, %p2
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index 49551562c5a..e775750bbea 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -16,7 +16,7 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 entry:
 ; CHECK: shift1b:
 ; CHECK: movd
-; CHECK-NEXT: psllq
+; CHECK: psllq
   %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
   %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
   %shl = shl <2 x i64> %val, %1
@@ -38,7 +38,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift2b:
 ; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 9a9b419abea..9496893bd1a 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -16,7 +16,7 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 entry:
 ; CHECK: shift1b:
 ; CHECK: movd
-; CHECK-NEXT: psrlq
+; CHECK: psrlq
   %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
   %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
   %lshr = lshr <2 x i64> %val, %1
@@ -37,7 +37,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift2b:
 ; CHECK: movd
-; CHECK-NEXT: psrld
+; CHECK: psrld
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -63,7 +63,7 @@ entry:
 ; CHECK: shift3b:
 ; CHECK: movzwl
 ; CHECK: movd
-; CHECK-NEXT: psrlw
+; CHECK: psrlw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
   %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index 8e8a9aa04b2..b2b48b9da93 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -28,7 +28,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift2b:
 ; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
   %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
@@ -52,7 +52,7 @@ entry:
 ; CHECK: shift3b:
 ; CHECK: movzwl
 ; CHECK: movd
-; CHECK-NEXT: psraw
+; CHECK: psraw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
   %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index cb254aeb573..f6c311dee52 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -6,7 +6,7 @@ define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
 entry:
 ; CHECK: shift5a:
 ; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
   %amt = load i32* %pamt 
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
@@ -20,7 +20,7 @@ define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
 entry:
 ; CHECK: shift5b:
 ; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
   %amt = load i32* %pamt 
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
@@ -34,7 +34,7 @@ define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift5c:
 ; CHECK: movd
-; CHECK-NEXT: pslld
+; CHECK: pslld
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
   %shl = shl <4 x i32> %val, %shamt
@@ -47,7 +47,7 @@ define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
 ; CHECK: shift5d:
 ; CHECK: movd
-; CHECK-NEXT: psrad
+; CHECK: psrad
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
   %shr = ashr <4 x i32> %val, %shamt
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index d886f2c97e7..ebdfea9a37f 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,8 +1,15 @@
-; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=atom -mattr=+sse42 < %s | FileCheck -check-prefix=ATOM %s
+
 ; CHECK: paddd
 ; CHECK: movl
 ; CHECK: movlpd
 
+; Scheduler causes produce a different instruction order
+; ATOM: movl
+; ATOM: paddd
+; ATOM: movlpd
+
 ; bitcast a v4i16 to v2i32
 
 define void @convert(<2 x i32>* %dst, <4 x i16>* %src) nounwind {
-- 
2.34.1