From b2b5dc642cbbe781f73b9da83874d4005c50bd8e Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 25 Jun 2013 02:48:58 +0000
Subject: [PATCH] Revert "Temporarily enable MI-Sched on X86."

This reverts commit 98a9b72e8c56dc13a2617de84503a3d78352789c.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184823 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86Subtarget.h                 |  5 +-
 test/CodeGen/X86/2006-05-02-InstrSched1.ll    |  6 +--
 test/CodeGen/X86/2007-01-08-InstrSched.ll     |  4 +-
 test/CodeGen/X86/2009-02-26-MachineLICMBug.ll |  2 +-
 .../X86/2010-02-19-TailCallRetAddrBug.ll      | 12 +++--
 .../X86/2010-09-17-SideEffectsInChain.ll      |  8 +--
 test/CodeGen/X86/2011-10-19-LegelizeLoad.ll   |  3 +-
 test/CodeGen/X86/2012-04-26-sdglue.ll         |  4 +-
 test/CodeGen/X86/3addr-16bit.ll               |  7 ++-
 test/CodeGen/X86/StackColoring.ll             |  5 +-
 test/CodeGen/X86/abi-isel.ll                  | 20 +++----
 test/CodeGen/X86/add.ll                       | 12 ++---
 test/CodeGen/X86/alloca-align-rounding.ll     |  2 +-
 test/CodeGen/X86/avx-arith.ll                 |  5 +-
 test/CodeGen/X86/avx-intel-ocl.ll             | 13 ++---
 test/CodeGen/X86/avx-shuffle.ll               |  6 +--
 test/CodeGen/X86/break-anti-dependencies.ll   |  2 +-
 test/CodeGen/X86/bt.ll                        | 54 +++++++++----------
 test/CodeGen/X86/byval7.ll                    |  4 +-
 test/CodeGen/X86/chain_order.ll               |  3 +-
 test/CodeGen/X86/cmov.ll                      | 10 ++--
 test/CodeGen/X86/commute-two-addr.ll          |  3 +-
 test/CodeGen/X86/dbg-value-dag-combine.ll     |  2 +-
 test/CodeGen/X86/fast-isel-mem.ll             |  4 +-
 test/CodeGen/X86/fastcc.ll                    |  4 +-
 test/CodeGen/X86/fold-load.ll                 |  4 +-
 test/CodeGen/X86/fold-pcmpeqd-2.ll            | 15 ++----
 test/CodeGen/X86/full-lsr.ll                  |  2 +-
 test/CodeGen/X86/gather-addresses.ll          | 38 +++++--------
 test/CodeGen/X86/ghc-cc.ll                    |  7 +--
 test/CodeGen/X86/ghc-cc64.ll                  | 31 +++++------
 test/CodeGen/X86/hipe-cc.ll                   |  6 +--
 test/CodeGen/X86/hipe-cc64.ll                 | 12 ++---
 test/CodeGen/X86/lea-recursion.ll             |  3 +-
 test/CodeGen/X86/lea.ll                       |  3 +-
 test/CodeGen/X86/lsr-loop-exit-cond.ll        |  7 +--
 test/CodeGen/X86/masked-iv-safe.ll            |  4 +-
 test/CodeGen/X86/memcpy-2.ll                  | 14 ++---
 test/CodeGen/X86/pmul.ll                      |  2 +-
 test/CodeGen/X86/pr14088.ll                   |  6 +--
 test/CodeGen/X86/pr1505b.ll                   |  3 +-
 test/CodeGen/X86/pr16031.ll                   |  2 +-
 test/CodeGen/X86/pre-ra-sched.ll              |  5 +-
 test/CodeGen/X86/rdrand.ll                    |  6 +--
 test/CodeGen/X86/rdseed.ll                    |  6 +--
 test/CodeGen/X86/segmented-stacks-dynamic.ll  |  4 +-
 test/CodeGen/X86/select.ll                    |  2 +-
 test/CodeGen/X86/shift-bmi2.ll                | 21 ++++----
 test/CodeGen/X86/sink-hoist.ll                |  9 ++--
 test/CodeGen/X86/sse2.ll                      | 25 ++++-----
 test/CodeGen/X86/store-narrow.ll              | 22 ++++----
 test/CodeGen/X86/tailcall-largecode.ll        |  2 +-
 test/CodeGen/X86/test-nofold.ll               |  9 ++--
 test/CodeGen/X86/trunc-to-bool.ll             |  2 +-
 test/CodeGen/X86/v-binop-widen.ll             |  3 +-
 test/CodeGen/X86/v-binop-widen2.ll            |  2 +-
 test/CodeGen/X86/vec_shuffle-27.ll            |  4 +-
 test/CodeGen/X86/vec_shuffle-39.ll            |  4 +-
 test/CodeGen/X86/widen_cast-1.ll              |  2 +-
 test/CodeGen/X86/win64_alloca_dynalloca.ll    |  8 +--
 test/CodeGen/X86/x86-64-psub.ll               | 25 ++++-----
 test/CodeGen/X86/x86-shifts.ll                |  4 +-
 test/CodeGen/X86/zext-fold.ll                 |  6 +--
 test/CodeGen/X86/zext-sext.ll                 |  5 +-
 64 files changed, 258 insertions(+), 277 deletions(-)

diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 59911ba656a..66832b989be 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -361,14 +361,11 @@ public:
   /// memset with zero passed as the second argument. Otherwise it
   /// returns null.
   const char *getBZeroEntry() const;
-
+  
   /// This function returns true if the target has sincos() routine in its
   /// compiler runtime or math libraries.
   bool hasSinCos() const;
 
-  /// Enable the MachineScheduler pass for all X86 subtargets.
-  bool enableMachineScheduler() const LLVM_OVERRIDE { return true; }
-
   /// enablePostRAScheduler - run for Atom optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                              TargetSubtargetInfo::AntiDepBreakMode& Mode,
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 69266dc4e44..0afddd8f876 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,10 +1,7 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
-; RUN:   grep asm-printer | grep 16
+; RUN:   grep asm-printer | grep 14
 ;
-; It's possible to schedule this in 14 instructions by avoiding
-; callee-save registers, but the scheduler isn't currently that
-; conervative with registers.
 @size20 = external global i32		; <i32*> [#uses=1]
 @in5 = external global i8*		; <i8**> [#uses=1]
 
@@ -24,3 +21,4 @@ define i32 @compare(i8* %a, i8* %b) nounwind {
 }
 
 declare i32 @memcmp(i8*, i8*, i32)
+
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 4ec703921e2..24aa5b98d0b 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -11,12 +11,12 @@ define float @foo(float %x) nounwind {
     %tmp14 = fadd float %tmp12, %tmp7
     ret float %tmp14
 
-; CHECK: mulss
-; CHECK: mulss
 ; CHECK: mulss
 ; CHECK: mulss
 ; CHECK: addss
+; CHECK: mulss
 ; CHECK: addss
+; CHECK: mulss
 ; CHECK: addss
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index ee86da5861a..8174fbdc9e4 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -17,9 +17,9 @@ bb4:		; preds = %bb.i, %bb26, %bb4, %entry
 ; CHECK: %bb4
 ; CHECK: xorl
 ; CHECK: callq
+; CHECK: movq
 ; CHECK: xorl
 ; CHECK: xorl
-; CHECK: movq
 
 	%0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind		; <i32> [#uses=0]
 	%ins = or i64 %p, 2097152		; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index 10c50f9c8a5..d4a74c9e7e7 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -3,17 +3,17 @@
 ; Bug 6225
 ;
 ; If a call is a fastcc tail call and tail call optimization is enabled, the
-; caller frame is replaced by the callee frame. This can require that arguments are
+; caller frame is replaced by the callee frame. This can require that arguments are 
 ; placed on the former return address stack slot. Special care needs to be taken
 ; taken that the return address is moved / or stored in a register before
 ; lowering of arguments potentially overwrites the value.
 ;
-; Move return address (60(%esp)) to a temporary register (%ebp)
-; CHECK: movl 60(%esp), [[REGISTER:%[a-z]+]]
+; Move return address (76(%esp)) to a temporary register (%ebp)
+; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]]
 ; Overwrite return addresss
-; CHECK: movl [[EBX:%[a-z]+]], 60(%esp)
+; CHECK: movl [[EBX:%[a-z]+]], 76(%esp)
 ; Move return address from temporary register (%ebp) to new stack location (60(%esp))
-; CHECK: movl [[REGISTER]], 44(%esp)
+; CHECK: movl [[REGISTER]], 60(%esp)
 
 %tupl_p = type [9 x i32]*
 
@@ -51,3 +51,5 @@ false:
   tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind
   ret void
 }
+
+
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index 39d89e3d827..1b339777f57 100644
--- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -19,8 +19,8 @@ entry:
 }
 
 ; CHECK: movq	___stack_chk_guard@GOTPCREL(%rip)
-; CHECK: movb   (%rsp), [[R1:%.+]]
-; CHECK: movb   30(%rsp), [[R0:%.+]]
-; CHECK: movb   [[R1]], (%rsp)
-; CHECK: movb   [[R0]], 30(%rsp)
+; CHECK: movb   38(%rsp), [[R0:%.+]]
+; CHECK: movb   8(%rsp), [[R1:%.+]]
+; CHECK: movb   [[R1]], 8(%rsp)
+; CHECK: movb   [[R0]], 38(%rsp)
 ; CHECK: callq	___stack_chk_fail
diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index 07a6910c65e..da734d4b645 100644
--- a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: main
 define i32 @main() nounwind uwtable {
 entry:
-; CHECK: pmovsxbq  i(%rip), %
 ; CHECK: pmovsxbq  j(%rip), %
+; CHECK: pmovsxbq  i(%rip), %
   %0 = load <2 x i8>* @i, align 8
   %1 = load <2 x i8>* @j, align 8
   %div = sdiv <2 x i8> %1, %0
@@ -25,3 +25,4 @@ entry:
   ret i32 0
 ; CHECK: ret
 }
+
diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll
index 8bcf8597e33..04659522d36 100644
--- a/test/CodeGen/X86/2012-04-26-sdglue.ll
+++ b/test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -5,8 +5,8 @@
 ; It's hard to test for the ISEL condition because CodeGen optimizes
 ; away the bugpointed code. Just ensure the basics are still there.
 ;CHECK: func:
-;CHECK: vpxor
-;CHECK: vinserti128
+;CHECK: vxorps
+;CHECK: vinsertf128
 ;CHECK: vpshufd
 ;CHECK: vpshufd
 ;CHECK: vmulps
diff --git a/test/CodeGen/X86/3addr-16bit.ll b/test/CodeGen/X86/3addr-16bit.ll
index d843a110961..c51247ab925 100644
--- a/test/CodeGen/X86/3addr-16bit.ll
+++ b/test/CodeGen/X86/3addr-16bit.ll
@@ -34,8 +34,7 @@ entry:
 
 ; 64BIT:     t2:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT:     decl %eax
-; 64BIT:     movzwl %ax
+; 64BIT:     leal -1(%rsi), %eax
   %0 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
   %1 = add i16 %k, -1                             ; <i16> [#uses=3]
   br i1 %0, label %bb, label %bb1
@@ -59,7 +58,7 @@ entry:
 
 ; 64BIT:     t3:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT:     addl $2, %eax
+; 64BIT:     leal 2(%rsi), %eax
   %0 = add i16 %k, 2                              ; <i16> [#uses=3]
   %1 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
   br i1 %1, label %bb, label %bb1
@@ -82,7 +81,7 @@ entry:
 
 ; 64BIT:     t4:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT:     addl %edi, %eax
+; 64BIT:     leal (%rsi,%rdi), %eax
   %0 = add i16 %k, %c                             ; <i16> [#uses=3]
   %1 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
   br i1 %1, label %bb, label %bb1
diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll
index 15101456ebb..6c0f00d17d5 100644
--- a/test/CodeGen/X86/StackColoring.ll
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -4,8 +4,8 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;YESCOLOR: subq  $144, %rsp
-;NOCOLOR: subq  $272, %rsp
+;YESCOLOR: subq  $136, %rsp
+;NOCOLOR: subq  $264, %rsp
 
 define i32 @myCall_w2(i32 %in) {
 entry:
@@ -429,3 +429,4 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 
 declare i32 @foo(i32, i8*)
+
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index c49698e004f..955fc629048 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index dc02ac64661..5fe08ed305f 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -9,7 +9,7 @@ define i32 @test1(i32 inreg %a) nounwind {
   %b = add i32 %a, 128
   ret i32 %b
 ; X32: subl	$-128, %eax
-; X64: subl $-128,
+; X64: subl $-128, 
 }
 define i64 @test2(i64 inreg %a) nounwind {
   %b = add i64 %a, 2147483648
@@ -20,7 +20,7 @@ define i64 @test2(i64 inreg %a) nounwind {
 define i64 @test3(i64 inreg %a) nounwind {
   %b = add i64 %a, 128
   ret i64 %b
-
+  
 ; X32: addl $128, %eax
 ; X64: subq	$-128,
 }
@@ -38,7 +38,7 @@ normal:
 
 overflow:
   ret i1 false
-
+  
 ; X32: test4:
 ; X32: addl
 ; X32-NEXT: jo
@@ -82,11 +82,11 @@ define i64 @test6(i64 %A, i32 %B) nounwind {
         ret i64 %tmp5
 
 ; X32: test6:
-; X32:      movl 4(%esp), %eax
-; X32-NEXT: movl 12(%esp), %edx
+; X32:	    movl 12(%esp), %edx
 ; X32-NEXT: addl 8(%esp), %edx
+; X32-NEXT: movl 4(%esp), %eax
 ; X32-NEXT: ret
-
+        
 ; X64: test6:
 ; X64:	shlq	$32, %r[[A1]]
 ; X64:	leaq	(%r[[A1]],%r[[A0]]), %rax
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
index 74b9470db75..3d76fb0aa25 100644
--- a/test/CodeGen/X86/alloca-align-rounding.ll
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll
index a9da1ec067c..4aa337033df 100644
--- a/test/CodeGen/X86/avx-arith.ll
+++ b/test/CodeGen/X86/avx-arith.ll
@@ -240,15 +240,15 @@ define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
 ; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsllq $32, %xmm
 ; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsrlq $32, %xmm
 ; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsllq $32, %xmm
-; CHECK-NEXT: vpaddq %xmm
-; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsrlq $32, %xmm
 ; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsllq $32, %xmm
 ; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vpaddq %xmm
 ; CHECK-NEXT: vpsrlq $32, %xmm
 ; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsllq $32, %xmm
@@ -269,3 +269,4 @@ define <4 x float> @int_sqrt_ss() {
  %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
  ret <4 x float> %x2
 }
+
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 7337815a39a..055072098a2 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -32,7 +32,7 @@ declare i32 @func_int(i32, i32)
 define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
-  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
   %2 = load <16 x float>* %y, align 16
   %3 = fadd <16 x float> %2, %1
   ret <16 x float> %3
@@ -43,21 +43,21 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
 ; preserved ymm6-ymm15
 ; WIN64: testf16_regs
 ; WIN64: call
-; WIN64: vaddps  {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
-; WIN64: vaddps  {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
+; WIN64: vaddps  {{%ymm[6-7]}}, %ymm0, %ymm0
+; WIN64: vaddps  {{%ymm[6-7]}}, %ymm1, %ymm1
 ; WIN64: ret
 
 ; preserved ymm8-ymm15
 ; X64: testf16_regs
 ; X64: call
-; X64: vaddps  {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
-; X64: vaddps  {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
+; X64: vaddps  {{%ymm[8-9]}}, %ymm0, %ymm0
+; X64: vaddps  {{%ymm[8-9]}}, %ymm1, %ymm1
 ; X64: ret
 
 define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
-  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
   %2 = load <16 x float>* %y, align 16
   %3 = fadd <16 x float> %1, %b
   %4 = fadd <16 x float> %2, %3
@@ -166,3 +166,4 @@ entry:
   %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   ret <8 x float> %8
 }
+
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 3e11284f1af..73faa1fe0d4 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -81,7 +81,7 @@ entry:
 define i32 @test9(<4 x i32> %a) nounwind {
 ; CHECK: test9
 ; CHECK: vpextrd
-  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4>
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> 
   %r = extractelement <8 x i32> %b, i32 2
 ; CHECK: ret
   ret i32 %r
@@ -251,8 +251,8 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
 ; CHECK: swap8doubles
 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
-; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
-; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
 ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index 614d0adc727..c94261467c9 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,7 +1,7 @@
 ; Without list-burr scheduling we may not see the difference in codegen here.
 ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency
 ; breaker requires liveness information to be kept.
-; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
+; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
 ; RUN:   grep "%xmm0" %t | count 14
 ; RUN:   not grep "%xmm1" %t
 ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index f12a3543b07..e28923bb21d 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -38,7 +38,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @test2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: test2b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -56,7 +56,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @atest2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: atest2
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -74,7 +74,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @atest2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: atest2b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
@@ -91,7 +91,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @test3(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: test3
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -109,7 +109,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @test3b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: test3b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -127,7 +127,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @testne2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: testne2
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -145,7 +145,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @testne2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: testne2b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -163,7 +163,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @atestne2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: atestne2
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -181,7 +181,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @atestne2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: atestne2b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -199,7 +199,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @testne3(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: testne3
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -217,7 +217,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @testne3b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: testne3b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -235,7 +235,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query2
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -253,7 +253,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query2b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -271,7 +271,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @aquery2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: aquery2
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -289,7 +289,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @aquery2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: aquery2b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -307,7 +307,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query3(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query3
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -325,7 +325,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query3b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query3b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -343,7 +343,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query3x(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query3x
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -361,7 +361,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query3bx(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query3bx
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -379,7 +379,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne2
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -397,7 +397,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne2b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -415,7 +415,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @aqueryne2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: aqueryne2
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -433,7 +433,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @aqueryne2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: aqueryne2b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -451,7 +451,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne3(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne3
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -469,7 +469,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne3b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne3b
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -487,7 +487,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne3x(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne3x
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -505,7 +505,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne3bx(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne3bx
-; CHECK: btl %e{{..}}, %e{{..}}
+; CHECK: btl %eax, %ecx
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index b6b5e6c9b12..98a26e47ab7 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -7,14 +7,14 @@
 define i32 @main() nounwind  {
 entry:
 ; CHECK: main:
+; CHECK: movl $1, (%esp)
 ; CHECK: leal 16(%esp), %edi
 ; CHECK: leal 160(%esp), %esi
 ; CHECK: rep;movsl
-; CHECK: movl $1, (%esp)
 	%s = alloca %struct.S		; <%struct.S*> [#uses=2]
 	%tmp15 = getelementptr %struct.S* %s, i32 0, i32 0		; <<2 x i64>*> [#uses=1]
 	store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
-	call void @t( i32 1, %struct.S* byval  %s ) nounwind
+	call void @t( i32 1, %struct.S* byval  %s ) nounwind 
 	ret i32 0
 }
 
diff --git a/test/CodeGen/X86/chain_order.ll b/test/CodeGen/X86/chain_order.ll
index 942dd8a591f..056fd2741fa 100644
--- a/test/CodeGen/X86/chain_order.ll
+++ b/test/CodeGen/X86/chain_order.ll
@@ -3,8 +3,8 @@
 ;CHECK: cftx020
 ;CHECK: vmovsd  (%rdi), %xmm{{.*}}
 ;CHECK: vmovsd  16(%rdi), %xmm{{.*}}
-;CHECK: vmovsd  24(%rdi), %xmm{{.*}}
 ;CHECK: vmovhpd  8(%rdi), %xmm{{.*}}
+;CHECK: vmovsd  24(%rdi), %xmm{{.*}}
 ;CHECK: vmovupd %xmm{{.*}}, (%rdi)
 ;CHECK: vmovupd %xmm{{.*}}, 16(%rdi)
 ;CHECK: ret
@@ -35,3 +35,4 @@ entry:
   store <2 x double> %14, <2 x double>* %15, align 8
   ret void
 }
+
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index 63bf5c522dc..ed25c82fdda 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
 entry:
 ; CHECK: test1:
-; CHECK: btl
-; CHECK-NEXT: movl	$12, %eax
+; CHECK: movl	$12, %eax
+; CHECK-NEXT: btl
 ; CHECK-NEXT: cmovael	(%rcx), %eax
 ; CHECK-NEXT: ret
 
@@ -19,8 +19,8 @@ entry:
 define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
 entry:
 ; CHECK: test2:
-; CHECK: btl
-; CHECK-NEXT: movl	$12, %eax
+; CHECK: movl	$12, %eax
+; CHECK-NEXT: btl
 ; CHECK-NEXT: cmovbl	(%rcx), %eax
 ; CHECK-NEXT: ret
 
@@ -92,7 +92,7 @@ bb.i.i.i:                                         ; preds = %entry
 ; CHECK: testb
 ; CHECK-NOT: xor
 ; CHECK: setne
-; CHECK: testb
+; CHECK-NEXT: testb
 
 func_4.exit.i:                                    ; preds = %bb.i.i.i, %entry
   %.not.i = xor i1 %2, true                       ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 21f94423c59..0ceea29d9a7 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -38,11 +38,10 @@ define i32 @t2(i32 %X, i32 %Y) nounwind {
 define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
 entry:
 ; DARWIN: t3:
+; DARWIN: shll $16
 ; DARWIN: shlq $32, %rcx
 ; DARWIN-NOT: leaq
 ; DARWIN: orq %rcx, %rax
-; DARWIN-NOT: leaq
-; DARWIN: shll $16
   %tmp21 = zext i32 %lb to i64
   %tmp23 = zext i32 %ub to i64
   %tmp24 = shl i64 %tmp23, 32
diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll
index a37b3dffee4..7c3c361bae1 100644
--- a/test/CodeGen/X86/dbg-value-dag-combine.ll
+++ b/test/CodeGen/X86/dbg-value-dag-combine.ll
@@ -16,7 +16,7 @@ entry:
   call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14
   %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
   %tmp3 = add i32 0, %tmp2, !dbg !15
-; CHECK:  ##DEBUG_VALUE: idx <- E{{..$}}
+; CHECK:  ##DEBUG_VALUE: idx <- EAX{{$}}
   call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
 !15
   %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index f3617f28732..52b1e856433 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -40,7 +40,7 @@ entry:
 ; CHECK:	movl	L_LotsStuff$non_lazy_ptr, %ecx
 
 ; ATOM: _t:
-; ATOM:         movl    L_LotsStuff$non_lazy_ptr, %e{{..}}
-; ATOM:         movl    $0, %e{{..}}
+; ATOM:         movl    L_LotsStuff$non_lazy_ptr, %ecx
+; ATOM:         movl    $0, %eax
 
 }
diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll
index a362f8d1ca7..705ab7bada7 100644
--- a/test/CodeGen/X86/fastcc.ll
+++ b/test/CodeGen/X86/fastcc.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s
-; CHECK: movsd %xmm{{[0-9]}}, 8(%esp)
-; CHECK: xorl %eax, %eax
+; CHECK: movsd %xmm0, 8(%esp)
+; CHECK: xorl %ecx, %ecx
 
 @d = external global double		; <double*> [#uses=1]
 @c = external global double		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index fc809681280..d8366654c01 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -38,10 +38,10 @@ L:
 
   store i16 %A, i16* %Q
   ret i32 %D
-
+  
 ; CHECK: test2:
 ; CHECK: 	movl	4(%esp), %eax
-; CHECK-NEXT:	movzwl	(%eax), %e{{..}}
+; CHECK-NEXT:	movzwl	(%eax), %ecx
 
 }
 
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index bb0873bc5da..2bde76efd2a 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -54,27 +54,22 @@ forbody:		; preds = %forcond
 	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
 	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind		; <<4 x float>> [#uses=1]
-	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
-	%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer		; <<4 x i32>> [#uses=1]
-
-	call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
-
-	%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
-
 	%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>		; <<4 x i32>> [#uses=2]
 	%andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13		; <<4 x i32>> [#uses=1]
 	%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %not.i16		; <<4 x i32>> [#uses=1]
 	%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14		; <<4 x i32>> [#uses=1]
 	%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>		; <<4 x float>> [#uses=1]
-
+	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+	%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7		; <<4 x i32>> [#uses=1]
+	call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
 	%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5		; <<4 x i32>> [#uses=1]
 	%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float>		; <<4 x float>> [#uses=1]
-
+	%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
 	%bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=2]
 	%andps.i = and <4 x i32> zeroinitializer, %bitcast6.i		; <<4 x i32>> [#uses=1]
 	%bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32>		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index cbcc62a7011..0729dda4a12 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -4,7 +4,7 @@
 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
 ; ATOM: foo
 ; ATOM: addl
-; ATOM: addl
+; ATOM: leal
 ; ATOM: leal
 
 ; CHECK: foo
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index d1abd71618c..72a50961b2f 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -1,35 +1,21 @@
-; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN
-; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN
+; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s
 ; rdar://7398554
 
 ; When doing vector gather-scatter index calculation with 32-bit indices,
 ; bounce the vector off of cache rather than shuffling each individual
 ; element out of the index vector.
 
-; CHECK: foo:
-; LIN: movaps	(%rsi), %xmm0
-; LIN: andps	(%rdx), %xmm0
-; LIN: movaps	%xmm0, -24(%rsp)
-; LIN: movslq	-24(%rsp), %rsi
-; LIN: movslq	-20(%rsp), %rcx
-; LIN: movslq	-16(%rsp), %rdx
-; LIN: movslq	-12(%rsp), %rax
-; LIN: movsd	(%rdi,%rsi,8), %xmm0
-; LIN: movhpd	(%rdi,%rcx,8), %xmm0
-; LIN: movsd	(%rdi,%rdx,8), %xmm1
-; LIN: movhpd	(%rdi,%rax,8), %xmm1
-
-; WIN: movaps	(%rdx), %xmm0
-; WIN: andps	(%r8), %xmm0
-; WIN: movaps	%xmm0, (%rsp)
-; WIN: movslq	(%rsp), %rax
-; WIN: movslq	4(%rsp), %rdx
-; WIN: movslq	8(%rsp), %r9
-; WIN: movslq	12(%rsp), %r8
-; WIN: movsd	(%rcx,%rax,8), %xmm0
-; WIN: movhpd	(%rcx,%rdx,8), %xmm0
-; WIN: movsd	(%rcx,%r9,8), %xmm1
-; WIN: movhpd	(%rcx,%r8,8), %xmm1
+; CHECK: andps    ([[H:%rdx|%r8]]), %xmm0
+; CHECK: movaps   %xmm0, {{(-24)?}}(%rsp)
+; CHECK: movslq   {{(-24)?}}(%rsp), %rax
+; CHECK: movsd    ([[P:%rdi|%rcx]],%rax,8), %xmm0
+; CHECK: movslq   {{-20|4}}(%rsp), %rax
+; CHECK: movhpd   ([[P]],%rax,8), %xmm0
+; CHECK: movslq   {{-16|8}}(%rsp), %rax
+; CHECK: movsd    ([[P]],%rax,8), %xmm1
+; CHECK: movslq   {{-12|12}}(%rsp), %rax
+; CHECK: movhpd   ([[P]],%rax,8), %xmm1
 
 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
   %a = load <4 x i32>* %i
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
index 4dba2c08632..0e65cfdbae3 100644
--- a/test/CodeGen/X86/ghc-cc.ll
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -28,10 +28,10 @@ entry:
 
 define cc 10 void @foo() nounwind {
 entry:
-  ; CHECK:      movl r1, %esi
-  ; CHECK-NEXT: movl hp, %edi
+  ; CHECK: movl base, %ebx
   ; CHECK-NEXT: movl sp, %ebp
-  ; CHECK-NEXT: movl base, %ebx
+  ; CHECK-NEXT: movl hp, %edi
+  ; CHECK-NEXT: movl r1, %esi
   %0 = load i32* @r1
   %1 = load i32* @hp
   %2 = load i32* @sp
@@ -42,3 +42,4 @@ entry:
 }
 
 declare cc 10 void @bar(i32, i32, i32, i32)
+
diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll
index 403391e8165..fcf7e1797ad 100644
--- a/test/CodeGen/X86/ghc-cc64.ll
+++ b/test/CodeGen/X86/ghc-cc64.ll
@@ -41,22 +41,22 @@ entry:
 
 define cc 10 void @foo() nounwind {
 entry:
-  ; CHECK:      movsd d2(%rip), %xmm6
-  ; CHECK-NEXT: movsd d1(%rip), %xmm5
-  ; CHECK-NEXT: movss f4(%rip), %xmm4
-  ; CHECK-NEXT: movss f3(%rip), %xmm3
-  ; CHECK-NEXT: movss f2(%rip), %xmm2
-  ; CHECK-NEXT: movss f1(%rip), %xmm1
-  ; CHECK-NEXT: movq splim(%rip), %r15
-  ; CHECK-NEXT: movq r6(%rip), %r9
-  ; CHECK-NEXT: movq r5(%rip), %r8
-  ; CHECK-NEXT: movq r4(%rip), %rdi
-  ; CHECK-NEXT: movq r3(%rip), %rsi
-  ; CHECK-NEXT: movq r2(%rip), %r14
-  ; CHECK-NEXT: movq r1(%rip), %rbx
-  ; CHECK-NEXT: movq hp(%rip), %r12
+  ; CHECK: movq base(%rip), %r13
   ; CHECK-NEXT: movq sp(%rip), %rbp
-  ; CHECK-NEXT: movq base(%rip), %r13
+  ; CHECK-NEXT: movq hp(%rip), %r12
+  ; CHECK-NEXT: movq r1(%rip), %rbx
+  ; CHECK-NEXT: movq r2(%rip), %r14
+  ; CHECK-NEXT: movq r3(%rip), %rsi
+  ; CHECK-NEXT: movq r4(%rip), %rdi
+  ; CHECK-NEXT: movq r5(%rip), %r8
+  ; CHECK-NEXT: movq r6(%rip), %r9
+  ; CHECK-NEXT: movq splim(%rip), %r15
+  ; CHECK-NEXT: movss f1(%rip), %xmm1
+  ; CHECK-NEXT: movss f2(%rip), %xmm2
+  ; CHECK-NEXT: movss f3(%rip), %xmm3
+  ; CHECK-NEXT: movss f4(%rip), %xmm4
+  ; CHECK-NEXT: movsd d1(%rip), %xmm5
+  ; CHECK-NEXT: movsd d2(%rip), %xmm6
   %0 = load double* @d2
   %1 = load double* @d1
   %2 = load float* @f4
@@ -83,3 +83,4 @@ entry:
 
 declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
                         float, float, float, float, double, double)
+
diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll
index b34417ebf69..76d17a09d54 100644
--- a/test/CodeGen/X86/hipe-cc.ll
+++ b/test/CodeGen/X86/hipe-cc.ll
@@ -49,10 +49,10 @@ entry:
   store i32 %arg1, i32* %arg1_var
   store i32 %arg2, i32* %arg2_var
 
-  ; CHECK:      movl  16(%esp), %esi
-  ; CHECK-NEXT: movl  12(%esp), %ebp
+  ; CHECK:      movl   4(%esp), %edx
   ; CHECK-NEXT: movl   8(%esp), %eax
-  ; CHECK-NEXT: movl   4(%esp), %edx
+  ; CHECK-NEXT: movl  12(%esp), %ebp
+  ; CHECK-NEXT: movl  16(%esp), %esi
   %0 = load i32* %hp_var
   %1 = load i32* %p_var
   %2 = load i32* %arg0_var
diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll
index 27e1c723a8f..5dbb5a25cbe 100644
--- a/test/CodeGen/X86/hipe-cc64.ll
+++ b/test/CodeGen/X86/hipe-cc64.ll
@@ -5,10 +5,10 @@
 define void @zap(i64 %a, i64 %b) nounwind {
 entry:
   ; CHECK:      movq %rsi, %rax
-  ; CHECK-NEXT: movl $8, %ecx
-  ; CHECK-NEXT: movl $9, %r8d
   ; CHECK-NEXT: movq %rdi, %rsi
   ; CHECK-NEXT: movq %rax, %rdx
+  ; CHECK-NEXT: movl $8, %ecx
+  ; CHECK-NEXT: movl $9, %r8d
   ; CHECK-NEXT: callq addfour
   %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
   %res = extractvalue {i64, i64, i64} %0, 2
@@ -57,11 +57,11 @@ entry:
   store i64 %arg2, i64* %arg2_var
   store i64 %arg3, i64* %arg3_var
 
-  ; CHECK:      movq  40(%rsp), %r15
-  ; CHECK-NEXT: movq  32(%rsp), %rbp
-  ; CHECK-NEXT: movq  24(%rsp), %rsi
+  ; CHECK:      movq  8(%rsp), %rcx
   ; CHECK-NEXT: movq  16(%rsp), %rdx
-  ; CHECK-NEXT: movq  8(%rsp), %rcx
+  ; CHECK-NEXT: movq  24(%rsp), %rsi
+  ; CHECK-NEXT: movq  32(%rsp), %rbp
+  ; CHECK-NEXT: movq  40(%rsp), %r15
   %0 = load i64* %hp_var
   %1 = load i64* %p_var
   %2 = load i64* %arg0_var
diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll
index 9480600312c..3f32fd27c5c 100644
--- a/test/CodeGen/X86/lea-recursion.ll
+++ b/test/CodeGen/X86/lea-recursion.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep lea | count 13
+; RUN: llc < %s -march=x86-64 | grep lea | count 12
 
 ; This testcase was written to demonstrate an instruction-selection problem,
 ; however it also happens to expose a limitation in the DAGCombiner's
@@ -44,3 +44,4 @@ entry:
 	store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7)
 	ret void
 }
+
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
index 3209de86ae4..87f0b0b30a8 100644
--- a/test/CodeGen/X86/lea.ll
+++ b/test/CodeGen/X86/lea.ll
@@ -28,7 +28,8 @@ bb.nph:
 bb2:
 	ret i32 %x_offs
 ; CHECK: test2:
-; CHECK:        leal    -5(%r[[A0:..]]), %eax
+; CHECK: movl %e[[A0]], %eax
+; CHECK: addl $-5, %eax
 ; CHECK:	andl	$-4, %eax
 ; CHECK:	negl	%eax
 ; CHECK:	leal	-4(%r[[A0]],%rax), %eax
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 3dee2ec89c1..8a81f70a8a2 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -2,12 +2,12 @@
 ; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
 
 ; CHECK: t:
-; CHECK:      movl (%r9,%rax,4), %e{{..}}
-; CHECK-NEXT: decq
+; CHECK: decq
+; CHECK-NEXT: movl (%r9,%rax,4), %eax
 ; CHECK-NEXT: jne
 
 ; ATOM: t:
-; ATOM: movl (%r9,%rax,4), %e{{..}}
+; ATOM: movl (%r9,%rax,4), %eax
 ; ATOM-NEXT: decq
 ; ATOM-NEXT: jne
 
@@ -190,3 +190,4 @@ for.end:                                          ; preds = %for.body, %entry
   %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
   ret i32 %bi.0.lcssa
 }
+
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 2da58849ea6..c33cac2e05a 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -3,9 +3,9 @@
 ; RUN: not grep movz %t
 ; RUN: not grep sar %t
 ; RUN: not grep shl %t
-; RUN: grep add %t | count 6
+; RUN: grep add %t | count 5
 ; RUN: grep inc %t | count 2
-; RUN: grep lea %t | count 0
+; RUN: grep lea %t | count 3
 
 ; Optimize away zext-inreg and sext-inreg on the loop induction
 ; variable using trip-count information.
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 5a4172e90e2..630c0ed1a33 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -56,15 +56,15 @@ entry:
 define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
 entry:
 ; SSE2-Darwin: t2:
-; SSE2-Darwin: movaps (%ecx), %xmm0
+; SSE2-Darwin: movaps (%eax), %xmm0
 ; SSE2-Darwin: movaps %xmm0, (%eax)
 
 ; SSE2-Mingw32: t2:
-; SSE2-Mingw32: movaps (%ecx), %xmm0
+; SSE2-Mingw32: movaps (%eax), %xmm0
 ; SSE2-Mingw32: movaps %xmm0, (%eax)
 
 ; SSE1: t2:
-; SSE1: movaps (%ecx), %xmm0
+; SSE1: movaps (%eax), %xmm0
 ; SSE1: movaps %xmm0, (%eax)
 
 ; NOSSE: t2:
@@ -91,14 +91,14 @@ entry:
 define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
 entry:
 ; SSE2-Darwin: t3:
-; SSE2-Darwin: movsd (%ecx), %xmm0
-; SSE2-Darwin: movsd 8(%ecx), %xmm1
+; SSE2-Darwin: movsd (%eax), %xmm0
+; SSE2-Darwin: movsd 8(%eax), %xmm1
 ; SSE2-Darwin: movsd %xmm1, 8(%eax)
 ; SSE2-Darwin: movsd %xmm0, (%eax)
 
 ; SSE2-Mingw32: t3:
-; SSE2-Mingw32: movsd (%ecx), %xmm0
-; SSE2-Mingw32: movsd 8(%ecx), %xmm1
+; SSE2-Mingw32: movsd (%eax), %xmm0
+; SSE2-Mingw32: movsd 8(%eax), %xmm1
 ; SSE2-Mingw32: movsd %xmm1, 8(%eax)
 ; SSE2-Mingw32: movsd %xmm0, (%eax)
 
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index 4374f37dfd4..da4af81959d 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t
 ; RUN: grep pmul %t | count 12
-; RUN: grep mov %t | count 14
+; RUN: grep mov %t | count 11
 
 define <4 x i32> @a(<4 x i32> %i) nounwind  {
         %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
diff --git a/test/CodeGen/X86/pr14088.ll b/test/CodeGen/X86/pr14088.ll
index 7041d143238..505e3b5cf26 100644
--- a/test/CodeGen/X86/pr14088.ll
+++ b/test/CodeGen/X86/pr14088.ll
@@ -20,6 +20,6 @@ return:
 }
 
 ; We were miscompiling this and using %ax instead of %cx in the movw.
-; CHECK: movw	%ax, (%rsi)
-; CHECK: movswl	%ax, %eax
-; CHECK: movslq	%eax, %rax
+; CHECK: movswl	%cx, %ecx
+; CHECK: movw	%cx, (%rsi)
+; CHECK: movslq	%ecx, %rcx
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index c348fec5467..9b0ef83ab04 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -57,10 +57,11 @@ entry:
 	%tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
 	%tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
 ; reload:
+; CHECK: fld
+; CHECK: fstps
 ; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
 	%tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
 	%tmp3940 = fpext float %tmp1314 to double		; <double> [#uses=1]
-; CHECK: fld
 ; CHECK: fstpl
 ; CHECK: ZNSolsEd
 	%tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
diff --git a/test/CodeGen/X86/pr16031.ll b/test/CodeGen/X86/pr16031.ll
index 76b6c42c632..4721173cb67 100644
--- a/test/CodeGen/X86/pr16031.ll
+++ b/test/CodeGen/X86/pr16031.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
 
 ; CHECK: main:
 ; CHECK: pushl %esi
diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll
index 70135d43f49..b792ffa09fb 100644
--- a/test/CodeGen/X86/pre-ra-sched.ll
+++ b/test/CodeGen/X86/pre-ra-sched.ll
@@ -1,6 +1,5 @@
-; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \
-; RUN-disabled:     2>&1 | FileCheck %s
-; RUN: true
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \
+; RUN:     2>&1 | FileCheck %s
 ; REQUIRES: asserts
 ;
 ; rdar:13279013: pre-RA-sched should not check all interferences and
diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll
index 3d1870987a5..0fd9916bfa1 100644
--- a/test/CodeGen/X86/rdrand.ll
+++ b/test/CodeGen/X86/rdrand.ll
@@ -11,10 +11,10 @@ define i32 @_rdrand16_step(i16* %random_val) {
   ret i32 %isvalid
 ; CHECK: _rdrand16_step:
 ; CHECK: rdrandw	%ax
+; CHECK: movw	%ax, (%r[[A0:di|cx]])
 ; CHECK: movzwl	%ax, %ecx
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%ecx, %eax
-; CHECK: movw	%cx, (%r[[A0:di|cx]])
 ; CHECK: ret
 }
 
@@ -26,9 +26,9 @@ define i32 @_rdrand32_step(i32* %random_val) {
   ret i32 %isvalid
 ; CHECK: _rdrand32_step:
 ; CHECK: rdrandl	%e[[T0:[a-z]+]]
+; CHECK: movl	%e[[T0]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T0]], %eax
-; CHECK: movl	%e[[T0]], (%r[[A0]])
 ; CHECK: ret
 }
 
@@ -40,9 +40,9 @@ define i32 @_rdrand64_step(i64* %random_val) {
   ret i32 %isvalid
 ; CHECK: _rdrand64_step:
 ; CHECK: rdrandq	%r[[T1:[a-z]+]]
+; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T1]], %eax
-; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: ret
 }
 
diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll
index 462cacad958..409da629888 100644
--- a/test/CodeGen/X86/rdseed.ll
+++ b/test/CodeGen/X86/rdseed.ll
@@ -12,10 +12,10 @@ define i32 @_rdseed16_step(i16* %random_val) {
   ret i32 %isvalid
 ; CHECK: _rdseed16_step:
 ; CHECK: rdseedw	%ax
+; CHECK: movw	%ax, (%r[[A0:di|cx]])
 ; CHECK: movzwl	%ax, %ecx
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%ecx, %eax
-; CHECK: movw	%cx, (%r[[A0:di|cx]])
 ; CHECK: ret
 }
 
@@ -27,9 +27,9 @@ define i32 @_rdseed32_step(i32* %random_val) {
   ret i32 %isvalid
 ; CHECK: _rdseed32_step:
 ; CHECK: rdseedl	%e[[T0:[a-z]+]]
+; CHECK: movl	%e[[T0]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T0]], %eax
-; CHECK: movl	%e[[T0]], (%r[[A0]])
 ; CHECK: ret
 }
 
@@ -41,8 +41,8 @@ define i32 @_rdseed64_step(i64* %random_val) {
   ret i32 %isvalid
 ; CHECK: _rdseed64_step:
 ; CHECK: rdseedq	%r[[T1:[a-z]+]]
+; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T1]], %eax
-; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
index c452282cfab..d68b00b69a2 100644
--- a/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -31,7 +31,7 @@ false:
 ; X32-NEXT: ret
 
 ; X32:      movl %esp, %eax
-; X32:      subl %ecx, %eax
+; X32-NEXT: subl %ecx, %eax
 ; X32-NEXT: cmpl %eax, %gs:48
 
 ; X32:      movl %eax, %esp
@@ -52,7 +52,7 @@ false:
 ; X64-NEXT: ret
 
 ; X64:      movq %rsp, %[[RDI:rdi|rax]]
-; X64:      subq %{{.*}}, %[[RDI]]
+; X64-NEXT: subq %{{.*}}, %[[RDI]]
 ; X64-NEXT: cmpq %[[RDI]], %fs:112
 
 ; X64:      movq %[[RDI]], %rsp
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index c8d340b2ad6..09ca07b31a1 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -256,8 +256,8 @@ entry:
   %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
   ret i8* %call
 ; CHECK: test12:
-; CHECK: mulq
 ; CHECK: movq $-1, %rdi
+; CHECK: mulq
 ; CHECK: cmovnoq	%rax, %rdi
 ; CHECK: jmp	__Znam
 
diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll
index 9e6562423db..d1f321f1773 100644
--- a/test/CodeGen/X86/shift-bmi2.ll
+++ b/test/CodeGen/X86/shift-bmi2.ll
@@ -30,11 +30,10 @@ entry:
   %x = load i32* %p
   %shl = shl i32 %x, %shamt
 ; BMI2: shl32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: shl32p
-; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -75,7 +74,7 @@ entry:
   %x = load i64* %p
   %shl = shl i64 %x, %shamt
 ; BMI264: shl64p
-; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }
@@ -107,11 +106,10 @@ entry:
   %x = load i32* %p
   %shl = lshr i32 %x, %shamt
 ; BMI2: lshr32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: lshr32
-; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -130,7 +128,7 @@ entry:
   %x = load i64* %p
   %shl = lshr i64 %x, %shamt
 ; BMI264: lshr64p
-; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }
@@ -152,11 +150,10 @@ entry:
   %x = load i32* %p
   %shl = ashr i32 %x, %shamt
 ; BMI2: ashr32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: ashr32
-; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -175,7 +172,7 @@ entry:
   %x = load i64* %p
   %shl = ashr i64 %x, %shamt
 ; BMI264: ashr64p
-; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index ad69f93f6d4..2aca5b897d3 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -26,10 +26,11 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
 
 ; CHECK: split:
 ; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: je
+; CHECK-NEXT: jne
+; CHECK-NEXT: movaps
+; CHECK-NEXT: ret
 ; CHECK:      divsd
-; CHECK:      movaps
-; CHECK:      ret
+; CHECK-NEXT: ret
 define double @split(double %x, double %y, i1 %c) nounwind {
   %a = fdiv double %x, 3.2
   %z = select i1 %c, double %a, double %y
@@ -64,7 +65,7 @@ return:
 ; Sink instructions with dead EFLAGS defs.
 
 ; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag.
-;
+; 
 ; See <rdar://problem/8030636>. This test isn't valid after we made machine
 ; sinking more conservative about sinking instructions that define a preg into a
 ; block when we don't know if the preg is killed within the current block.
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 21af73ad8a5..36a0fd91bd8 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -7,7 +7,7 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
 	store <2 x double> %tmp9, <2 x double>* %r, align 16
 	ret void
-
+        
 ; CHECK: test1:
 ; CHECK: 	movl	8(%esp), %eax
 ; CHECK-NEXT: 	movapd	(%eax), %xmm0
@@ -23,12 +23,12 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
 	store <2 x double> %tmp9, <2 x double>* %r, align 16
 	ret void
-
+        
 ; CHECK: test2:
-; CHECK: 	movl	4(%esp), %eax
-; CHECK: 	movl	8(%esp), %ecx
-; CHECK-NEXT: 	movapd	(%ecx), %xmm0
+; CHECK: 	movl	8(%esp), %eax
+; CHECK-NEXT: 	movapd	(%eax), %xmm0
 ; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
+; CHECK-NEXT: 	movl	4(%esp), %eax
 ; CHECK-NEXT: 	movapd	%xmm0, (%eax)
 ; CHECK-NEXT: 	ret
 }
@@ -48,7 +48,7 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind
 	store <4 x float> %tmp13, <4 x float>* %res
 	ret void
 ; CHECK: @test3
-; CHECK: 	unpcklps
+; CHECK: 	unpcklps	
 }
 
 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
@@ -85,9 +85,9 @@ define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp2, <4 x float>* %res
         ret void
-
+        
 ; CHECK: test6:
-; CHECK: 	movaps	(%ecx), %xmm0
+; CHECK: 	movaps	(%eax), %xmm0
 ; CHECK:	movaps	%xmm0, (%eax)
 }
 
@@ -96,7 +96,7 @@ define void @test7() nounwind {
         shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
         store <4 x float> %2, <4 x float>* null
         ret void
-
+        
 ; CHECK: test7:
 ; CHECK:	xorps	%xmm0, %xmm0
 ; CHECK:	movaps	%xmm0, 0
@@ -166,7 +166,7 @@ define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x fl
         store <4 x float> %tmp11, <4 x float>* %res
         ret void
 ; CHECK: test13
-; CHECK: shufps	$69, (%ecx), %xmm0
+; CHECK: shufps	$69, (%eax), %xmm0
 ; CHECK: pshufd	$-40, %xmm0, %xmm0
 }
 
@@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
         %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp27
 ; CHECK: test14:
-; CHECK: 	addps	[[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
-; CHECK: 	subps	[[X1]], [[X2:%xmm[0-9]+]]
+; CHECK: 	subps	[[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
+; CHECK: 	addps	[[X1]], [[X0:%xmm[0-9]+]]
 ; CHECK: 	movlhps	[[X2]], [[X0]]
 }
 
@@ -221,3 +221,4 @@ entry:
  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
  ret <4 x float> %double2float.i
 }
+
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 7855a4c5b37..0dd228eb145 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -12,7 +12,7 @@ entry:
   %D = or i32 %C, %B
   store i32 %D, i32* %a0, align 4
   ret void
-
+  
 ; X64: test1:
 ; X64: movb	%sil, (%rdi)
 
@@ -34,8 +34,8 @@ entry:
 ; X64: movb	%sil, 1(%rdi)
 
 ; X32: test2:
-; X32: movb	8(%esp), %[[REG:[abcd]l]]
-; X32: movb	%[[REG]], 1(%{{.*}})
+; X32: movb	8(%esp), %al
+; X32: movb	%al, 1(%{{.*}})
 }
 
 define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -67,8 +67,8 @@ entry:
 ; X64: movw	%si, 2(%rdi)
 
 ; X32: test4:
-; X32: movl	8(%esp), %e[[REG:[abcd]x]]
-; X32: movw	%[[REG]], 2(%{{.*}})
+; X32: movl	8(%esp), %eax
+; X32: movw	%ax, 2(%{{.*}})
 }
 
 define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -84,8 +84,8 @@ entry:
 ; X64: movw	%si, 2(%rdi)
 
 ; X32: test5:
-; X32: movzwl	8(%esp), %e[[REG:[abcd]x]]
-; X32: movw	%[[REG]], 2(%{{.*}})
+; X32: movzwl	8(%esp), %eax
+; X32: movw	%ax, 2(%{{.*}})
 }
 
 define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
@@ -102,8 +102,8 @@ entry:
 
 
 ; X32: test6:
-; X32: movb	8(%esp), %[[REG:[abcd]l]]
-; X32: movb	%[[REG]], 5(%{{.*}})
+; X32: movb	8(%esp), %al
+; X32: movb	%al, 5(%{{.*}})
 }
 
 define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
@@ -121,8 +121,8 @@ entry:
 
 
 ; X32: test7:
-; X32: movb	8(%esp), %[[REG:[abcd]l]]
-; X32: movb	%[[REG]], 5(%{{.*}})
+; X32: movb	8(%esp), %cl
+; X32: movb	%cl, 5(%{{.*}})
 }
 
 ; PR7833
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index f5662d97d13..e9b8721e660 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
 
 declare fastcc i32 @callee(i32 %arg)
 define fastcc i32 @directcall(i32 %arg) {
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index 19fbaafc194..97db1b340e8 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -2,10 +2,10 @@
 ; rdar://5752025
 
 ; We want:
-;      CHECK: movl	4(%esp), %ecx
-; CHECK-NEXT: andl	$15, %ecx
-; CHECK-NEXT: movl	$42, %eax
-; CHECK-NEXT: cmovel	%ecx, %eax
+;      CHECK: movl	$42, %ecx
+; CHECK-NEXT: movl	4(%esp), %eax
+; CHECK-NEXT: andl	$15, %eax
+; CHECK-NEXT: cmovnel	%ecx, %eax
 ; CHECK-NEXT: ret
 ;
 ; We don't want:
@@ -39,3 +39,4 @@ entry:
 	%retval = select i1 %tmp4, i32 %tmp2, i32 42		; <i32> [#uses=1]
 	ret i32 %retval
 }
+
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 59920f0fc8c..92b6859d1dc 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -22,7 +22,7 @@ ret_false:
     ret i1 false
 }
 ; CHECK: test2:
-; CHECK: btl
+; CHECK: btl %eax
 
 define i32 @test3(i8* %ptr) nounwind {
     %val = load i8* %ptr
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
index fca4da66a85..8655c6c8ea5 100644
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ b/test/CodeGen/X86/v-binop-widen.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
+; CHECK: divss
 ; CHECK: divps
 ; CHECK: divps
-; CHECK: divss
 
 %vec = type <9 x float>
 define %vec @vecdiv( %vec %p1, %vec %p2)
@@ -9,3 +9,4 @@ define %vec @vecdiv( %vec %p1, %vec %p2)
   %result = fdiv %vec %p1, %p2
   ret %vec %result
 }
+
diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll
index 334211132f1..569586af498 100644
--- a/test/CodeGen/X86/v-binop-widen2.ll
+++ b/test/CodeGen/X86/v-binop-widen2.ll
@@ -2,9 +2,9 @@
 ; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
 
 %vec = type <6 x float>
-; CHECK: divps
 ; CHECK: divss
 ; CHECK: divss
+; CHECK: divps
 
 ; Scheduler causes a different instruction order to be produced on Intel Atom
 ; ATOM: divps
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index ffe3a9f39dc..0aff822850c 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -7,10 +7,10 @@ target triple = "i686-apple-cl.1.0"
 define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
 entry:
 ; CHECK: subps
-; CHECK: subps
-; CHECK: mulps
 ; CHECK: mulps
 ; CHECK: addps
+; CHECK: subps
+; CHECK: mulps
 ; CHECK: addps
 	%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 >		; <<8 x float>> [#uses=1]
 	%sub = fsub <8 x float> %T1, %T0		; <<8 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
index fa4992e6cad..ee8d2d5e0b3 100644
--- a/test/CodeGen/X86/vec_shuffle-39.ll
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -54,8 +54,8 @@ entry:
 define <2 x double> @t3() nounwind readonly {
 bb:
 ; CHECK: t3:
+; CHECK: punpcklqdq %xmm1, %xmm0
 ; CHECK: movq (%rax), %xmm1
-; CHECK: punpcklqdq %xmm2, %xmm0
 ; CHECK: movsd %xmm1, %xmm0
   %tmp0 = load i128* null, align 1
   %tmp1 = load <2 x i32>* undef, align 8
@@ -72,8 +72,8 @@ bb:
 define <2 x i64> @t4() nounwind readonly {
 bb:
 ; CHECK: t4:
+; CHECK: punpcklqdq %xmm0, %xmm1
 ; CHECK: movq (%rax), %xmm0
-; CHECK: punpcklqdq %xmm2, %xmm1
 ; CHECK: movsd %xmm1, %xmm0
   %tmp0 = load i128* null, align 1
   %tmp1 = load <2 x i32>* undef, align 8
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 589247ba476..56c63644e02 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
 ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
 
-; CHECK: movl
 ; CHECK: paddd
+; CHECK: movl
 ; CHECK: movlpd
 
 ; Scheduler causes produce a different instruction order
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index 5a6e4a9b66e..cc11e4c28e2 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -40,27 +40,27 @@ entry:
 ; W64: subq  %rax, %rsp
 ; W64: movq  %rsp, %rax
 
-; EFI: movq  %rsp, [[R64:%r.*]]
 ; EFI: leaq  15(%{{.*}}), [[R1:%r.*]]
 ; EFI: andq  $-16, [[R1]]
+; EFI: movq  %rsp, [[R64:%r.*]]
 ; EFI: subq  [[R1]], [[R64]]
 ; EFI: movq  [[R64]], %rsp
 
   %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
 
 ; M64: subq  $48, %rsp
-; M64: movq  %rax, 32(%rsp)
 ; M64: leaq  -4096(%rbp), %r9
+; M64: movq  %rax, 32(%rsp)
 ; M64: callq bar
 
 ; W64: subq  $48, %rsp
-; W64: movq  %rax, 32(%rsp)
 ; W64: leaq  -4096(%rbp), %r9
+; W64: movq  %rax, 32(%rsp)
 ; W64: callq bar
 
 ; EFI: subq  $48, %rsp
-; EFI: movq  [[R64]], 32(%rsp)
 ; EFI: leaq  -[[B0OFS]](%rbp), %r9
+; EFI: movq  [[R64]], 32(%rsp)
 ; EFI: callq _bar
 
   ret i64 %r
diff --git a/test/CodeGen/X86/x86-64-psub.ll b/test/CodeGen/X86/x86-64-psub.ll
index 029eb9cf3ad..7869a80b2a2 100644
--- a/test/CodeGen/X86/x86-64-psub.ll
+++ b/test/CodeGen/X86/x86-64-psub.ll
@@ -4,8 +4,8 @@
 ; This test checks that the operands of packed sub instructions are
 ; never interchanged by the "Two-Address instruction pass".
 
-declare { i64, double } @getFirstParam()
-declare { i64, double } @getSecondParam()
+declare { i64, double } @getFirstParam() 
+declare { i64, double } @getSecondParam() 
 
 define i64 @test_psubb() {
 entry:
@@ -28,10 +28,9 @@ entry:
 
 ; CHECK: test_psubb:
 ; CHECK:   callq getFirstParam
-; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   psubb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -56,10 +55,9 @@ entry:
 
 ; CHECK: test_psubw:
 ; CHECK:   callq getFirstParam
-; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   psubw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -85,10 +83,9 @@ entry:
 
 ; CHECK: test_psubd:
 ; CHECK:   callq getFirstParam
-; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   psubd [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -113,10 +110,9 @@ entry:
 
 ; CHECK: test_psubsb:
 ; CHECK:   callq getFirstParam
-; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   psubsb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -141,10 +137,9 @@ entry:
 
 ; CHECK: test_psubswv:
 ; CHECK:   callq getFirstParam
-; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   psubsw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -169,10 +164,9 @@ entry:
 
 ; CHECK: test_psubusbv:
 ; CHECK:   callq getFirstParam
-; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   psubusb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -197,10 +191,9 @@ entry:
 
 ; CHECK: test_psubuswv:
 ; CHECK:   callq getFirstParam
-; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK:   psubusw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
index 0bd651a3efa..20bccab8ff7 100644
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -6,8 +6,8 @@
 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
 entry:
 ; CHECK:      shl4
-; CHECK:      pslld
 ; CHECK:      padd
+; CHECK:      pslld
 ; CHECK:      ret
   %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
   %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
@@ -67,8 +67,8 @@ entry:
 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
 entry:
 ; CHECK:      shl8
-; CHECK:      psllw
 ; CHECK:      padd
+; CHECK:      psllw
 ; CHECK:      ret
   %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
index a10923f7a80..ff93c68ff35 100644
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 
 ;; Simple case
 define i32 @test1(i8 %x) nounwind readnone {
@@ -10,7 +10,7 @@ define i32 @test1(i8 %x) nounwind readnone {
 ; CHECK: movzbl
 ; CHECK-NEXT: andl {{.*}}224
 
-;; Multiple uses of %x but easily extensible.
+;; Multiple uses of %x but easily extensible. 
 define i32 @test2(i8 %x) nounwind readnone {
   %A = and i8 %x, -32
   %B = zext i8 %A to i32
@@ -21,8 +21,8 @@ define i32 @test2(i8 %x) nounwind readnone {
 }
 ; CHECK: test2
 ; CHECK: movzbl
-; CHECK: andl $224
 ; CHECK: orl $63
+; CHECK: andl $224
 
 declare void @use(i32, i8)
 
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index f74d84d37cd..0ab302a31b7 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -34,8 +34,9 @@ entry:
   %tmp12 = add i64 %tmp11, 5089792279245435153
 
 ; CHECK:      addl	$2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK:      cmpl	$-8608074, %e[[REGISTER_zext]]
-; CHECK:      movslq	%e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
+; CHECK-NEXT: cmpl	$-8608074, %e[[REGISTER_zext]]
+; CHECK-NEXT: movslq	%e[[REGISTER_zext]], [[REGISTER_tmp:%r[a-z0-9]+]]
+; CHECK:      movq	[[REGISTER_tmp]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK-NOT:  [[REGISTER_zext]]
 ; CHECK:      subq	%r[[REGISTER_zext]], [[REGISTER_sext]]
 
-- 
2.34.1