From b2b5dc642cbbe781f73b9da83874d4005c50bd8e Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 25 Jun 2013 02:48:58 +0000 Subject: [PATCH] Revert "Temporarily enable MI-Sched on X86." This reverts commit 98a9b72e8c56dc13a2617de84503a3d78352789c. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184823 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.h | 5 +- test/CodeGen/X86/2006-05-02-InstrSched1.ll | 6 +-- test/CodeGen/X86/2007-01-08-InstrSched.ll | 4 +- test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 2 +- .../X86/2010-02-19-TailCallRetAddrBug.ll | 12 +++-- .../X86/2010-09-17-SideEffectsInChain.ll | 8 +-- test/CodeGen/X86/2011-10-19-LegelizeLoad.ll | 3 +- test/CodeGen/X86/2012-04-26-sdglue.ll | 4 +- test/CodeGen/X86/3addr-16bit.ll | 7 ++- test/CodeGen/X86/StackColoring.ll | 5 +- test/CodeGen/X86/abi-isel.ll | 20 +++---- test/CodeGen/X86/add.ll | 12 ++--- test/CodeGen/X86/alloca-align-rounding.ll | 2 +- test/CodeGen/X86/avx-arith.ll | 5 +- test/CodeGen/X86/avx-intel-ocl.ll | 13 ++--- test/CodeGen/X86/avx-shuffle.ll | 6 +-- test/CodeGen/X86/break-anti-dependencies.ll | 2 +- test/CodeGen/X86/bt.ll | 54 +++++++++---------- test/CodeGen/X86/byval7.ll | 4 +- test/CodeGen/X86/chain_order.ll | 3 +- test/CodeGen/X86/cmov.ll | 10 ++-- test/CodeGen/X86/commute-two-addr.ll | 3 +- test/CodeGen/X86/dbg-value-dag-combine.ll | 2 +- test/CodeGen/X86/fast-isel-mem.ll | 4 +- test/CodeGen/X86/fastcc.ll | 4 +- test/CodeGen/X86/fold-load.ll | 4 +- test/CodeGen/X86/fold-pcmpeqd-2.ll | 15 ++---- test/CodeGen/X86/full-lsr.ll | 2 +- test/CodeGen/X86/gather-addresses.ll | 38 +++++-------- test/CodeGen/X86/ghc-cc.ll | 7 +-- test/CodeGen/X86/ghc-cc64.ll | 31 +++++------ test/CodeGen/X86/hipe-cc.ll | 6 +-- test/CodeGen/X86/hipe-cc64.ll | 12 ++--- test/CodeGen/X86/lea-recursion.ll | 3 +- test/CodeGen/X86/lea.ll | 3 +- test/CodeGen/X86/lsr-loop-exit-cond.ll | 7 +-- test/CodeGen/X86/masked-iv-safe.ll | 4 +- test/CodeGen/X86/memcpy-2.ll | 14 ++--- test/CodeGen/X86/pmul.ll | 2 +- test/CodeGen/X86/pr14088.ll | 6 +-- test/CodeGen/X86/pr1505b.ll | 3 +- test/CodeGen/X86/pr16031.ll | 2 +- test/CodeGen/X86/pre-ra-sched.ll | 5 +- test/CodeGen/X86/rdrand.ll | 6 +-- test/CodeGen/X86/rdseed.ll | 6 +-- test/CodeGen/X86/segmented-stacks-dynamic.ll | 4 +- test/CodeGen/X86/select.ll | 2 +- test/CodeGen/X86/shift-bmi2.ll | 21 ++++---- test/CodeGen/X86/sink-hoist.ll | 9 ++-- test/CodeGen/X86/sse2.ll | 25 ++++----- test/CodeGen/X86/store-narrow.ll | 22 ++++---- test/CodeGen/X86/tailcall-largecode.ll | 2 +- test/CodeGen/X86/test-nofold.ll | 9 ++-- test/CodeGen/X86/trunc-to-bool.ll | 2 +- test/CodeGen/X86/v-binop-widen.ll | 3 +- test/CodeGen/X86/v-binop-widen2.ll | 2 +- test/CodeGen/X86/vec_shuffle-27.ll | 4 +- test/CodeGen/X86/vec_shuffle-39.ll | 4 +- test/CodeGen/X86/widen_cast-1.ll | 2 +- test/CodeGen/X86/win64_alloca_dynalloca.ll | 8 +-- test/CodeGen/X86/x86-64-psub.ll | 25 ++++----- test/CodeGen/X86/x86-shifts.ll | 4 +- test/CodeGen/X86/zext-fold.ll | 6 +-- test/CodeGen/X86/zext-sext.ll | 5 +- 64 files changed, 258 insertions(+), 277 deletions(-) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 59911ba656a..66832b989be 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -361,14 +361,11 @@ public: /// memset with zero passed as the second argument. Otherwise it /// returns null. const char *getBZeroEntry() const; - + /// This function returns true if the target has sincos() routine in its /// compiler runtime or math libraries. bool hasSinCos() const; - /// Enable the MachineScheduler pass for all X86 subtargets. - bool enableMachineScheduler() const LLVM_OVERRIDE { return true; } - /// enablePostRAScheduler - run for Atom optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll index 69266dc4e44..0afddd8f876 100644 --- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll @@ -1,10 +1,7 @@ ; REQUIRES: asserts ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \ -; RUN: grep asm-printer | grep 16 +; RUN: grep asm-printer | grep 14 ; -; It's possible to schedule this in 14 instructions by avoiding -; callee-save registers, but the scheduler isn't currently that -; conervative with registers. @size20 = external global i32 ; [#uses=1] @in5 = external global i8* ; [#uses=1] @@ -24,3 +21,4 @@ define i32 @compare(i8* %a, i8* %b) nounwind { } declare i32 @memcmp(i8*, i8*, i32) + diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll index 4ec703921e2..24aa5b98d0b 100644 --- a/test/CodeGen/X86/2007-01-08-InstrSched.ll +++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll @@ -11,12 +11,12 @@ define float @foo(float %x) nounwind { %tmp14 = fadd float %tmp12, %tmp7 ret float %tmp14 -; CHECK: mulss -; CHECK: mulss ; CHECK: mulss ; CHECK: mulss ; CHECK: addss +; CHECK: mulss ; CHECK: addss +; CHECK: mulss ; CHECK: addss ; CHECK: ret } diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index ee86da5861a..8174fbdc9e4 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -17,9 +17,9 @@ bb4: ; preds = %bb.i, %bb26, %bb4, %entry ; CHECK: %bb4 ; CHECK: xorl ; CHECK: callq +; CHECK: movq ; CHECK: xorl ; CHECK: xorl -; CHECK: movq %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; [#uses=0] %ins = or i64 %p, 2097152 ; [#uses=1] diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll index 10c50f9c8a5..d4a74c9e7e7 100644 --- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll +++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll @@ -3,17 +3,17 @@ ; Bug 6225 ; ; If a call is a fastcc tail call and tail call optimization is enabled, the -; caller frame is replaced by the callee frame. This can require that arguments are +; caller frame is replaced by the callee frame. This can require that arguments are ; placed on the former return address stack slot. Special care needs to be taken ; taken that the return address is moved / or stored in a register before ; lowering of arguments potentially overwrites the value. ; -; Move return address (60(%esp)) to a temporary register (%ebp) -; CHECK: movl 60(%esp), [[REGISTER:%[a-z]+]] +; Move return address (76(%esp)) to a temporary register (%ebp) +; CHECK: movl 76(%esp), [[REGISTER:%[a-z]+]] ; Overwrite return addresss -; CHECK: movl [[EBX:%[a-z]+]], 60(%esp) +; CHECK: movl [[EBX:%[a-z]+]], 76(%esp) ; Move return address from temporary register (%ebp) to new stack location (60(%esp)) -; CHECK: movl [[REGISTER]], 44(%esp) +; CHECK: movl [[REGISTER]], 60(%esp) %tupl_p = type [9 x i32]* @@ -51,3 +51,5 @@ false: tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind ret void } + + diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll index 39d89e3d827..1b339777f57 100644 --- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll +++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll @@ -19,8 +19,8 @@ entry: } ; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) -; CHECK: movb (%rsp), [[R1:%.+]] -; CHECK: movb 30(%rsp), [[R0:%.+]] -; CHECK: movb [[R1]], (%rsp) -; CHECK: movb [[R0]], 30(%rsp) +; CHECK: movb 38(%rsp), [[R0:%.+]] +; CHECK: movb 8(%rsp), [[R1:%.+]] +; CHECK: movb [[R1]], 8(%rsp) +; CHECK: movb [[R0]], 38(%rsp) ; CHECK: callq ___stack_chk_fail diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll index 07a6910c65e..da734d4b645 100644 --- a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll +++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll @@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: main define i32 @main() nounwind uwtable { entry: -; CHECK: pmovsxbq i(%rip), % ; CHECK: pmovsxbq j(%rip), % +; CHECK: pmovsxbq i(%rip), % %0 = load <2 x i8>* @i, align 8 %1 = load <2 x i8>* @j, align 8 %div = sdiv <2 x i8> %1, %0 @@ -25,3 +25,4 @@ entry: ret i32 0 ; CHECK: ret } + diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll index 8bcf8597e33..04659522d36 100644 --- a/test/CodeGen/X86/2012-04-26-sdglue.ll +++ b/test/CodeGen/X86/2012-04-26-sdglue.ll @@ -5,8 +5,8 @@ ; It's hard to test for the ISEL condition because CodeGen optimizes ; away the bugpointed code. Just ensure the basics are still there. ;CHECK: func: -;CHECK: vpxor -;CHECK: vinserti128 +;CHECK: vxorps +;CHECK: vinsertf128 ;CHECK: vpshufd ;CHECK: vpshufd ;CHECK: vmulps diff --git a/test/CodeGen/X86/3addr-16bit.ll b/test/CodeGen/X86/3addr-16bit.ll index d843a110961..c51247ab925 100644 --- a/test/CodeGen/X86/3addr-16bit.ll +++ b/test/CodeGen/X86/3addr-16bit.ll @@ -34,8 +34,7 @@ entry: ; 64BIT: t2: ; 64BIT-NOT: movw %si, %ax -; 64BIT: decl %eax -; 64BIT: movzwl %ax +; 64BIT: leal -1(%rsi), %eax %0 = icmp eq i16 %k, %c ; [#uses=1] %1 = add i16 %k, -1 ; [#uses=3] br i1 %0, label %bb, label %bb1 @@ -59,7 +58,7 @@ entry: ; 64BIT: t3: ; 64BIT-NOT: movw %si, %ax -; 64BIT: addl $2, %eax +; 64BIT: leal 2(%rsi), %eax %0 = add i16 %k, 2 ; [#uses=3] %1 = icmp eq i16 %k, %c ; [#uses=1] br i1 %1, label %bb, label %bb1 @@ -82,7 +81,7 @@ entry: ; 64BIT: t4: ; 64BIT-NOT: movw %si, %ax -; 64BIT: addl %edi, %eax +; 64BIT: leal (%rsi,%rdi), %eax %0 = add i16 %k, %c ; [#uses=3] %1 = icmp eq i16 %k, %c ; [#uses=1] br i1 %1, label %bb, label %bb1 diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll index 15101456ebb..6c0f00d17d5 100644 --- a/test/CodeGen/X86/StackColoring.ll +++ b/test/CodeGen/X86/StackColoring.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -;YESCOLOR: subq $144, %rsp -;NOCOLOR: subq $272, %rsp +;YESCOLOR: subq $136, %rsp +;NOCOLOR: subq $264, %rsp define i32 @myCall_w2(i32 %in) { entry: @@ -429,3 +429,4 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind declare i32 @foo(i32, i8*) + diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll index c49698e004f..955fc629048 100644 --- a/test/CodeGen/X86/abi-isel.ll +++ b/test/CodeGen/X86/abi-isel.ll @@ -1,16 +1,16 @@ -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC @src = external global [131072 x i32] @dst = external global [131072 x i32] diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll index dc02ac64661..5fe08ed305f 100644 --- a/test/CodeGen/X86/add.ll +++ b/test/CodeGen/X86/add.ll @@ -9,7 +9,7 @@ define i32 @test1(i32 inreg %a) nounwind { %b = add i32 %a, 128 ret i32 %b ; X32: subl $-128, %eax -; X64: subl $-128, +; X64: subl $-128, } define i64 @test2(i64 inreg %a) nounwind { %b = add i64 %a, 2147483648 @@ -20,7 +20,7 @@ define i64 @test2(i64 inreg %a) nounwind { define i64 @test3(i64 inreg %a) nounwind { %b = add i64 %a, 128 ret i64 %b - + ; X32: addl $128, %eax ; X64: subq $-128, } @@ -38,7 +38,7 @@ normal: overflow: ret i1 false - + ; X32: test4: ; X32: addl ; X32-NEXT: jo @@ -82,11 +82,11 @@ define i64 @test6(i64 %A, i32 %B) nounwind { ret i64 %tmp5 ; X32: test6: -; X32: movl 4(%esp), %eax -; X32-NEXT: movl 12(%esp), %edx +; X32: movl 12(%esp), %edx ; X32-NEXT: addl 8(%esp), %edx +; X32-NEXT: movl 4(%esp), %eax ; X32-NEXT: ret - + ; X64: test6: ; X64: shlq $32, %r[[A1]] ; X64: leaq (%r[[A1]],%r[[A0]]), %rax diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll index 74b9470db75..3d76fb0aa25 100644 --- a/test/CodeGen/X86/alloca-align-rounding.ll +++ b/test/CodeGen/X86/alloca-align-rounding.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s declare void @bar(<2 x i64>* %n) diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll index a9da1ec067c..4aa337033df 100644 --- a/test/CodeGen/X86/avx-arith.ll +++ b/test/CodeGen/X86/avx-arith.ll @@ -240,15 +240,15 @@ define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm -; CHECK-NEXT: vpaddq %xmm -; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpaddq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm @@ -269,3 +269,4 @@ define <4 x float> @int_sqrt_ss() { %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind ret <4 x float> %x2 } + diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll index 7337815a39a..055072098a2 100644 --- a/test/CodeGen/X86/avx-intel-ocl.ll +++ b/test/CodeGen/X86/avx-intel-ocl.ll @@ -32,7 +32,7 @@ declare i32 @func_int(i32, i32) define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 %x = fadd <16 x float> %a, %b - %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) %2 = load <16 x float>* %y, align 16 %3 = fadd <16 x float> %2, %1 ret <16 x float> %3 @@ -43,21 +43,21 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { ; preserved ymm6-ymm15 ; WIN64: testf16_regs ; WIN64: call -; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} -; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} +; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0 +; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1 ; WIN64: ret ; preserved ymm8-ymm15 ; X64: testf16_regs ; X64: call -; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} -; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} +; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0 +; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1 ; X64: ret define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 %x = fadd <16 x float> %a, %b - %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) %2 = load <16 x float>* %y, align 16 %3 = fadd <16 x float> %1, %b %4 = fadd <16 x float> %2, %3 @@ -166,3 +166,4 @@ entry: %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> ret <8 x float> %8 } + diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index 3e11284f1af..73faa1fe0d4 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -81,7 +81,7 @@ entry: define i32 @test9(<4 x i32> %a) nounwind { ; CHECK: test9 ; CHECK: vpextrd - %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> + %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> %r = extractelement <8 x i32> %b, i32 2 ; CHECK: ret ret i32 %r @@ -251,8 +251,8 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { ; CHECK: swap8doubles ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} -; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}} -; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}} +; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} +; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi) diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll index 614d0adc727..c94261467c9 100644 --- a/test/CodeGen/X86/break-anti-dependencies.ll +++ b/test/CodeGen/X86/break-anti-dependencies.ll @@ -1,7 +1,7 @@ ; Without list-burr scheduling we may not see the difference in codegen here. ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency ; breaker requires liveness information to be kept. -; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t +; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t ; RUN: grep "%xmm0" %t | count 14 ; RUN: not grep "%xmm1" %t ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll index f12a3543b07..e28923bb21d 100644 --- a/test/CodeGen/X86/bt.ll +++ b/test/CodeGen/X86/bt.ll @@ -38,7 +38,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: test2b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -56,7 +56,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2(i32 %x, i32 %n) nounwind { entry: ; CHECK: atest2 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -74,7 +74,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: atest2b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp eq i32 %tmp3, 0 ; [#uses=1] @@ -91,7 +91,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test3(i32 %x, i32 %n) nounwind { entry: ; CHECK: test3 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -109,7 +109,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: test3b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -127,7 +127,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne2 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -145,7 +145,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne2b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -163,7 +163,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: atestne2 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -181,7 +181,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: atestne2b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -199,7 +199,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne3 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -217,7 +217,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne3b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -235,7 +235,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query2(i32 %x, i32 %n) nounwind { entry: ; CHECK: query2 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -253,7 +253,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: query2b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -271,7 +271,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2(i32 %x, i32 %n) nounwind { entry: ; CHECK: aquery2 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -289,7 +289,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: aquery2b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -307,7 +307,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -325,7 +325,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -343,7 +343,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3x(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3x -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -361,7 +361,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3bx(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3bx -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -379,7 +379,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne2 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -397,7 +397,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne2b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -415,7 +415,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: aqueryne2 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -433,7 +433,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: aqueryne2b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -451,7 +451,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3 -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -469,7 +469,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3b -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -487,7 +487,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3x(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3x -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -505,7 +505,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3bx(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3bx -; CHECK: btl %e{{..}}, %e{{..}} +; CHECK: btl %eax, %ecx ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll index b6b5e6c9b12..98a26e47ab7 100644 --- a/test/CodeGen/X86/byval7.ll +++ b/test/CodeGen/X86/byval7.ll @@ -7,14 +7,14 @@ define i32 @main() nounwind { entry: ; CHECK: main: +; CHECK: movl $1, (%esp) ; CHECK: leal 16(%esp), %edi ; CHECK: leal 160(%esp), %esi ; CHECK: rep;movsl -; CHECK: movl $1, (%esp) %s = alloca %struct.S ; <%struct.S*> [#uses=2] %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1] store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16 - call void @t( i32 1, %struct.S* byval %s ) nounwind + call void @t( i32 1, %struct.S* byval %s ) nounwind ret i32 0 } diff --git a/test/CodeGen/X86/chain_order.ll b/test/CodeGen/X86/chain_order.ll index 942dd8a591f..056fd2741fa 100644 --- a/test/CodeGen/X86/chain_order.ll +++ b/test/CodeGen/X86/chain_order.ll @@ -3,8 +3,8 @@ ;CHECK: cftx020 ;CHECK: vmovsd (%rdi), %xmm{{.*}} ;CHECK: vmovsd 16(%rdi), %xmm{{.*}} -;CHECK: vmovsd 24(%rdi), %xmm{{.*}} ;CHECK: vmovhpd 8(%rdi), %xmm{{.*}} +;CHECK: vmovsd 24(%rdi), %xmm{{.*}} ;CHECK: vmovupd %xmm{{.*}}, (%rdi) ;CHECK: vmovupd %xmm{{.*}}, 16(%rdi) ;CHECK: ret @@ -35,3 +35,4 @@ entry: store <2 x double> %14, <2 x double>* %15, align 8 ret void } + diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll index 63bf5c522dc..ed25c82fdda 100644 --- a/test/CodeGen/X86/cmov.ll +++ b/test/CodeGen/X86/cmov.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK: test1: -; CHECK: btl -; CHECK-NEXT: movl $12, %eax +; CHECK: movl $12, %eax +; CHECK-NEXT: btl ; CHECK-NEXT: cmovael (%rcx), %eax ; CHECK-NEXT: ret @@ -19,8 +19,8 @@ entry: define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK: test2: -; CHECK: btl -; CHECK-NEXT: movl $12, %eax +; CHECK: movl $12, %eax +; CHECK-NEXT: btl ; CHECK-NEXT: cmovbl (%rcx), %eax ; CHECK-NEXT: ret @@ -92,7 +92,7 @@ bb.i.i.i: ; preds = %entry ; CHECK: testb ; CHECK-NOT: xor ; CHECK: setne -; CHECK: testb +; CHECK-NEXT: testb func_4.exit.i: ; preds = %bb.i.i.i, %entry %.not.i = xor i1 %2, true ; [#uses=1] diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll index 21f94423c59..0ceea29d9a7 100644 --- a/test/CodeGen/X86/commute-two-addr.ll +++ b/test/CodeGen/X86/commute-two-addr.ll @@ -38,11 +38,10 @@ define i32 @t2(i32 %X, i32 %Y) nounwind { define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind { entry: ; DARWIN: t3: +; DARWIN: shll $16 ; DARWIN: shlq $32, %rcx ; DARWIN-NOT: leaq ; DARWIN: orq %rcx, %rax -; DARWIN-NOT: leaq -; DARWIN: shll $16 %tmp21 = zext i32 %lb to i64 %tmp23 = zext i32 %ub to i64 %tmp24 = shl i64 %tmp23, 32 diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll index a37b3dffee4..7c3c361bae1 100644 --- a/test/CodeGen/X86/dbg-value-dag-combine.ll +++ b/test/CodeGen/X86/dbg-value-dag-combine.ll @@ -16,7 +16,7 @@ entry: call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14 %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15 %tmp3 = add i32 0, %tmp2, !dbg !15 -; CHECK: ##DEBUG_VALUE: idx <- E{{..$}} +; CHECK: ##DEBUG_VALUE: idx <- EAX{{$}} call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg !15 %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16 diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll index f3617f28732..52b1e856433 100644 --- a/test/CodeGen/X86/fast-isel-mem.ll +++ b/test/CodeGen/X86/fast-isel-mem.ll @@ -40,7 +40,7 @@ entry: ; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx ; ATOM: _t: -; ATOM: movl L_LotsStuff$non_lazy_ptr, %e{{..}} -; ATOM: movl $0, %e{{..}} +; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx +; ATOM: movl $0, %eax } diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll index a362f8d1ca7..705ab7bada7 100644 --- a/test/CodeGen/X86/fastcc.ll +++ b/test/CodeGen/X86/fastcc.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s -; CHECK: movsd %xmm{{[0-9]}}, 8(%esp) -; CHECK: xorl %eax, %eax +; CHECK: movsd %xmm0, 8(%esp) +; CHECK: xorl %ecx, %ecx @d = external global double ; [#uses=1] @c = external global double ; [#uses=1] diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index fc809681280..d8366654c01 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -38,10 +38,10 @@ L: store i16 %A, i16* %Q ret i32 %D - + ; CHECK: test2: ; CHECK: movl 4(%esp), %eax -; CHECK-NEXT: movzwl (%eax), %e{{..}} +; CHECK-NEXT: movzwl (%eax), %ecx } diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll index bb0873bc5da..2bde76efd2a 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -54,27 +54,22 @@ forbody: ; preds = %forcond %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2] %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1] - %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] - %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] - %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] - - call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind - - %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] - %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i14 = add <4 x i32> , %bitcast6.i13 ; <<4 x i32>> [#uses=1] %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i17 = add <4 x i32> , %not.i16 ; <<4 x i32>> [#uses=1] %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1] %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] - + %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] + call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] - + %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] %bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1] %bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index cbcc62a7011..0729dda4a12 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -4,7 +4,7 @@ define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { ; ATOM: foo ; ATOM: addl -; ATOM: addl +; ATOM: leal ; ATOM: leal ; CHECK: foo diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index d1abd71618c..72a50961b2f 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -1,35 +1,21 @@ -; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN -; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN +; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s ; rdar://7398554 ; When doing vector gather-scatter index calculation with 32-bit indices, ; bounce the vector off of cache rather than shuffling each individual ; element out of the index vector. -; CHECK: foo: -; LIN: movaps (%rsi), %xmm0 -; LIN: andps (%rdx), %xmm0 -; LIN: movaps %xmm0, -24(%rsp) -; LIN: movslq -24(%rsp), %rsi -; LIN: movslq -20(%rsp), %rcx -; LIN: movslq -16(%rsp), %rdx -; LIN: movslq -12(%rsp), %rax -; LIN: movsd (%rdi,%rsi,8), %xmm0 -; LIN: movhpd (%rdi,%rcx,8), %xmm0 -; LIN: movsd (%rdi,%rdx,8), %xmm1 -; LIN: movhpd (%rdi,%rax,8), %xmm1 - -; WIN: movaps (%rdx), %xmm0 -; WIN: andps (%r8), %xmm0 -; WIN: movaps %xmm0, (%rsp) -; WIN: movslq (%rsp), %rax -; WIN: movslq 4(%rsp), %rdx -; WIN: movslq 8(%rsp), %r9 -; WIN: movslq 12(%rsp), %r8 -; WIN: movsd (%rcx,%rax,8), %xmm0 -; WIN: movhpd (%rcx,%rdx,8), %xmm0 -; WIN: movsd (%rcx,%r9,8), %xmm1 -; WIN: movhpd (%rcx,%r8,8), %xmm1 +; CHECK: andps ([[H:%rdx|%r8]]), %xmm0 +; CHECK: movaps %xmm0, {{(-24)?}}(%rsp) +; CHECK: movslq {{(-24)?}}(%rsp), %rax +; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0 +; CHECK: movslq {{-20|4}}(%rsp), %rax +; CHECK: movhpd ([[P]],%rax,8), %xmm0 +; CHECK: movslq {{-16|8}}(%rsp), %rax +; CHECK: movsd ([[P]],%rax,8), %xmm1 +; CHECK: movslq {{-12|12}}(%rsp), %rax +; CHECK: movhpd ([[P]],%rax,8), %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>* %i diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll index 4dba2c08632..0e65cfdbae3 100644 --- a/test/CodeGen/X86/ghc-cc.ll +++ b/test/CodeGen/X86/ghc-cc.ll @@ -28,10 +28,10 @@ entry: define cc 10 void @foo() nounwind { entry: - ; CHECK: movl r1, %esi - ; CHECK-NEXT: movl hp, %edi + ; CHECK: movl base, %ebx ; CHECK-NEXT: movl sp, %ebp - ; CHECK-NEXT: movl base, %ebx + ; CHECK-NEXT: movl hp, %edi + ; CHECK-NEXT: movl r1, %esi %0 = load i32* @r1 %1 = load i32* @hp %2 = load i32* @sp @@ -42,3 +42,4 @@ entry: } declare cc 10 void @bar(i32, i32, i32, i32) + diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll index 403391e8165..fcf7e1797ad 100644 --- a/test/CodeGen/X86/ghc-cc64.ll +++ b/test/CodeGen/X86/ghc-cc64.ll @@ -41,22 +41,22 @@ entry: define cc 10 void @foo() nounwind { entry: - ; CHECK: movsd d2(%rip), %xmm6 - ; CHECK-NEXT: movsd d1(%rip), %xmm5 - ; CHECK-NEXT: movss f4(%rip), %xmm4 - ; CHECK-NEXT: movss f3(%rip), %xmm3 - ; CHECK-NEXT: movss f2(%rip), %xmm2 - ; CHECK-NEXT: movss f1(%rip), %xmm1 - ; CHECK-NEXT: movq splim(%rip), %r15 - ; CHECK-NEXT: movq r6(%rip), %r9 - ; CHECK-NEXT: movq r5(%rip), %r8 - ; CHECK-NEXT: movq r4(%rip), %rdi - ; CHECK-NEXT: movq r3(%rip), %rsi - ; CHECK-NEXT: movq r2(%rip), %r14 - ; CHECK-NEXT: movq r1(%rip), %rbx - ; CHECK-NEXT: movq hp(%rip), %r12 + ; CHECK: movq base(%rip), %r13 ; CHECK-NEXT: movq sp(%rip), %rbp - ; CHECK-NEXT: movq base(%rip), %r13 + ; CHECK-NEXT: movq hp(%rip), %r12 + ; CHECK-NEXT: movq r1(%rip), %rbx + ; CHECK-NEXT: movq r2(%rip), %r14 + ; CHECK-NEXT: movq r3(%rip), %rsi + ; CHECK-NEXT: movq r4(%rip), %rdi + ; CHECK-NEXT: movq r5(%rip), %r8 + ; CHECK-NEXT: movq r6(%rip), %r9 + ; CHECK-NEXT: movq splim(%rip), %r15 + ; CHECK-NEXT: movss f1(%rip), %xmm1 + ; CHECK-NEXT: movss f2(%rip), %xmm2 + ; CHECK-NEXT: movss f3(%rip), %xmm3 + ; CHECK-NEXT: movss f4(%rip), %xmm4 + ; CHECK-NEXT: movsd d1(%rip), %xmm5 + ; CHECK-NEXT: movsd d2(%rip), %xmm6 %0 = load double* @d2 %1 = load double* @d1 %2 = load float* @f4 @@ -83,3 +83,4 @@ entry: declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, float, float, float, float, double, double) + diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll index b34417ebf69..76d17a09d54 100644 --- a/test/CodeGen/X86/hipe-cc.ll +++ b/test/CodeGen/X86/hipe-cc.ll @@ -49,10 +49,10 @@ entry: store i32 %arg1, i32* %arg1_var store i32 %arg2, i32* %arg2_var - ; CHECK: movl 16(%esp), %esi - ; CHECK-NEXT: movl 12(%esp), %ebp + ; CHECK: movl 4(%esp), %edx ; CHECK-NEXT: movl 8(%esp), %eax - ; CHECK-NEXT: movl 4(%esp), %edx + ; CHECK-NEXT: movl 12(%esp), %ebp + ; CHECK-NEXT: movl 16(%esp), %esi %0 = load i32* %hp_var %1 = load i32* %p_var %2 = load i32* %arg0_var diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll index 27e1c723a8f..5dbb5a25cbe 100644 --- a/test/CodeGen/X86/hipe-cc64.ll +++ b/test/CodeGen/X86/hipe-cc64.ll @@ -5,10 +5,10 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: ; CHECK: movq %rsi, %rax - ; CHECK-NEXT: movl $8, %ecx - ; CHECK-NEXT: movl $9, %r8d ; CHECK-NEXT: movq %rdi, %rsi ; CHECK-NEXT: movq %rax, %rdx + ; CHECK-NEXT: movl $8, %ecx + ; CHECK-NEXT: movl $9, %r8d ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 @@ -57,11 +57,11 @@ entry: store i64 %arg2, i64* %arg2_var store i64 %arg3, i64* %arg3_var - ; CHECK: movq 40(%rsp), %r15 - ; CHECK-NEXT: movq 32(%rsp), %rbp - ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK: movq 8(%rsp), %rcx ; CHECK-NEXT: movq 16(%rsp), %rdx - ; CHECK-NEXT: movq 8(%rsp), %rcx + ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK-NEXT: movq 32(%rsp), %rbp + ; CHECK-NEXT: movq 40(%rsp), %r15 %0 = load i64* %hp_var %1 = load i64* %p_var %2 = load i64* %arg0_var diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll index 9480600312c..3f32fd27c5c 100644 --- a/test/CodeGen/X86/lea-recursion.ll +++ b/test/CodeGen/X86/lea-recursion.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep lea | count 13 +; RUN: llc < %s -march=x86-64 | grep lea | count 12 ; This testcase was written to demonstrate an instruction-selection problem, ; however it also happens to expose a limitation in the DAGCombiner's @@ -44,3 +44,4 @@ entry: store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7) ret void } + diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll index 3209de86ae4..87f0b0b30a8 100644 --- a/test/CodeGen/X86/lea.ll +++ b/test/CodeGen/X86/lea.ll @@ -28,7 +28,8 @@ bb.nph: bb2: ret i32 %x_offs ; CHECK: test2: -; CHECK: leal -5(%r[[A0:..]]), %eax +; CHECK: movl %e[[A0]], %eax +; CHECK: addl $-5, %eax ; CHECK: andl $-4, %eax ; CHECK: negl %eax ; CHECK: leal -4(%r[[A0]],%rax), %eax diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll index 3dee2ec89c1..8a81f70a8a2 100644 --- a/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -2,12 +2,12 @@ ; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s ; CHECK: t: -; CHECK: movl (%r9,%rax,4), %e{{..}} -; CHECK-NEXT: decq +; CHECK: decq +; CHECK-NEXT: movl (%r9,%rax,4), %eax ; CHECK-NEXT: jne ; ATOM: t: -; ATOM: movl (%r9,%rax,4), %e{{..}} +; ATOM: movl (%r9,%rax,4), %eax ; ATOM-NEXT: decq ; ATOM-NEXT: jne @@ -190,3 +190,4 @@ for.end: ; preds = %for.body, %entry %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ] ret i32 %bi.0.lcssa } + diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index 2da58849ea6..c33cac2e05a 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -3,9 +3,9 @@ ; RUN: not grep movz %t ; RUN: not grep sar %t ; RUN: not grep shl %t -; RUN: grep add %t | count 6 +; RUN: grep add %t | count 5 ; RUN: grep inc %t | count 2 -; RUN: grep lea %t | count 0 +; RUN: grep lea %t | count 3 ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index 5a4172e90e2..630c0ed1a33 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -56,15 +56,15 @@ entry: define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2-Darwin: t2: -; SSE2-Darwin: movaps (%ecx), %xmm0 +; SSE2-Darwin: movaps (%eax), %xmm0 ; SSE2-Darwin: movaps %xmm0, (%eax) ; SSE2-Mingw32: t2: -; SSE2-Mingw32: movaps (%ecx), %xmm0 +; SSE2-Mingw32: movaps (%eax), %xmm0 ; SSE2-Mingw32: movaps %xmm0, (%eax) ; SSE1: t2: -; SSE1: movaps (%ecx), %xmm0 +; SSE1: movaps (%eax), %xmm0 ; SSE1: movaps %xmm0, (%eax) ; NOSSE: t2: @@ -91,14 +91,14 @@ entry: define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2-Darwin: t3: -; SSE2-Darwin: movsd (%ecx), %xmm0 -; SSE2-Darwin: movsd 8(%ecx), %xmm1 +; SSE2-Darwin: movsd (%eax), %xmm0 +; SSE2-Darwin: movsd 8(%eax), %xmm1 ; SSE2-Darwin: movsd %xmm1, 8(%eax) ; SSE2-Darwin: movsd %xmm0, (%eax) ; SSE2-Mingw32: t3: -; SSE2-Mingw32: movsd (%ecx), %xmm0 -; SSE2-Mingw32: movsd 8(%ecx), %xmm1 +; SSE2-Mingw32: movsd (%eax), %xmm0 +; SSE2-Mingw32: movsd 8(%eax), %xmm1 ; SSE2-Mingw32: movsd %xmm1, 8(%eax) ; SSE2-Mingw32: movsd %xmm0, (%eax) diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll index 4374f37dfd4..da4af81959d 100644 --- a/test/CodeGen/X86/pmul.ll +++ b/test/CodeGen/X86/pmul.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t ; RUN: grep pmul %t | count 12 -; RUN: grep mov %t | count 14 +; RUN: grep mov %t | count 11 define <4 x i32> @a(<4 x i32> %i) nounwind { %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > diff --git a/test/CodeGen/X86/pr14088.ll b/test/CodeGen/X86/pr14088.ll index 7041d143238..505e3b5cf26 100644 --- a/test/CodeGen/X86/pr14088.ll +++ b/test/CodeGen/X86/pr14088.ll @@ -20,6 +20,6 @@ return: } ; We were miscompiling this and using %ax instead of %cx in the movw. -; CHECK: movw %ax, (%rsi) -; CHECK: movswl %ax, %eax -; CHECK: movslq %eax, %rax +; CHECK: movswl %cx, %ecx +; CHECK: movw %cx, (%rsi) +; CHECK: movslq %ecx, %rcx diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll index c348fec5467..9b0ef83ab04 100644 --- a/test/CodeGen/X86/pr1505b.ll +++ b/test/CodeGen/X86/pr1505b.ll @@ -57,10 +57,11 @@ entry: %tmp22 = tail call %"struct.std::basic_ostream >"* @_ZNSolsEd( %"struct.std::basic_ostream >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream >"*> [#uses=1] %tmp30 = tail call %"struct.std::basic_ostream >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream >"* %tmp22 ) ; <%"struct.std::basic_ostream >"*> [#uses=0] ; reload: +; CHECK: fld +; CHECK: fstps ; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc %tmp34 = tail call %"struct.std::basic_ostream >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream >"*> [#uses=1] %tmp3940 = fpext float %tmp1314 to double ; [#uses=1] -; CHECK: fld ; CHECK: fstpl ; CHECK: ZNSolsEd %tmp42 = tail call %"struct.std::basic_ostream >"* @_ZNSolsEd( %"struct.std::basic_ostream >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream >"*> [#uses=1] diff --git a/test/CodeGen/X86/pr16031.ll b/test/CodeGen/X86/pr16031.ll index 76b6c42c632..4721173cb67 100644 --- a/test/CodeGen/X86/pr16031.ll +++ b/test/CodeGen/X86/pr16031.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s ; CHECK: main: ; CHECK: pushl %esi diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll index 70135d43f49..b792ffa09fb 100644 --- a/test/CodeGen/X86/pre-ra-sched.ll +++ b/test/CodeGen/X86/pre-ra-sched.ll @@ -1,6 +1,5 @@ -; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \ -; RUN-disabled: 2>&1 | FileCheck %s -; RUN: true +; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \ +; RUN: 2>&1 | FileCheck %s ; REQUIRES: asserts ; ; rdar:13279013: pre-RA-sched should not check all interferences and diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll index 3d1870987a5..0fd9916bfa1 100644 --- a/test/CodeGen/X86/rdrand.ll +++ b/test/CodeGen/X86/rdrand.ll @@ -11,10 +11,10 @@ define i32 @_rdrand16_step(i16* %random_val) { ret i32 %isvalid ; CHECK: _rdrand16_step: ; CHECK: rdrandw %ax +; CHECK: movw %ax, (%r[[A0:di|cx]]) ; CHECK: movzwl %ax, %ecx ; CHECK: movl $1, %eax ; CHECK: cmovael %ecx, %eax -; CHECK: movw %cx, (%r[[A0:di|cx]]) ; CHECK: ret } @@ -26,9 +26,9 @@ define i32 @_rdrand32_step(i32* %random_val) { ret i32 %isvalid ; CHECK: _rdrand32_step: ; CHECK: rdrandl %e[[T0:[a-z]+]] +; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T0]], %eax -; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: ret } @@ -40,9 +40,9 @@ define i32 @_rdrand64_step(i64* %random_val) { ret i32 %isvalid ; CHECK: _rdrand64_step: ; CHECK: rdrandq %r[[T1:[a-z]+]] +; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax -; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: ret } diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll index 462cacad958..409da629888 100644 --- a/test/CodeGen/X86/rdseed.ll +++ b/test/CodeGen/X86/rdseed.ll @@ -12,10 +12,10 @@ define i32 @_rdseed16_step(i16* %random_val) { ret i32 %isvalid ; CHECK: _rdseed16_step: ; CHECK: rdseedw %ax +; CHECK: movw %ax, (%r[[A0:di|cx]]) ; CHECK: movzwl %ax, %ecx ; CHECK: movl $1, %eax ; CHECK: cmovael %ecx, %eax -; CHECK: movw %cx, (%r[[A0:di|cx]]) ; CHECK: ret } @@ -27,9 +27,9 @@ define i32 @_rdseed32_step(i32* %random_val) { ret i32 %isvalid ; CHECK: _rdseed32_step: ; CHECK: rdseedl %e[[T0:[a-z]+]] +; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T0]], %eax -; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: ret } @@ -41,8 +41,8 @@ define i32 @_rdseed64_step(i64* %random_val) { ret i32 %isvalid ; CHECK: _rdseed64_step: ; CHECK: rdseedq %r[[T1:[a-z]+]] +; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax -; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: ret } diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll index c452282cfab..d68b00b69a2 100644 --- a/test/CodeGen/X86/segmented-stacks-dynamic.ll +++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll @@ -31,7 +31,7 @@ false: ; X32-NEXT: ret ; X32: movl %esp, %eax -; X32: subl %ecx, %eax +; X32-NEXT: subl %ecx, %eax ; X32-NEXT: cmpl %eax, %gs:48 ; X32: movl %eax, %esp @@ -52,7 +52,7 @@ false: ; X64-NEXT: ret ; X64: movq %rsp, %[[RDI:rdi|rax]] -; X64: subq %{{.*}}, %[[RDI]] +; X64-NEXT: subq %{{.*}}, %[[RDI]] ; X64-NEXT: cmpq %[[RDI]], %fs:112 ; X64: movq %[[RDI]], %rsp diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index c8d340b2ad6..09ca07b31a1 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -256,8 +256,8 @@ entry: %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone ret i8* %call ; CHECK: test12: -; CHECK: mulq ; CHECK: movq $-1, %rdi +; CHECK: mulq ; CHECK: cmovnoq %rax, %rdi ; CHECK: jmp __Znam diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll index 9e6562423db..d1f321f1773 100644 --- a/test/CodeGen/X86/shift-bmi2.ll +++ b/test/CodeGen/X86/shift-bmi2.ll @@ -30,11 +30,10 @@ entry: %x = load i32* %p %shl = shl i32 %x, %shamt ; BMI2: shl32p -; Source order scheduling prevents folding, rdar:14208996. -; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI2: ret ; BMI264: shl32p -; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i32 %shl } @@ -75,7 +74,7 @@ entry: %x = load i64* %p %shl = shl i64 %x, %shamt ; BMI264: shl64p -; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i64 %shl } @@ -107,11 +106,10 @@ entry: %x = load i32* %p %shl = lshr i32 %x, %shamt ; BMI2: lshr32p -; Source order scheduling prevents folding, rdar:14208996. -; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI2: ret ; BMI264: lshr32 -; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i32 %shl } @@ -130,7 +128,7 @@ entry: %x = load i64* %p %shl = lshr i64 %x, %shamt ; BMI264: lshr64p -; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i64 %shl } @@ -152,11 +150,10 @@ entry: %x = load i32* %p %shl = ashr i32 %x, %shamt ; BMI2: ashr32p -; Source order scheduling prevents folding, rdar:14208996. -; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI2: ret ; BMI264: ashr32 -; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i32 %shl } @@ -175,7 +172,7 @@ entry: %x = load i64* %p %shl = ashr i64 %x, %shamt ; BMI264: ashr64p -; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i64 %shl } diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index ad69f93f6d4..2aca5b897d3 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -26,10 +26,11 @@ define double @foo(double %x, double %y, i1 %c) nounwind { ; CHECK: split: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: je +; CHECK-NEXT: jne +; CHECK-NEXT: movaps +; CHECK-NEXT: ret ; CHECK: divsd -; CHECK: movaps -; CHECK: ret +; CHECK-NEXT: ret define double @split(double %x, double %y, i1 %c) nounwind { %a = fdiv double %x, 3.2 %z = select i1 %c, double %a, double %y @@ -64,7 +65,7 @@ return: ; Sink instructions with dead EFLAGS defs. ; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag. -; +; ; See . This test isn't valid after we made machine ; sinking more conservative about sinking instructions that define a preg into a ; block when we don't know if the preg is killed within the current block. diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index 21af73ad8a5..36a0fd91bd8 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -7,7 +7,7 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void - + ; CHECK: test1: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 @@ -23,12 +23,12 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void - + ; CHECK: test2: -; CHECK: movl 4(%esp), %eax -; CHECK: movl 8(%esp), %ecx -; CHECK-NEXT: movapd (%ecx), %xmm0 +; CHECK: movl 8(%esp), %eax +; CHECK-NEXT: movapd (%eax), %xmm0 ; CHECK-NEXT: movhpd 12(%esp), %xmm0 +; CHECK-NEXT: movl 4(%esp), %eax ; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } @@ -48,7 +48,7 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind store <4 x float> %tmp13, <4 x float>* %res ret void ; CHECK: @test3 -; CHECK: unpcklps +; CHECK: unpcklps } define void @test4(<4 x float> %X, <4 x float>* %res) nounwind { @@ -85,9 +85,9 @@ define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind { %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] store <4 x float> %tmp2, <4 x float>* %res ret void - + ; CHECK: test6: -; CHECK: movaps (%ecx), %xmm0 +; CHECK: movaps (%eax), %xmm0 ; CHECK: movaps %xmm0, (%eax) } @@ -96,7 +96,7 @@ define void @test7() nounwind { shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] store <4 x float> %2, <4 x float>* null ret void - + ; CHECK: test7: ; CHECK: xorps %xmm0, %xmm0 ; CHECK: movaps %xmm0, 0 @@ -166,7 +166,7 @@ define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x fl store <4 x float> %tmp11, <4 x float>* %res ret void ; CHECK: test13 -; CHECK: shufps $69, (%ecx), %xmm0 +; CHECK: shufps $69, (%eax), %xmm0 ; CHECK: pshufd $-40, %xmm0, %xmm0 } @@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] ret <4 x float> %tmp27 ; CHECK: test14: -; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]] -; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]] +; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]] +; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]] ; CHECK: movlhps [[X2]], [[X0]] } @@ -221,3 +221,4 @@ entry: %double2float.i = fptrunc <4 x double> %0 to <4 x float> ret <4 x float> %double2float.i } + diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index 7855a4c5b37..0dd228eb145 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -12,7 +12,7 @@ entry: %D = or i32 %C, %B store i32 %D, i32* %a0, align 4 ret void - + ; X64: test1: ; X64: movb %sil, (%rdi) @@ -34,8 +34,8 @@ entry: ; X64: movb %sil, 1(%rdi) ; X32: test2: -; X32: movb 8(%esp), %[[REG:[abcd]l]] -; X32: movb %[[REG]], 1(%{{.*}}) +; X32: movb 8(%esp), %al +; X32: movb %al, 1(%{{.*}}) } define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp { @@ -67,8 +67,8 @@ entry: ; X64: movw %si, 2(%rdi) ; X32: test4: -; X32: movl 8(%esp), %e[[REG:[abcd]x]] -; X32: movw %[[REG]], 2(%{{.*}}) +; X32: movl 8(%esp), %eax +; X32: movw %ax, 2(%{{.*}}) } define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp { @@ -84,8 +84,8 @@ entry: ; X64: movw %si, 2(%rdi) ; X32: test5: -; X32: movzwl 8(%esp), %e[[REG:[abcd]x]] -; X32: movw %[[REG]], 2(%{{.*}}) +; X32: movzwl 8(%esp), %eax +; X32: movw %ax, 2(%{{.*}}) } define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp { @@ -102,8 +102,8 @@ entry: ; X32: test6: -; X32: movb 8(%esp), %[[REG:[abcd]l]] -; X32: movb %[[REG]], 5(%{{.*}}) +; X32: movb 8(%esp), %al +; X32: movb %al, 5(%{{.*}}) } define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind { @@ -121,8 +121,8 @@ entry: ; X32: test7: -; X32: movb 8(%esp), %[[REG:[abcd]l]] -; X32: movb %[[REG]], 5(%{{.*}}) +; X32: movb 8(%esp), %cl +; X32: movb %cl, 5(%{{.*}}) } ; PR7833 diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll index f5662d97d13..e9b8721e660 100644 --- a/test/CodeGen/X86/tailcall-largecode.ll +++ b/test/CodeGen/X86/tailcall-largecode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s declare fastcc i32 @callee(i32 %arg) define fastcc i32 @directcall(i32 %arg) { diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll index 19fbaafc194..97db1b340e8 100644 --- a/test/CodeGen/X86/test-nofold.ll +++ b/test/CodeGen/X86/test-nofold.ll @@ -2,10 +2,10 @@ ; rdar://5752025 ; We want: -; CHECK: movl 4(%esp), %ecx -; CHECK-NEXT: andl $15, %ecx -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: cmovel %ecx, %eax +; CHECK: movl $42, %ecx +; CHECK-NEXT: movl 4(%esp), %eax +; CHECK-NEXT: andl $15, %eax +; CHECK-NEXT: cmovnel %ecx, %eax ; CHECK-NEXT: ret ; ; We don't want: @@ -39,3 +39,4 @@ entry: %retval = select i1 %tmp4, i32 %tmp2, i32 42 ; [#uses=1] ret i32 %retval } + diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll index 59920f0fc8c..92b6859d1dc 100644 --- a/test/CodeGen/X86/trunc-to-bool.ll +++ b/test/CodeGen/X86/trunc-to-bool.ll @@ -22,7 +22,7 @@ ret_false: ret i1 false } ; CHECK: test2: -; CHECK: btl +; CHECK: btl %eax define i32 @test3(i8* %ptr) nounwind { %val = load i8* %ptr diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll index fca4da66a85..8655c6c8ea5 100644 --- a/test/CodeGen/X86/v-binop-widen.ll +++ b/test/CodeGen/X86/v-binop-widen.ll @@ -1,7 +1,7 @@ ; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s +; CHECK: divss ; CHECK: divps ; CHECK: divps -; CHECK: divss %vec = type <9 x float> define %vec @vecdiv( %vec %p1, %vec %p2) @@ -9,3 +9,4 @@ define %vec @vecdiv( %vec %p1, %vec %p2) %result = fdiv %vec %p1, %p2 ret %vec %result } + diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll index 334211132f1..569586af498 100644 --- a/test/CodeGen/X86/v-binop-widen2.ll +++ b/test/CodeGen/X86/v-binop-widen2.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s %vec = type <6 x float> -; CHECK: divps ; CHECK: divss ; CHECK: divss +; CHECK: divps ; Scheduler causes a different instruction order to be produced on Intel Atom ; ATOM: divps diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll index ffe3a9f39dc..0aff822850c 100644 --- a/test/CodeGen/X86/vec_shuffle-27.ll +++ b/test/CodeGen/X86/vec_shuffle-27.ll @@ -7,10 +7,10 @@ target triple = "i686-apple-cl.1.0" define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone { entry: ; CHECK: subps -; CHECK: subps -; CHECK: mulps ; CHECK: mulps ; CHECK: addps +; CHECK: subps +; CHECK: mulps ; CHECK: addps %tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1] %sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1] diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll index fa4992e6cad..ee8d2d5e0b3 100644 --- a/test/CodeGen/X86/vec_shuffle-39.ll +++ b/test/CodeGen/X86/vec_shuffle-39.ll @@ -54,8 +54,8 @@ entry: define <2 x double> @t3() nounwind readonly { bb: ; CHECK: t3: +; CHECK: punpcklqdq %xmm1, %xmm0 ; CHECK: movq (%rax), %xmm1 -; CHECK: punpcklqdq %xmm2, %xmm0 ; CHECK: movsd %xmm1, %xmm0 %tmp0 = load i128* null, align 1 %tmp1 = load <2 x i32>* undef, align 8 @@ -72,8 +72,8 @@ bb: define <2 x i64> @t4() nounwind readonly { bb: ; CHECK: t4: +; CHECK: punpcklqdq %xmm0, %xmm1 ; CHECK: movq (%rax), %xmm0 -; CHECK: punpcklqdq %xmm2, %xmm1 ; CHECK: movsd %xmm1, %xmm0 %tmp0 = load i128* null, align 1 %tmp1 = load <2 x i32>* undef, align 8 diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll index 589247ba476..56c63644e02 100644 --- a/test/CodeGen/X86/widen_cast-1.ll +++ b/test/CodeGen/X86/widen_cast-1.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s -; CHECK: movl ; CHECK: paddd +; CHECK: movl ; CHECK: movlpd ; Scheduler causes produce a different instruction order diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll index 5a6e4a9b66e..cc11e4c28e2 100644 --- a/test/CodeGen/X86/win64_alloca_dynalloca.ll +++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll @@ -40,27 +40,27 @@ entry: ; W64: subq %rax, %rsp ; W64: movq %rsp, %rax -; EFI: movq %rsp, [[R64:%r.*]] ; EFI: leaq 15(%{{.*}}), [[R1:%r.*]] ; EFI: andq $-16, [[R1]] +; EFI: movq %rsp, [[R64:%r.*]] ; EFI: subq [[R1]], [[R64]] ; EFI: movq [[R64]], %rsp %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind ; M64: subq $48, %rsp -; M64: movq %rax, 32(%rsp) ; M64: leaq -4096(%rbp), %r9 +; M64: movq %rax, 32(%rsp) ; M64: callq bar ; W64: subq $48, %rsp -; W64: movq %rax, 32(%rsp) ; W64: leaq -4096(%rbp), %r9 +; W64: movq %rax, 32(%rsp) ; W64: callq bar ; EFI: subq $48, %rsp -; EFI: movq [[R64]], 32(%rsp) ; EFI: leaq -[[B0OFS]](%rbp), %r9 +; EFI: movq [[R64]], 32(%rsp) ; EFI: callq _bar ret i64 %r diff --git a/test/CodeGen/X86/x86-64-psub.ll b/test/CodeGen/X86/x86-64-psub.ll index 029eb9cf3ad..7869a80b2a2 100644 --- a/test/CodeGen/X86/x86-64-psub.ll +++ b/test/CodeGen/X86/x86-64-psub.ll @@ -4,8 +4,8 @@ ; This test checks that the operands of packed sub instructions are ; never interchanged by the "Two-Address instruction pass". -declare { i64, double } @getFirstParam() -declare { i64, double } @getSecondParam() +declare { i64, double } @getFirstParam() +declare { i64, double } @getSecondParam() define i64 @test_psubb() { entry: @@ -28,10 +28,9 @@ entry: ; CHECK: test_psubb: ; CHECK: callq getFirstParam -; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam -; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -56,10 +55,9 @@ entry: ; CHECK: test_psubw: ; CHECK: callq getFirstParam -; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam -; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubw [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -85,10 +83,9 @@ entry: ; CHECK: test_psubd: ; CHECK: callq getFirstParam -; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam -; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubd [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -113,10 +110,9 @@ entry: ; CHECK: test_psubsb: ; CHECK: callq getFirstParam -; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam -; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubsb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -141,10 +137,9 @@ entry: ; CHECK: test_psubswv: ; CHECK: callq getFirstParam -; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam -; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubsw [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -169,10 +164,9 @@ entry: ; CHECK: test_psubusbv: ; CHECK: callq getFirstParam -; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam -; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubusb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -197,10 +191,9 @@ entry: ; CHECK: test_psubuswv: ; CHECK: callq getFirstParam -; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam -; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] +; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubusw [[PARAM2]], [[PARAM1]] ; CHECK: ret diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll index 0bd651a3efa..20bccab8ff7 100644 --- a/test/CodeGen/X86/x86-shifts.ll +++ b/test/CodeGen/X86/x86-shifts.ll @@ -6,8 +6,8 @@ define <4 x i32> @shl4(<4 x i32> %A) nounwind { entry: ; CHECK: shl4 -; CHECK: pslld ; CHECK: padd +; CHECK: pslld ; CHECK: ret %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2> %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1> @@ -67,8 +67,8 @@ entry: define <8 x i16> @shl8(<8 x i16> %A) nounwind { entry: ; CHECK: shl8 -; CHECK: psllw ; CHECK: padd +; CHECK: psllw ; CHECK: ret %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll index a10923f7a80..ff93c68ff35 100644 --- a/test/CodeGen/X86/zext-fold.ll +++ b/test/CodeGen/X86/zext-fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s ;; Simple case define i32 @test1(i8 %x) nounwind readnone { @@ -10,7 +10,7 @@ define i32 @test1(i8 %x) nounwind readnone { ; CHECK: movzbl ; CHECK-NEXT: andl {{.*}}224 -;; Multiple uses of %x but easily extensible. +;; Multiple uses of %x but easily extensible. define i32 @test2(i8 %x) nounwind readnone { %A = and i8 %x, -32 %B = zext i8 %A to i32 @@ -21,8 +21,8 @@ define i32 @test2(i8 %x) nounwind readnone { } ; CHECK: test2 ; CHECK: movzbl -; CHECK: andl $224 ; CHECK: orl $63 +; CHECK: andl $224 declare void @use(i32, i8) diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll index f74d84d37cd..0ab302a31b7 100644 --- a/test/CodeGen/X86/zext-sext.ll +++ b/test/CodeGen/X86/zext-sext.ll @@ -34,8 +34,9 @@ entry: %tmp12 = add i64 %tmp11, 5089792279245435153 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]] -; CHECK: cmpl $-8608074, %e[[REGISTER_zext]] -; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]] +; CHECK-NEXT: cmpl $-8608074, %e[[REGISTER_zext]] +; CHECK-NEXT: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%r[a-z0-9]+]] +; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%r[a-z0-9]+]] ; CHECK-NOT: [[REGISTER_zext]] ; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]] -- 2.34.1