X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Fsse41.ll;h=2ac4cb435a751ba6b2d4151e8fbd1ade992deaab;hb=782c8fbd6e5f210f6273622c2df588d0e0b01761;hp=6c093a8b752767031ec94db17bb49b980c29d1ab;hpb=a1a491c09429d9d0b3e74b404dd1d0aad8890eed;p=oota-llvm.git

Patch to test/CodeGen/X86/sse41.ll:
  * The RUN lines feed the test to llc directly instead of piping it through
    llvm-as first.
  * The ext_1 checks expect the constant-pool label LCPI7_0 instead of LCPI8_0.
  * The file now ends with a newline.
  * New tests: ptestz_1, ptestz_2 and ptestz_3 (calling the ptestz, ptestc and
    ptestnzc intrinsics respectively) and buildvector (checks that no
    "insertps $0" is emitted around the "insertps $16").

NOTE(review): this copy of the patch had lost its line breaks. Line and blank
line placement was reconstructed so that every hunk matches the line counts in
its @@ header; indentation and the whitespace inside each line are a best
guess and may differ from the upstream file. Apply with whitespace-insensitive
matching (e.g. "patch -l") or re-fetch the patch from the URL above.

diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 6c093a8b752..2ac4cb435a7 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32
-; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64
 
 @g16 = external global i16
 
@@ -123,11 +123,11 @@ define float @ext_1(<4 x float> %v) nounwind {
 
 ; X32: _ext_1:
 ; X32: pshufd $3, %xmm0, %xmm0
-; X32: addss LCPI8_0, %xmm0
+; X32: addss LCPI7_0, %xmm0
 
 ; X64: _ext_1:
 ; X64: pshufd $3, %xmm0, %xmm0
-; X64: addss LCPI8_0(%rip), %xmm0
+; X64: addss LCPI7_0(%rip), %xmm0
 }
 define float @ext_2(<4 x float> %v) nounwind {
   %s = extractelement <4 x float> %v, i32 3
@@ -181,4 +181,71 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
 
 ; X64: _insertps_3:
 ; X64: insertps $0, %xmm1, %xmm0
-}
\ No newline at end of file
+}
+
+define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+  %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+  ret i32 %tmp1
+; X32: _ptestz_1:
+; X32: ptest %xmm1, %xmm0
+; X32: sete %al
+
+; X64: _ptestz_1:
+; X64: ptest %xmm1, %xmm0
+; X64: sete %al
+}
+
+define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
+  %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+  ret i32 %tmp1
+; X32: _ptestz_2:
+; X32: ptest %xmm1, %xmm0
+; X32: sbbl %eax
+
+; X64: _ptestz_2:
+; X64: ptest %xmm1, %xmm0
+; X64: sbbl %eax
+}
+
+define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+  %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+  ret i32 %tmp1
+; X32: _ptestz_3:
+; X32: ptest %xmm1, %xmm0
+; X32: seta %al
+
+; X64: _ptestz_3:
+; X64: ptest %xmm1, %xmm0
+; X64: seta %al
+}
+
+
+declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
+; This used to compile to insertps $0 + insertps $16. insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+  %tmp7 = extractelement <2 x float> %A, i32 0
+  %tmp5 = extractelement <2 x float> %A, i32 1
+  %tmp3 = extractelement <2 x float> %B, i32 0
+  %tmp1 = extractelement <2 x float> %B, i32 1
+  %add.r = fadd float %tmp7, %tmp3
+  %add.i = fadd float %tmp5, %tmp1
+  %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+  %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+  ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+