1 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
5 ;CHECK-LABEL: intrin_pmov:
6 ;CHECK: pmovzxbw (%{{.*}}), %xmm0
; Loads a 128-bit vector from %src, passes it through the SSE4.1 pmovzxbw
; intrinsic, and writes the <8 x i16> result to %dest via the SSE2 unaligned
; store intrinsic. The CHECK line preceding this function expects pmovzxbw to
; be emitted with a memory source operand and %xmm0 as the destination.
9 define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp {
; Reinterpret the byte pointer as a pointer to <2 x i64> and load through it
; with 16-byte alignment.
10 %1 = bitcast i8* %src to <2 x i64>*
11 %2 = load <2 x i64>, <2 x i64>* %1, align 16
; View the loaded bits as sixteen i8 lanes, which is the operand type the
; pmovzxbw intrinsic is declared with.
12 %3 = bitcast <2 x i64> %2 to <16 x i8>
13 %4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind
; The store intrinsic takes an i8* address and a <16 x i8> value, so both the
; destination pointer and the extended result are bitcast before the call.
14 %5 = bitcast i16* %dest to i8*
15 %6 = bitcast <8 x i16> %4 to <16 x i8>
16 tail call void @llvm.x86.sse2.storeu.dq(i8* %5, <16 x i8> %6) nounwind
; Target intrinsics called by the tests in this file.
; pmovzxbw: takes <16 x i8>, returns <8 x i16>; declared readnone.
20 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
; storeu.dq: takes an i8* address and a <16 x i8> value, returns nothing.
21 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
; Feeds a value that arrives as a double into the SSE4.1 pmovzxwd intrinsic.
; The CHECK line inside the body expects the register-to-register form of
; pmovzxwd operating on %xmm0.
25 define <4 x i32> @foo0(double %v.coerce) nounwind ssp {
27 ; CHECK: pmovzxwd %xmm0, %xmm0
; Reinterpret the 64-bit double argument as four i16 lanes.
29 %tmp = bitcast double %v.coerce to <4 x i16>
; Widen to the <8 x i16> operand type of the intrinsic: mask indices 0-3 keep
; the four source lanes in order, and the upper four result lanes are undef.
30 %tmp1 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
31 %tmp2 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp1) nounwind
; Feeds a value that arrives as a double into the SSE4.1 pmovzxbw intrinsic.
; The CHECK line inside the body expects the register-to-register form of
; pmovzxbw operating on %xmm0.
35 define <8 x i16> @foo1(double %v.coerce) nounwind ssp {
37 ; CHECK: pmovzxbw %xmm0, %xmm0
; Reinterpret the 64-bit double argument as eight i8 lanes.
39 %tmp = bitcast double %v.coerce to <8 x i8>
; Widen to the <16 x i8> operand type of the intrinsic: mask indices 0-7 keep
; the eight source lanes in order, and the upper eight result lanes are undef.
40 %tmp1 = shufflevector <8 x i8> %tmp, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
41 %tmp2 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %tmp1)
; pmovzxwd intrinsic: takes <8 x i16>, returns <4 x i32>; declared readnone.
45 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone