X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Fvector-trunc.ll;h=d2eef9af2a25ec4c05eea0c669081d8a94c85d97;hb=90c9a16dbfa347016977fa49b0cfe0db438a9504;hp=80f3dee4332dae2b68b390266775c9ffc96d2e09;hpb=3d4542ce3da1cb0782c65d38130556a00ed2586d;p=oota-llvm.git diff --git a/test/CodeGen/X86/vector-trunc.ll b/test/CodeGen/X86/vector-trunc.ll index 80f3dee4332..d2eef9af2a2 100644 --- a/test/CodeGen/X86/vector-trunc.ll +++ b/test/CodeGen/X86/vector-trunc.ll @@ -82,16 +82,18 @@ define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) { ; ; SSE41-LABEL: trunc2x4i32: ; SSE41: # BB#0: # %entry -; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13] -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSE41-NEXT: pshufb %xmm2, %xmm1 +; SSE41-NEXT: pshufb %xmm2, %xmm0 +; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE41-NEXT: retq ; ; AVX-LABEL: trunc2x4i32: ; AVX: # BB#0: # %entry -; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13] -; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq @@ -219,3 +221,20 @@ entry: %1 = bitcast <8 x i8> %0 to i64 ret i64 %1 } + +define <16 x i8> @trunc16i64_const() { +; SSE-LABEL: trunc16i64_const +; SSE: # BB#0: # %entry +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: trunc16i64_const +; AVX: # BB#0: # %entry +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq + +entry: + %0 = trunc <16 x i64> zeroinitializer to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> + ret <16 x i8> %1 +}