From: Simon Pilgrim Date: Sun, 4 Jan 2015 19:08:03 +0000 (+0000) Subject: [X86][SSE] Added vector packing test for pr12412 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=c0c36083da09b53bf907fce1ca543ab67ffb5f7b;p=oota-llvm.git [X86][SSE] Added vector packing test for pr12412 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225138 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index 7c92a263667..131410606aa 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1088,3 +1088,37 @@ entry: store <4 x i32> zeroinitializer, <4 x i32>* %ptr2, align 16 ret void } + +define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) { +; SSE2-LABEL: PR12412: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: PR12412: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: PR12412: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: PR12412: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = 
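shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + ret <16 x i8> %0 +}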