X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Favx512-mask-op.ll;h=015c70a6ba08c631b902bea2f7992a7df5590a77;hb=6e961aa243f223ddb704ce708056238d7c1d7e24;hp=244d761058c30290434429eee1592ff16cf2b0b5;hpb=52ebd433386d85eee0b06d4592961294563fb067;p=oota-llvm.git

diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index 244d761058c..015c70a6ba0 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,39 +1,48 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL --check-prefix=CHECK
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX --check-prefix=CHECK
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
 
-; CHECK-LABEL: mask16
-; CHECK: kmovw
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw
 define i16 @mask16(i16 %x) {
+; CHECK-LABEL: mask16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: knotw %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
 %m0 = bitcast i16 %x to <16 x i1>
 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
 %ret = bitcast <16 x i1> %m1 to i16
 ret i16 %ret
 }
 
-; CHECK-LABEL: mask8
-; KNL: kmovw
-; KNL-NEXT: knotw
-; KNL-NEXT: kmovw
-; SKX: kmovb
-; SKX-NEXT: knotb
-; SKX-NEXT: kmovb
-
 define i8 @mask8(i8 %x) {
+; KNL-LABEL: mask8:
+; KNL: ## BB#0:
+; KNL-NEXT: movzbl %dil, %eax
+; KNL-NEXT: kmovw %eax, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: mask8:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb %edi, %k0
+; SKX-NEXT: knotb %k0, %k0
+; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: retq
 %m0 = bitcast i8 %x to <8 x i1>
 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
 %ret = bitcast <8 x i1> %m1 to i8
 ret i8 %ret
 }
 
-; CHECK-LABEL: mask16_mem
-; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
-; CHECK: ret
-
 define void @mask16_mem(i16* %ptr) {
+; CHECK-LABEL: mask16_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw (%rdi), %k0
+; CHECK-NEXT: knotw %k0, %k0
+; CHECK-NEXT: kmovw %k0, (%rdi)
+; CHECK-NEXT: retq
 %x = load i16, i16* %ptr, align 4
 %m0 = bitcast i16 %x to <16 x i1>
 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -42,15 +51,20 @@ define void @mask16_mem(i16* %ptr) {
 ret void
 }
 
-; CHECK-LABEL: mask8_mem
-; KNL: kmovw ([[ARG1]]), %k{{[0-7]}}
-; KNL-NEXT: knotw
-; KNL-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
-; SKX: kmovb ([[ARG1]]), %k{{[0-7]}}
-; SKX-NEXT: knotb
-; SKX-NEXT: kmovb %k{{[0-7]}}, ([[ARG1]])
-
 define void @mask8_mem(i8* %ptr) {
+; KNL-LABEL: mask8_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw (%rdi), %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: mask8_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb (%rdi), %k0
+; SKX-NEXT: knotb %k0, %k0
+; SKX-NEXT: kmovb %k0, (%rdi)
+; SKX-NEXT: retq
 %x = load i8, i8* %ptr, align 4
 %m0 = bitcast i8 %x to <8 x i1>
 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -59,11 +73,16 @@ define void @mask8_mem(i8* %ptr) {
 ret void
 }
 
-; CHECK-LABEL: mand16
-; CHECK: kandw
-; CHECK: kxorw
-; CHECK: korw
 define i16 @mand16(i16 %x, i16 %y) {
+; CHECK-LABEL: mand16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k2
+; CHECK-NEXT: kxorw
%k1, %k0, %k0 +; CHECK-NEXT: korw %k0, %k2, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> %mc = and <16 x i1> %ma, %mb @@ -73,56 +92,68 @@ define i16 @mand16(i16 %x, i16 %y) { ret i16 %ret } -; CHECK-LABEL: shuf_test1 -; CHECK: kshiftrw $8 define i8 @shuf_test1(i16 %v) nounwind { +; KNL-LABEL: shuf_test1: +; KNL: ## BB#0: +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kshiftrw $8, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: retq +; +; SKX-LABEL: shuf_test1: +; SKX: ## BB#0: +; SKX-NEXT: kmovw %edi, %k0 +; SKX-NEXT: kshiftrw $8, %k0, %k0 +; SKX-NEXT: kmovb %k0, %eax +; SKX-NEXT: retq %v1 = bitcast i16 %v to <16 x i1> %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> %mask1 = bitcast <8 x i1> %mask to i8 ret i8 %mask1 } -; CHECK-LABEL: zext_test1 -; CHECK: kshiftlw -; CHECK: kshiftrw -; CHECK: kmovw - define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) { +; CHECK-LABEL: zext_test1: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; CHECK-NEXT: kshiftlw $10, %k0, %k0 +; CHECK-NEXT: kshiftrw $15, %k0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i32 ret i32 %res -} - -; CHECK-LABEL: zext_test2 -; CHECK: kshiftlw -; CHECK: kshiftrw -; CHECK: kmovw - -define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { +}define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i16 ret i16 %res -} - -; CHECK-LABEL: zext_test3 -; CHECK: kshiftlw -; CHECK: kshiftrw -; CHECK: kmovw - -define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { +}define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i8 ret i8 %res } -; CHECK-LABEL: conv1 -; KNL: kmovw %k0, %eax -; KNL: movb %al, (%rdi) -; SKX: kmovb %k0, (%rdi) define i8 @conv1(<8 x i1>* %R) { +; KNL-LABEL: conv1: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: kxnorw %k0, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: movb %al, (%rdi) +; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp) +; KNL-NEXT: movb $-2, %al +; KNL-NEXT: retq +; +; SKX-LABEL: conv1: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: kxnorw %k0, %k0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) +; SKX-NEXT: movb $-2, %al +; SKX-NEXT: retq entry: store <8 x i1> , <8 x i1>* %R @@ -133,12 +164,27 @@ entry: ret i8 %mask_convert } -; SKX-LABEL: test4 -; SKX: vpcmpgt -; SKX: knot -; SKX: vpcmpgt -; SKX: vpmovm2d define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { +; KNL-LABEL: test4: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vpmovqd %zmm0, %ymm0 +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 +; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 +; KNL-NEXT: vpmovqd %zmm1, %ymm1 +; KNL-NEXT: vpslld $31, %xmm1, %xmm1 +; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 +; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test4: +; SKX: ## BB#0: +; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 +; SKX-NEXT: knotw %k0, %k1 +; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: retq %x_gt_y = icmp sgt <4 x i64> %x, %y %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1 %res = icmp sgt <4 x 
i1>%x_gt_y, %x1_gt_y1 @@ -146,30 +192,27 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1 ret <4 x i32> %resse } -; SKX-LABEL: test5 -; SKX: vpcmpgt -; SKX: knot -; SKX: vpcmpgt -; SKX: vpmovm2q define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { +; KNL-LABEL: test5: +; KNL: ## BB#0: +; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 +; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test5: +; SKX: ## BB#0: +; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 +; SKX-NEXT: knotw %k0, %k1 +; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1} +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: retq %x_gt_y = icmp slt <2 x i64> %x, %y %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1 %resse = sext <2 x i1>%res to <2 x i64> ret <2 x i64> %resse -} - -; KNL-LABEL: test6 -; KNL: vpmovsxbd -; KNL: vpandd -; KNL: kmovw %eax, %k1 -; KNL vptestmd {{.*}}, %k0 {%k1} - -; SKX-LABEL: test6 -; SKX: vpmovb2m -; SKX: kmovw %eax, %k1 -; SKX: kandw -define void @test6(<16 x i1> %mask) { +}define void @test6(<16 x i1> %mask) { allocas: %a= and <16 x i1> %mask, %b = bitcast <16 x i1> %a to i16 @@ -182,19 +225,30 @@ true: false: ret void } - -; KNL-LABEL: test7 -; KNL: vpmovsxwq -; KNL: vpandq -; KNL: vptestmq {{.*}}, %k0 -; KNL: korw - -; SKX-LABEL: test7 -; SKX: vpmovw2m -; SKX: kmovb %eax, %k1 -; SKX: korb - define void @test7(<8 x i1> %mask) { +; KNL-LABEL: test7: +; KNL: ## BB#0: ## %allocas +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 +; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL-NEXT: movb $85, %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: testb %al, %al +; KNL-NEXT: retq +; +; SKX-LABEL: test7: +; SKX: ## BB#0: ## %allocas +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k0 +; SKX-NEXT: movb $85, %al +; SKX-NEXT: kmovb %eax, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: kmovb %k0, %eax +; SKX-NEXT: testb %al, %al +; SKX-NEXT: retq allocas: %a= or <8 x i1> %mask, %b = bitcast <8 x i1> %a to i8 @@ -207,22 +261,35 @@ true: false: ret void } - -; KNL-LABEL: test8 -; KNL: vpxord %zmm2, %zmm2, %zmm2 -; KNL: jg -; KNL: vpcmpltud %zmm2, %zmm1, %k1 -; KNL: jmp -; KNL: vpcmpgtd %zmm2, %zmm0, %k1 - -; SKX-LABEL: test8 -; SKX: jg -; SKX: vpcmpltud {{.*}}, %k0 -; SKX: vpmovm2b -; SKX: vpcmpgtd {{.*}}, %k0 -; SKX: vpmovm2b - define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { +; KNL-LABEL: test8: +; KNL: ## BB#0: +; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: jg LBB14_1 +; KNL-NEXT: ## BB#2: +; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1 +; KNL-NEXT: jmp LBB14_3 +; KNL-NEXT: LBB14_1: +; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 +; KNL-NEXT: LBB14_3: +; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test8: +; SKX: ## BB#0: +; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: jg LBB14_1 +; SKX-NEXT: ## BB#2: +; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: retq +; SKX-NEXT: LBB14_1: +; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: retq %cond = icmp sgt i32 %a1, %b1 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer %cmp2 = icmp ult <16 x i32> %b, zeroinitializer @@ -230,91 +297,121 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x 
i32>%b, i32 %a1, i32 %b1) { %res = sext <16 x i1> %mix to <16 x i8> ret <16 x i8> %res } - -; KNL-LABEL: test9 -; KNL: jg -; KNL: vpmovsxbd %xmm1, %zmm0 -; KNL: jmp -; KNL: vpmovsxbd %xmm0, %zmm0 - -; SKX-LABEL: test9 -; SKX: vpmovb2m %xmm1, %k0 -; SKX: vpmovm2b %k0, %xmm0 -; SKX: retq -; SKX: vpmovb2m %xmm0, %k0 -; SKX: vpmovm2b %k0, %xmm0 - define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { +; KNL-LABEL: test9: +; KNL: ## BB#0: +; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: jg LBB15_1 +; KNL-NEXT: ## BB#2: +; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 +; KNL-NEXT: jmp LBB15_3 +; KNL-NEXT: LBB15_1: +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: LBB15_3: +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test9: +; SKX: ## BB#0: +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: jg LBB15_1 +; SKX-NEXT: ## BB#2: +; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 +; SKX-NEXT: jmp LBB15_3 +; SKX-NEXT: LBB15_1: +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: LBB15_3: +; SKX-NEXT: vpmovb2m %xmm0, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: retq %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b ret <16 x i1>%c -} - -; KNL-LABEL: test10 -; KNL: jg -; KNL: vpmovsxwq %xmm1, %zmm0 -; KNL: jmp -; KNL: vpmovsxwq %xmm0, %zmm0 - -; SKX-LABEL: test10 -; SKX: jg -; SKX: vpmovw2m %xmm1, %k0 -; SKX: vpmovm2w %k0, %xmm0 -; SKX: retq -; SKX: vpmovw2m %xmm0, %k0 -; SKX: vpmovm2w %k0, %xmm0 -define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) { +}define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) { %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b ret <8 x i1>%c } -; SKX-LABEL: test11 -; SKX: jg -; SKX: vpmovd2m %xmm1, %k0 -; SKX: vpmovm2d %k0, %xmm0 -; SKX: retq -; SKX: vpmovd2m %xmm0, %k0 -; SKX: vpmovm2d %k0, %xmm0 define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { +; KNL-LABEL: test11: +; KNL: ## BB#0: +; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: jg LBB17_2 +; KNL-NEXT: ## BB#1: +; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: LBB17_2: +; KNL-NEXT: retq +; +; SKX-LABEL: test11: +; SKX: ## BB#0: +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: jg LBB17_1 +; SKX-NEXT: ## BB#2: +; SKX-NEXT: vpslld $31, %xmm1, %xmm0 +; SKX-NEXT: jmp LBB17_3 +; SKX-NEXT: LBB17_1: +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: LBB17_3: +; SKX-NEXT: vpmovd2m %xmm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: retq %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b ret <4 x i1>%c } -; KNL-LABEL: test12 -; KNL: movl %edi, %eax define i32 @test12(i32 %x, i32 %y) { +; CHECK-LABEL: test12: +; CHECK: ## BB#0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 0 %c = select i1 %b, i32 %x, i32 %y ret i32 %c } -; KNL-LABEL: test13 -; KNL: movl %esi, %eax define i32 @test13(i32 %x, i32 %y) { +; CHECK-LABEL: test13: +; CHECK: ## BB#0: +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: retq %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 3 %c = select i1 %b, i32 %x, i32 %y ret i32 %c -} - -; SKX-LABEL: test14 -; SKX: movb $11, %al -; SKX: kmovb %eax, %k0 -; SKX: vpmovm2d %k0, %xmm0 - -define <4 x i1> @test14() { +}define <4 x i1> @test14() { %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 2 %c = insertelement <4 x i1> , i1 %b, i32 1 ret <4 x i1> %c } -; KNL-LABEL: 
test15 -; KNL: cmovgw define <16 x i1> @test15(i32 %x, i32 %y) { +; KNL-LABEL: test15: +; KNL: ## BB#0: +; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: movw $21845, %ax ## imm = 0x5555 +; KNL-NEXT: movw $1, %cx +; KNL-NEXT: cmovgw %ax, %cx +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test15: +; SKX: ## BB#0: +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: movw $21845, %ax ## imm = 0x5555 +; SKX-NEXT: movw $1, %cx +; SKX-NEXT: cmovgw %ax, %cx +; SKX-NEXT: kmovw %ecx, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: retq %a = bitcast i16 21845 to <16 x i1> %b = bitcast i16 1 to <16 x i1> %mask = icmp sgt i32 %x, %y @@ -322,27 +419,914 @@ define <16 x i1> @test15(i32 %x, i32 %y) { ret <16 x i1> %c } -; SKX-LABEL: test16 -; SKX: kxnorw %k0, %k0, %k1 -; SKX: kshiftrw $15, %k1, %k1 -; SKX: kshiftlq $5, %k1, %k1 -; SKX: korq %k1, %k0, %k0 -; SKX: vpmovm2b %k0, %zmm0 define <64 x i8> @test16(i64 %x) { +; KNL-LABEL: test16: +; KNL: ## BB#0: +; KNL-NEXT: pushq %rbp +; KNL-NEXT: Ltmp0: +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: Ltmp1: +; KNL-NEXT: .cfi_offset %rbp, -16 +; KNL-NEXT: movq %rsp, %rbp +; KNL-NEXT: Ltmp2: +; KNL-NEXT: .cfi_def_cfa_register %rbp +; KNL-NEXT: pushq %r15 +; KNL-NEXT: pushq %r14 +; KNL-NEXT: pushq %r13 +; KNL-NEXT: pushq %r12 +; KNL-NEXT: pushq %rbx +; KNL-NEXT: andq $-32, %rsp +; KNL-NEXT: subq $128, %rsp +; KNL-NEXT: Ltmp3: +; KNL-NEXT: .cfi_offset %rbx, -56 +; KNL-NEXT: Ltmp4: +; KNL-NEXT: .cfi_offset %r12, -48 +; KNL-NEXT: Ltmp5: +; KNL-NEXT: .cfi_offset %r13, -40 +; KNL-NEXT: Ltmp6: +; KNL-NEXT: .cfi_offset %r14, -32 +; KNL-NEXT: Ltmp7: +; KNL-NEXT: .cfi_offset %r15, -24 +; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: shrq $32, %rax +; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; KNL-NEXT: movl $271, %eax ## imm = 0x10F +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: andl $1, %ecx +; KNL-NEXT: vmovd %ecx, %xmm0 +; KNL-NEXT: movl $257, %ecx ## imm = 0x101 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $258, %ecx ## imm = 0x102 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $259, %ecx ## imm = 0x103 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $260, %ecx ## imm = 0x104 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $261, %ecx ## imm = 0x105 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $262, %ecx ## imm = 0x106 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $263, %ecx ## imm = 0x107 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $264, %ecx ## imm = 0x108 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $265, %ecx ## imm = 0x109 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $266, %ecx ## imm = 0x10A +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $267, %ecx ## imm = 0x10B +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $268, %ecx ## imm = 0x10C +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $269, %ecx ## imm = 0x10D +; KNL-NEXT: bextrl 
%ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 +; KNL-NEXT: movl $270, %ecx ## imm = 0x10E +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1 +; KNL-NEXT: movl $1, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0 +; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d +; KNL-NEXT: movq %r15, %rdx +; KNL-NEXT: shrq $17, %rdx +; KNL-NEXT: andb $1, %dl +; KNL-NEXT: je LBB22_2 +; KNL-NEXT: ## BB#1: +; KNL-NEXT: movb $-1, %dl +; KNL-NEXT: LBB22_2: +; KNL-NEXT: movq %r15, %r11 +; KNL-NEXT: shrq $16, %r11 +; KNL-NEXT: andb $1, %r11b +; KNL-NEXT: je LBB22_4 +; KNL-NEXT: ## BB#3: +; KNL-NEXT: movb $-1, %r11b +; KNL-NEXT: LBB22_4: +; KNL-NEXT: movq %r15, %r10 +; KNL-NEXT: shrq $18, %r10 +; KNL-NEXT: andb $1, %r10b +; KNL-NEXT: je LBB22_6 +; KNL-NEXT: ## BB#5: +; KNL-NEXT: movb $-1, %r10b +; KNL-NEXT: LBB22_6: +; KNL-NEXT: movq %r15, %r9 +; KNL-NEXT: shrq $19, %r9 +; KNL-NEXT: andb $1, %r9b +; KNL-NEXT: je LBB22_8 +; KNL-NEXT: ## BB#7: +; KNL-NEXT: movb $-1, %r9b +; KNL-NEXT: LBB22_8: +; KNL-NEXT: movq %r15, %rbx +; KNL-NEXT: shrq $20, %rbx +; KNL-NEXT: andb $1, %bl +; KNL-NEXT: je LBB22_10 +; KNL-NEXT: ## BB#9: +; KNL-NEXT: movb $-1, %bl +; KNL-NEXT: LBB22_10: +; KNL-NEXT: movq %r15, %r12 +; KNL-NEXT: shrq $21, %r12 +; KNL-NEXT: andb $1, %r12b +; KNL-NEXT: je LBB22_12 +; KNL-NEXT: ## BB#11: +; KNL-NEXT: movb $-1, %r12b +; KNL-NEXT: LBB22_12: +; KNL-NEXT: movq %r15, %r14 +; KNL-NEXT: shrq $22, %r14 +; KNL-NEXT: andb $1, %r14b +; KNL-NEXT: je LBB22_14 +; KNL-NEXT: ## BB#13: +; KNL-NEXT: movb $-1, %r14b +; KNL-NEXT: LBB22_14: +; KNL-NEXT: movq %r15, %r8 +; KNL-NEXT: shrq $23, %r8 +; KNL-NEXT: andb $1, %r8b +; KNL-NEXT: je LBB22_16 +; KNL-NEXT: ## BB#15: +; KNL-NEXT: movb $-1, %r8b +; KNL-NEXT: LBB22_16: +; KNL-NEXT: movq %r15, %r13 +; KNL-NEXT: shrq $24, %r13 +; KNL-NEXT: andb $1, %r13b +; KNL-NEXT: je LBB22_18 +; KNL-NEXT: ## BB#17: +; KNL-NEXT: movb $-1, %r13b +; KNL-NEXT: LBB22_18: +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $25, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_20 +; KNL-NEXT: ## BB#19: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_20: +; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $26, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_22 +; KNL-NEXT: ## BB#21: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_22: +; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: movl $272, %esi ## imm = 0x110 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $27, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_24 +; KNL-NEXT: ## BB#23: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_24: +; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: movl $273, %eax ## imm = 0x111 +; KNL-NEXT: bextrl %esi, %edi, %esi +; KNL-NEXT: movq %r15, %rcx +; KNL-NEXT: shrq $28, %rcx +; KNL-NEXT: andb $1, %cl +; KNL-NEXT: je LBB22_26 +; KNL-NEXT: ## BB#25: +; KNL-NEXT: movb $-1, %cl +; KNL-NEXT: LBB22_26: +; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vmovd %esi, %xmm2 +; KNL-NEXT: movl $274, %esi ## imm = 0x112 +; KNL-NEXT: movq %r15, %rcx +; KNL-NEXT: shrq $29, %rcx +; KNL-NEXT: andb $1, %cl +; KNL-NEXT: je LBB22_28 +; KNL-NEXT: ## BB#27: +; KNL-NEXT: movb $-1, %cl +; KNL-NEXT: LBB22_28: +; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; KNL-NEXT: bextrl %esi, %edi, %eax +; KNL-NEXT: movzbl %r11b, %esi +; KNL-NEXT: movq %r15, %rcx +; 
KNL-NEXT: shrq $30, %rcx +; KNL-NEXT: andb $1, %cl +; KNL-NEXT: je LBB22_30 +; KNL-NEXT: ## BB#29: +; KNL-NEXT: movb $-1, %cl +; KNL-NEXT: LBB22_30: +; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; KNL-NEXT: movl $275, %eax ## imm = 0x113 +; KNL-NEXT: bextrl %eax, %edi, %r11d +; KNL-NEXT: movzbl %dl, %edx +; KNL-NEXT: vmovd %esi, %xmm3 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $31, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_32 +; KNL-NEXT: ## BB#31: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_32: +; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 +; KNL-NEXT: movl $276, %eax ## imm = 0x114 +; KNL-NEXT: bextrl %eax, %edi, %esi +; KNL-NEXT: movl $277, %r11d ## imm = 0x115 +; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r10b, %r10d +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_34 +; KNL-NEXT: ## BB#33: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_34: +; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2 +; KNL-NEXT: bextrl %r11d, %edi, %edx +; KNL-NEXT: movl $278, %r11d ## imm = 0x116 +; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r9b, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: movq %r15, %rcx +; KNL-NEXT: shlq $63, %rcx +; KNL-NEXT: sarq $63, %rcx +; KNL-NEXT: vmovd %ecx, %xmm4 +; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb $2, %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_36 +; KNL-NEXT: ## BB#35: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_36: +; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %r11d, %edi, %edx +; KNL-NEXT: movl $279, %r9d ## imm = 0x117 +; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3 +; KNL-NEXT: movzbl %bl, %ebx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb $3, %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_38 +; KNL-NEXT: ## BB#37: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_38: +; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %r9d, %edi, %edx +; KNL-NEXT: movl $280, %esi ## imm = 0x118 +; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r12b, %ebx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb $4, %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_40 +; KNL-NEXT: ## BB#39: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_40: +; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %esi, %edi, %ecx +; KNL-NEXT: movl $281, %edx ## imm = 0x119 +; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r14b, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb $5, %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_42 +; KNL-NEXT: ## BB#41: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_42: +; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %ecx +; KNL-NEXT: movl $282, %edx ## imm = 0x11A +; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r8b, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %bl +; KNL-NEXT: shrb $6, %bl +; KNL-NEXT: andb $1, %bl +; KNL-NEXT: je LBB22_44 +; KNL-NEXT: ## BB#43: +; KNL-NEXT: movb $-1, %bl +; KNL-NEXT: LBB22_44: +; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %eax +; KNL-NEXT: movl $283, %ecx ## 
imm = 0x11B +; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r13b, %esi +; KNL-NEXT: movzbl %bl, %edx +; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %bl +; KNL-NEXT: shrb $7, %bl +; KNL-NEXT: je LBB22_46 +; KNL-NEXT: ## BB#45: +; KNL-NEXT: movb $-1, %bl +; KNL-NEXT: LBB22_46: +; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: movl $284, %edx ## imm = 0x11C +; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload +; KNL-NEXT: movzbl %al, %esi +; KNL-NEXT: movzbl %bl, %eax +; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $8, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_48 +; KNL-NEXT: ## BB#47: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_48: +; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %ecx +; KNL-NEXT: movl $285, %edx ## imm = 0x11D +; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload +; KNL-NEXT: movzbl %sil, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $9, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_50 +; KNL-NEXT: ## BB#49: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_50: +; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %ecx +; KNL-NEXT: movl $286, %edx ## imm = 0x11E +; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload +; KNL-NEXT: movzbl %sil, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $10, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_52 +; KNL-NEXT: ## BB#51: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_52: +; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %edx +; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload +; KNL-NEXT: movzbl %cl, %ecx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $11, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_54 +; KNL-NEXT: ## BB#53: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_54: +; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2 +; KNL-NEXT: shrl $31, %edi +; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload +; KNL-NEXT: movzbl %cl, %ecx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $12, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_56 +; KNL-NEXT: ## BB#55: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_56: +; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload +; KNL-NEXT: movzbl %cl, %ecx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $13, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB22_58 +; KNL-NEXT: ## BB#57: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_58: +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload +; KNL-NEXT: movzbl %cl, %ecx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $14, %rax +; KNL-NEXT: andb 
$1, %al +; KNL-NEXT: je LBB22_60 +; KNL-NEXT: ## BB#59: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB22_60: +; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1 +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2 +; KNL-NEXT: shrq $15, %r15 +; KNL-NEXT: andb $1, %r15b +; KNL-NEXT: je LBB22_62 +; KNL-NEXT: ## BB#61: +; KNL-NEXT: movb $-1, %r15b +; KNL-NEXT: LBB22_62: +; KNL-NEXT: movzbl %r15b, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 +; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2 +; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 +; KNL-NEXT: leaq -40(%rbp), %rsp +; KNL-NEXT: popq %rbx +; KNL-NEXT: popq %r12 +; KNL-NEXT: popq %r13 +; KNL-NEXT: popq %r14 +; KNL-NEXT: popq %r15 +; KNL-NEXT: popq %rbp +; KNL-NEXT: retq +; +; SKX-LABEL: test16: +; SKX: ## BB#0: +; SKX-NEXT: kmovq %rdi, %k0 +; SKX-NEXT: kxnorw %k0, %k0, %k1 +; SKX-NEXT: kshiftrw $15, %k1, %k1 +; SKX-NEXT: kshiftlq $5, %k1, %k1 +; SKX-NEXT: korq %k1, %k0, %k0 +; SKX-NEXT: vpmovm2b %k0, %zmm0 +; SKX-NEXT: retq %a = bitcast i64 %x to <64 x i1> %b = insertelement <64 x i1>%a, i1 true, i32 5 %c = sext <64 x i1>%b to <64 x i8> ret <64 x i8>%c } -; SKX-LABEL: test17 -; SKX: setg %al -; SKX: andl $1, %eax -; SKX: kmovw %eax, %k1 -; SKX: kshiftlq $5, %k1, %k1 -; SKX: korq %k1, %k0, %k0 -; SKX: vpmovm2b %k0, %zmm0 define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { +; KNL-LABEL: test17: +; KNL: ## BB#0: +; KNL-NEXT: pushq %rbp +; KNL-NEXT: Ltmp8: +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: Ltmp9: +; KNL-NEXT: .cfi_offset %rbp, -16 +; KNL-NEXT: movq %rsp, %rbp +; KNL-NEXT: Ltmp10: +; KNL-NEXT: .cfi_def_cfa_register %rbp +; KNL-NEXT: pushq %r15 +; KNL-NEXT: pushq %r14 +; KNL-NEXT: pushq %r13 +; KNL-NEXT: pushq %r12 +; KNL-NEXT: pushq %rbx +; KNL-NEXT: andq $-32, %rsp +; KNL-NEXT: subq $128, %rsp +; KNL-NEXT: Ltmp11: +; KNL-NEXT: .cfi_offset %rbx, -56 +; KNL-NEXT: Ltmp12: +; KNL-NEXT: .cfi_offset %r12, -48 +; KNL-NEXT: Ltmp13: +; KNL-NEXT: .cfi_offset %r13, -40 +; KNL-NEXT: Ltmp14: +; KNL-NEXT: .cfi_offset %r14, -32 +; KNL-NEXT: Ltmp15: +; KNL-NEXT: .cfi_offset %r15, -24 +; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: shrq $32, %rax +; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; KNL-NEXT: movl %edi, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vmovd %eax, %xmm0 +; KNL-NEXT: movl $257, %eax ## imm = 0x101 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $258, %eax ## imm = 0x102 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $259, %eax ## imm = 0x103 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $260, %eax ## imm = 0x104 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $261, %eax ## imm = 0x105 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $262, %eax ## imm = 0x106 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $263, %eax ## imm = 0x107 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $264, %eax ## imm = 0x108 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $265, %eax ## imm = 0x109 +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $9, 
%eax, %xmm0, %xmm0 +; KNL-NEXT: movl $266, %eax ## imm = 0x10A +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $267, %eax ## imm = 0x10B +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $268, %eax ## imm = 0x10C +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $269, %eax ## imm = 0x10D +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $270, %eax ## imm = 0x10E +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; KNL-NEXT: movl $271, %eax ## imm = 0x10F +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1 +; KNL-NEXT: cmpl %edx, %esi +; KNL-NEXT: setg %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0 +; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d +; KNL-NEXT: movq %r15, %rdx +; KNL-NEXT: shrq $17, %rdx +; KNL-NEXT: andb $1, %dl +; KNL-NEXT: je LBB23_2 +; KNL-NEXT: ## BB#1: +; KNL-NEXT: movb $-1, %dl +; KNL-NEXT: LBB23_2: +; KNL-NEXT: movq %r15, %r11 +; KNL-NEXT: shrq $16, %r11 +; KNL-NEXT: andb $1, %r11b +; KNL-NEXT: je LBB23_4 +; KNL-NEXT: ## BB#3: +; KNL-NEXT: movb $-1, %r11b +; KNL-NEXT: LBB23_4: +; KNL-NEXT: movq %r15, %r10 +; KNL-NEXT: shrq $18, %r10 +; KNL-NEXT: andb $1, %r10b +; KNL-NEXT: je LBB23_6 +; KNL-NEXT: ## BB#5: +; KNL-NEXT: movb $-1, %r10b +; KNL-NEXT: LBB23_6: +; KNL-NEXT: movq %r15, %r9 +; KNL-NEXT: shrq $19, %r9 +; KNL-NEXT: andb $1, %r9b +; KNL-NEXT: je LBB23_8 +; KNL-NEXT: ## BB#7: +; KNL-NEXT: movb $-1, %r9b +; KNL-NEXT: LBB23_8: +; KNL-NEXT: movq %r15, %rbx +; KNL-NEXT: shrq $20, %rbx +; KNL-NEXT: andb $1, %bl +; KNL-NEXT: je LBB23_10 +; KNL-NEXT: ## BB#9: +; KNL-NEXT: movb $-1, %bl +; KNL-NEXT: LBB23_10: +; KNL-NEXT: movq %r15, %r12 +; KNL-NEXT: shrq $21, %r12 +; KNL-NEXT: andb $1, %r12b +; KNL-NEXT: je LBB23_12 +; KNL-NEXT: ## BB#11: +; KNL-NEXT: movb $-1, %r12b +; KNL-NEXT: LBB23_12: +; KNL-NEXT: movq %r15, %r14 +; KNL-NEXT: shrq $22, %r14 +; KNL-NEXT: andb $1, %r14b +; KNL-NEXT: je LBB23_14 +; KNL-NEXT: ## BB#13: +; KNL-NEXT: movb $-1, %r14b +; KNL-NEXT: LBB23_14: +; KNL-NEXT: movq %r15, %r8 +; KNL-NEXT: shrq $23, %r8 +; KNL-NEXT: andb $1, %r8b +; KNL-NEXT: je LBB23_16 +; KNL-NEXT: ## BB#15: +; KNL-NEXT: movb $-1, %r8b +; KNL-NEXT: LBB23_16: +; KNL-NEXT: movq %r15, %r13 +; KNL-NEXT: shrq $24, %r13 +; KNL-NEXT: andb $1, %r13b +; KNL-NEXT: je LBB23_18 +; KNL-NEXT: ## BB#17: +; KNL-NEXT: movb $-1, %r13b +; KNL-NEXT: LBB23_18: +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $25, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_20 +; KNL-NEXT: ## BB#19: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_20: +; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $26, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_22 +; KNL-NEXT: ## BB#21: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_22: +; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: movl $272, %esi ## imm = 0x110 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $27, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_24 +; KNL-NEXT: ## BB#23: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_24: +; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: movl $273, %eax ## imm = 0x111 +; KNL-NEXT: bextrl %esi, %edi, %esi +; KNL-NEXT: movq %r15, %rcx +; KNL-NEXT: shrq $28, %rcx +; KNL-NEXT: andb $1, %cl +; KNL-NEXT: je LBB23_26 +; KNL-NEXT: ## BB#25: +; 
KNL-NEXT: movb $-1, %cl +; KNL-NEXT: LBB23_26: +; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: bextrl %eax, %edi, %eax +; KNL-NEXT: vmovd %esi, %xmm2 +; KNL-NEXT: movl $274, %esi ## imm = 0x112 +; KNL-NEXT: movq %r15, %rcx +; KNL-NEXT: shrq $29, %rcx +; KNL-NEXT: andb $1, %cl +; KNL-NEXT: je LBB23_28 +; KNL-NEXT: ## BB#27: +; KNL-NEXT: movb $-1, %cl +; KNL-NEXT: LBB23_28: +; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; KNL-NEXT: bextrl %esi, %edi, %eax +; KNL-NEXT: movzbl %r11b, %esi +; KNL-NEXT: movq %r15, %rcx +; KNL-NEXT: shrq $30, %rcx +; KNL-NEXT: andb $1, %cl +; KNL-NEXT: je LBB23_30 +; KNL-NEXT: ## BB#29: +; KNL-NEXT: movb $-1, %cl +; KNL-NEXT: LBB23_30: +; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; KNL-NEXT: movl $275, %eax ## imm = 0x113 +; KNL-NEXT: bextrl %eax, %edi, %r11d +; KNL-NEXT: movzbl %dl, %edx +; KNL-NEXT: vmovd %esi, %xmm3 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $31, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_32 +; KNL-NEXT: ## BB#31: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_32: +; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill +; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 +; KNL-NEXT: movl $276, %eax ## imm = 0x114 +; KNL-NEXT: bextrl %eax, %edi, %esi +; KNL-NEXT: movl $277, %r11d ## imm = 0x115 +; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r10b, %r10d +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_34 +; KNL-NEXT: ## BB#33: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_34: +; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2 +; KNL-NEXT: bextrl %r11d, %edi, %edx +; KNL-NEXT: movl $278, %r11d ## imm = 0x116 +; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r9b, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: movq %r15, %rcx +; KNL-NEXT: shlq $63, %rcx +; KNL-NEXT: sarq $63, %rcx +; KNL-NEXT: vmovd %ecx, %xmm4 +; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb $2, %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_36 +; KNL-NEXT: ## BB#35: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_36: +; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %r11d, %edi, %edx +; KNL-NEXT: movl $279, %r9d ## imm = 0x117 +; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3 +; KNL-NEXT: movzbl %bl, %ebx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb $3, %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_38 +; KNL-NEXT: ## BB#37: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_38: +; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %r9d, %edi, %edx +; KNL-NEXT: movl $280, %esi ## imm = 0x118 +; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r12b, %ebx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb $4, %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_40 +; KNL-NEXT: ## BB#39: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_40: +; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %esi, %edi, %ecx +; KNL-NEXT: movl $281, %edx ## imm = 0x119 +; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r14b, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %al +; KNL-NEXT: shrb $5, %al +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_42 +; KNL-NEXT: ## BB#41: +; KNL-NEXT: movb 
$-1, %al +; KNL-NEXT: LBB23_42: +; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %ecx +; KNL-NEXT: movl $282, %edx ## imm = 0x11A +; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r8b, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %bl +; KNL-NEXT: shrb $6, %bl +; KNL-NEXT: andb $1, %bl +; KNL-NEXT: je LBB23_44 +; KNL-NEXT: ## BB#43: +; KNL-NEXT: movb $-1, %bl +; KNL-NEXT: LBB23_44: +; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %eax +; KNL-NEXT: movl $283, %ecx ## imm = 0x11B +; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3 +; KNL-NEXT: movzbl %r13b, %esi +; KNL-NEXT: movzbl %bl, %edx +; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4 +; KNL-NEXT: movb %r15b, %bl +; KNL-NEXT: shrb $7, %bl +; KNL-NEXT: je LBB23_46 +; KNL-NEXT: ## BB#45: +; KNL-NEXT: movb $-1, %bl +; KNL-NEXT: LBB23_46: +; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; KNL-NEXT: bextrl %ecx, %edi, %ecx +; KNL-NEXT: movl $284, %edx ## imm = 0x11C +; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload +; KNL-NEXT: movzbl %al, %esi +; KNL-NEXT: movzbl %bl, %eax +; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $8, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_48 +; KNL-NEXT: ## BB#47: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_48: +; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %ecx +; KNL-NEXT: movl $285, %edx ## imm = 0x11D +; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload +; KNL-NEXT: movzbl %sil, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $9, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_50 +; KNL-NEXT: ## BB#49: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_50: +; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %ecx +; KNL-NEXT: movl $286, %edx ## imm = 0x11E +; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload +; KNL-NEXT: movzbl %sil, %esi +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $10, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_52 +; KNL-NEXT: ## BB#51: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_52: +; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 +; KNL-NEXT: bextrl %edx, %edi, %edx +; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload +; KNL-NEXT: movzbl %cl, %ecx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $11, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_54 +; KNL-NEXT: ## BB#53: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_54: +; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2 +; KNL-NEXT: shrl $31, %edi +; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload +; KNL-NEXT: movzbl %cl, %ecx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $12, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_56 +; KNL-NEXT: ## BB#55: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_56: +; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload +; KNL-NEXT: movzbl 
%cl, %ecx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $13, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_58 +; KNL-NEXT: ## BB#57: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_58: +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2 +; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload +; KNL-NEXT: movzbl %cl, %ecx +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3 +; KNL-NEXT: movq %r15, %rax +; KNL-NEXT: shrq $14, %rax +; KNL-NEXT: andb $1, %al +; KNL-NEXT: je LBB23_60 +; KNL-NEXT: ## BB#59: +; KNL-NEXT: movb $-1, %al +; KNL-NEXT: LBB23_60: +; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1 +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2 +; KNL-NEXT: shrq $15, %r15 +; KNL-NEXT: andb $1, %r15b +; KNL-NEXT: je LBB23_62 +; KNL-NEXT: ## BB#61: +; KNL-NEXT: movb $-1, %r15b +; KNL-NEXT: LBB23_62: +; KNL-NEXT: movzbl %r15b, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 +; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2 +; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 +; KNL-NEXT: leaq -40(%rbp), %rsp +; KNL-NEXT: popq %rbx +; KNL-NEXT: popq %r12 +; KNL-NEXT: popq %r13 +; KNL-NEXT: popq %r14 +; KNL-NEXT: popq %r15 +; KNL-NEXT: popq %rbp +; KNL-NEXT: retq +; +; SKX-LABEL: test17: +; SKX: ## BB#0: +; SKX-NEXT: kmovq %rdi, %k0 +; SKX-NEXT: cmpl %edx, %esi +; SKX-NEXT: setg %al +; SKX-NEXT: andl $1, %eax +; SKX-NEXT: kmovw %eax, %k1 +; SKX-NEXT: kshiftlq $5, %k1, %k1 +; SKX-NEXT: korq %k1, %k0, %k0 +; SKX-NEXT: vpmovm2b %k0, %zmm0 +; SKX-NEXT: retq %a = bitcast i64 %x to <64 x i1> %b = icmp sgt i32 %y, %z %c = insertelement <64 x i1>%a, i1 %b, i32 5 @@ -350,8 +1334,38 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ret <64 x i8>%d } -; KNL-LABEL: test18 define <8 x i1> @test18(i8 %a, i16 %y) { +; KNL-LABEL: test18: +; KNL: ## BB#0: +; KNL-NEXT: movzbl %dil, %eax +; KNL-NEXT: kmovw %eax, %k0 +; KNL-NEXT: kmovw %esi, %k1 +; KNL-NEXT: kshiftlw $7, %k1, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kshiftlw $6, %k1, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kshiftlw $6, %k1, %k1 +; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: kshiftlw $7, %k2, %k1 +; KNL-NEXT: korw %k1, %k0, %k1 +; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; KNL-NEXT: vpmovqw %zmm0, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test18: +; SKX: ## BB#0: +; SKX-NEXT: kmovb %edi, %k0 +; SKX-NEXT: kmovw %esi, %k1 +; SKX-NEXT: kshiftlw $6, %k1, %k2 +; SKX-NEXT: kshiftrw $15, %k2, %k2 +; SKX-NEXT: kshiftlw $7, %k1, %k1 +; SKX-NEXT: kshiftrw $15, %k1, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftlb $6, %k2, %k2 +; SKX-NEXT: korb %k2, %k0, %k0 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: vpmovm2w %k0, %xmm0 +; SKX-NEXT: retq %b = bitcast i8 %a to <8 x i1> %b1 = bitcast i16 %y to <16 x i1> %el1 = extractelement <16 x i1>%b1, i32 8 @@ -360,31 +1374,76 @@ define <8 x i1> @test18(i8 %a, i16 %y) { %d = insertelement <8 x i1>%c, i1 %el2, i32 6 ret <8 x i1>%d } - -; KNL-LABEL: test21 -; KNL: vpand %ymm -; KNL: vextracti128 $1, %ymm2 -; KNL: vpand %ymm - -; SKX-LABEL: test21 -; SKX: vpmovb2m -; SKX: vmovdqu16 {{.*}}%k1 - define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { +; KNL-LABEL: test21: +; KNL: ## BB#0: +; KNL-NEXT: vpmovzxbw 
{{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero +; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 +; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 +; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 +; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero +; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 +; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 +; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1 +; KNL-NEXT: retq +; +; SKX-LABEL: test21: +; SKX: ## BB#0: +; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 +; SKX-NEXT: vpmovb2m %ymm1, %k1 +; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1 +; SKX-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret } -; SKX-LABEL: test22 -; SKX: kmovb define void @test22(<4 x i1> %a, <4 x i1>* %addr) { +; KNL-LABEL: test22: +; KNL: ## BB#0: +; KNL-NEXT: vpextrd $3, %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb %al, (%rdi) +; KNL-NEXT: vpextrd $2, %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb %al, (%rdi) +; KNL-NEXT: vpextrd $1, %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb %al, (%rdi) +; KNL-NEXT: vmovd %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb %al, (%rdi) +; KNL-NEXT: retq +; +; SKX-LABEL: test22: +; SKX: ## BB#0: +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq store <4 x i1> %a, <4 x i1>* %addr ret void } -; SKX-LABEL: test23 -; SKX: kmovb define void @test23(<2 x i1> %a, <2 x i1>* %addr) { +; KNL-LABEL: test23: +; KNL: ## BB#0: +; KNL-NEXT: vpextrq $1, %xmm0, %rax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb %al, (%rdi) +; KNL-NEXT: vmovq %xmm0, %rax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: movb %al, (%rdi) +; KNL-NEXT: retq +; +; SKX-LABEL: test23: +; SKX: ## BB#0: +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq store <2 x i1> %a, <2 x i1>* %addr ret void }