X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Favx512-arith.ll;h=c43da9c03a6ef7bad7bd1aeab036bf3ed9262ae6;hp=e6cffba2732f88a9385f91856744c301ce11f8d2;hb=4a524934577d85e5095df8ea62ad6a3261076d0c;hpb=5c10a029addc6ad31012de706f46cb242847cfe7 diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll index e6cffba2732..c43da9c03a6 100644 --- a/test/CodeGen/X86/avx512-arith.ll +++ b/test/CodeGen/X86/avx512-arith.ll @@ -1,189 +1,217 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -; CHECK-LABEL: addpd512 -; CHECK: vaddpd -; CHECK: ret define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { +; CHECK-LABEL: addpd512: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq entry: %add.i = fadd <8 x double> %x, %y ret <8 x double> %add.i } -; CHECK-LABEL: addpd512fold -; CHECK: vaddpd LCP{{.*}}(%rip) -; CHECK: ret define <8 x double> @addpd512fold(<8 x double> %y) { +; CHECK-LABEL: addpd512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %add.i = fadd <8 x double> %y, ret <8 x double> %add.i } -; CHECK-LABEL: addps512 -; CHECK: vaddps -; CHECK: ret define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { +; CHECK-LABEL: addps512: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq entry: %add.i = fadd <16 x float> %x, %y ret <16 x float> %add.i } -; CHECK-LABEL: addps512fold -; CHECK: vaddps LCP{{.*}}(%rip) -; CHECK: ret define <16 x float> @addps512fold(<16 x float> %y) { +; CHECK-LABEL: addps512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %add.i = fadd <16 x float> %y, ret <16 x float> %add.i } -; CHECK-LABEL: subpd512 -; CHECK: vsubpd -; CHECK: ret define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { +; CHECK-LABEL: subpd512: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq entry: %sub.i = fsub <8 x double> %x, %y ret <8 x double> %sub.i } -; CHECK-LABEL: @subpd512fold -; CHECK: vsubpd (% -; CHECK: ret define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) { +; CHECK-LABEL: subpd512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %tmp2 = load <8 x double>* %x, align 8 %sub.i = fsub <8 x double> %y, %tmp2 ret <8 x double> %sub.i } -; CHECK-LABEL: @subps512 -; CHECK: vsubps -; CHECK: ret define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) { +; CHECK-LABEL: subps512: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq entry: %sub.i = fsub <16 x float> %x, %y ret <16 x float> %sub.i } -; CHECK-LABEL: subps512fold -; CHECK: vsubps (% -; CHECK: ret define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) { +; CHECK-LABEL: subps512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %tmp2 = load <16 x float>* %x, align 4 %sub.i = fsub <16 x float> %y, %tmp2 ret <16 x float> %sub.i } -; CHECK-LABEL: imulq512 -; CHECK: vpmuludq -; CHECK: vpmuludq -; CHECK: ret define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { +; CHECK-LABEL: imulq512: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm2 +; CHECK-NEXT: vpsrlq $32, %zmm0, %zmm3 +; CHECK-NEXT: vpmuludq %zmm3, %zmm1, %zmm3 +; CHECK-NEXT: vpsllq $32, %zmm3, %zmm3 +; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm2 +; CHECK-NEXT: vpsrlq $32, %zmm1, %zmm1 +; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0 +; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq %z = mul <8 x i64>%x, %y ret <8 x i64>%z } -; CHECK-LABEL: mulpd512 -; CHECK: vmulpd -; CHECK: ret define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { +; CHECK-LABEL: mulpd512: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq entry: %mul.i = fmul <8 x double> %x, %y ret <8 x double> %mul.i } -; CHECK-LABEL: mulpd512fold -; CHECK: vmulpd LCP{{.*}}(%rip) -; CHECK: ret define <8 x double> @mulpd512fold(<8 x double> %y) { +; CHECK-LABEL: mulpd512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %mul.i = fmul <8 x double> %y, ret <8 x double> %mul.i } -; CHECK-LABEL: mulps512 -; CHECK: vmulps -; CHECK: ret define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { +; CHECK-LABEL: mulps512: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq entry: %mul.i = fmul <16 x float> %x, %y ret <16 x float> %mul.i } -; CHECK-LABEL: mulps512fold -; CHECK: vmulps LCP{{.*}}(%rip) -; CHECK: ret define <16 x float> @mulps512fold(<16 x float> %y) { +; CHECK-LABEL: mulps512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %mul.i = fmul <16 x float> %y, ret <16 x float> %mul.i } -; CHECK-LABEL: divpd512 -; CHECK: vdivpd -; CHECK: ret define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) { +; CHECK-LABEL: divpd512: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq entry: %div.i = fdiv <8 x double> %x, %y ret <8 x double> %div.i } -; CHECK-LABEL: divpd512fold -; CHECK: vdivpd LCP{{.*}}(%rip) -; CHECK: ret define <8 x double> @divpd512fold(<8 x double> %y) { +; CHECK-LABEL: divpd512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %div.i = fdiv <8 x double> %y, ret <8 x double> %div.i } -; CHECK-LABEL: divps512 -; CHECK: vdivps -; CHECK: ret define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) { +; CHECK-LABEL: divps512: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq entry: %div.i = fdiv <16 x float> %x, %y ret <16 x float> %div.i } -; CHECK-LABEL: divps512fold -; CHECK: vdivps LCP{{.*}}(%rip) -; CHECK: ret define <16 x float> @divps512fold(<16 x float> %y) { +; CHECK-LABEL: divps512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %div.i = fdiv <16 x float> %y, ret <16 x float> %div.i } -; CHECK-LABEL: vpaddq_test -; CHECK: vpaddq %zmm -; CHECK: ret define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { +; CHECK-LABEL: vpaddq_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq %x = add <8 x i64> %i, %j ret <8 x i64> %x } -; CHECK-LABEL: vpaddq_fold_test -; CHECK: vpaddq (% -; CHECK: ret define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { +; CHECK-LABEL: vpaddq_fold_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 +; CHECK-NEXT: retq %tmp = load <8 x i64>* %j, align 4 %x = add <8 x i64> %i, %tmp ret <8 x i64> %x } -; CHECK-LABEL: vpaddq_broadcast_test -; CHECK: vpaddq LCP{{.*}}(%rip){1to8} -; CHECK: ret define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { +; CHECK-LABEL: vpaddq_broadcast_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; CHECK-NEXT: retq %x = add <8 x i64> %i, ret <8 x i64> %x } -; CHECK-LABEL: vpaddq_broadcast2_test -; CHECK: vpaddq (%rdi){1to8} -; CHECK: ret define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { +; CHECK-LABEL: vpaddq_broadcast2_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 +; CHECK-NEXT: retq %tmp = load i64* %j %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1 @@ -197,55 +225,67 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { ret <8 x i64> %x } -; CHECK-LABEL: vpaddd_test -; CHECK: vpaddd %zmm -; CHECK: ret define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { +; CHECK-LABEL: vpaddd_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq %x = add <16 x i32> %i, %j ret <16 x i32> %x } -; CHECK-LABEL: vpaddd_fold_test -; CHECK: vpaddd (% -; CHECK: ret define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { +; CHECK-LABEL: vpaddd_fold_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 +; CHECK-NEXT: retq %tmp = load <16 x i32>* %j, align 4 %x = add <16 x i32> %i, %tmp ret <16 x i32> %x } -; CHECK-LABEL: vpaddd_broadcast_test -; CHECK: vpaddd LCP{{.*}}(%rip){1to16} -; CHECK: ret define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { +; CHECK-LABEL: vpaddd_broadcast_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK-NEXT: retq %x = add <16 x i32> %i, ret <16 x i32> %x } -; CHECK-LABEL: vpaddd_mask_test -; CHECK: vpaddd {{%zmm[0-9], %zmm[0-9], %zmm[0-9] {%k[1-7]}}} -; CHECK: ret define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { +; CHECK-LABEL: vpaddd_mask_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %j %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i ret <16 x i32> %r } -; CHECK-LABEL: vpaddd_maskz_test -; CHECK: vpaddd {{%zmm[0-9], %zmm[0-9], %zmm[0-9] {%k[1-7]} {z}}} -; CHECK: ret define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { +; CHECK-LABEL: vpaddd_maskz_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %j %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer ret <16 x i32> %r } -; CHECK-LABEL: vpaddd_mask_fold_test -; CHECK: vpaddd (%rdi), {{%zmm[0-9], %zmm[0-9] {%k[1-7]}}} -; CHECK: ret define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { +; CHECK-LABEL: vpaddd_mask_fold_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 +; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %j = load <16 x i32>* %j.ptr %x = add <16 x i32> %i, %j @@ -253,20 +293,26 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 ret <16 x i32> %r } -; CHECK-LABEL: vpaddd_mask_broadcast_test -; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9], %zmm[0-9] {%k[1-7]}}} -; CHECK: ret define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { +; CHECK-LABEL: vpaddd_mask_broadcast_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i ret <16 x i32> %r } -; CHECK-LABEL: vpaddd_maskz_fold_test -; CHECK: vpaddd (%rdi), {{%zmm[0-9], %zmm[0-9] {%k[1-7]}}} {z} -; CHECK: ret define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { +; CHECK-LABEL: vpaddd_maskz_fold_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 +; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %j = load <16 x i32>* %j.ptr %x = add <16 x i32> %i, %j @@ -274,125 +320,141 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 ret <16 x i32> %r } -; CHECK-LABEL: vpaddd_maskz_broadcast_test -; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9], %zmm[0-9] {%k[1-7]}}} {z} -; CHECK: ret define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { +; CHECK-LABEL: vpaddd_maskz_broadcast_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer ret <16 x i32> %r } -; CHECK-LABEL: vpsubq_test -; CHECK: vpsubq %zmm -; CHECK: ret define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { +; CHECK-LABEL: vpsubq_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq %x = sub <8 x i64> %i, %j ret <8 x i64> %x } -; CHECK-LABEL: vpsubd_test -; CHECK: vpsubd -; CHECK: ret define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { +; CHECK-LABEL: vpsubd_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq %x = sub <16 x i32> %i, %j ret <16 x i32> %x } -; CHECK-LABEL: vpmulld_test -; CHECK: vpmulld %zmm -; CHECK: ret define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { +; CHECK-LABEL: vpmulld_test: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq %x = mul <16 x i32> %i, %j ret <16 x i32> %x } -; CHECK-LABEL: sqrtA -; CHECK: vsqrtss {{.*}} -; CHECK: ret declare float @sqrtf(float) readnone define float @sqrtA(float %a) nounwind uwtable readnone ssp { +; CHECK-LABEL: sqrtA: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: retq entry: %conv1 = tail call float @sqrtf(float %a) nounwind readnone ret float %conv1 } -; CHECK-LABEL: sqrtB -; CHECK: vsqrtsd {{.*}} -; CHECK: ret declare double @sqrt(double) readnone define double @sqrtB(double %a) nounwind uwtable readnone ssp { +; CHECK-LABEL: sqrtB: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: retq entry: %call = tail call double @sqrt(double %a) nounwind readnone ret double %call } -; CHECK-LABEL: sqrtC -; CHECK: vsqrtss {{.*}} -; CHECK: ret declare float @llvm.sqrt.f32(float) define float @sqrtC(float %a) nounwind { +; CHECK-LABEL: sqrtC: +; CHECK: ## BB#0: +; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: retq %b = call float @llvm.sqrt.f32(float %a) ret float %b } -; CHECK-LABEL: sqrtD -; CHECK: vsqrtps {{.*}} -; CHECK: ret declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) define <16 x float> @sqrtD(<16 x float> %a) nounwind { +; CHECK-LABEL: sqrtD: +; CHECK: ## BB#0: +; CHECK-NEXT: vsqrtps %zmm0, %zmm0 +; CHECK-NEXT: retq %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a) ret <16 x float> %b } -; CHECK-LABEL: sqrtE -; CHECK: vsqrtpd {{.*}} -; CHECK: ret declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) define <8 x double> @sqrtE(<8 x double> %a) nounwind { +; CHECK-LABEL: sqrtE: +; CHECK: ## BB#0: +; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 +; CHECK-NEXT: retq %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a) ret <8 x double> %b } -; CHECK-LABEL: fadd_broadcast -; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK: ret define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { +; CHECK-LABEL: fadd_broadcast: +; CHECK: ## BB#0: +; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK-NEXT: retq %b = fadd <16 x float> %a, ret <16 x float> %b } -; CHECK-LABEL: addq_broadcast -; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK: ret define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { +; CHECK-LABEL: addq_broadcast: +; CHECK: ## BB#0: +; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; CHECK-NEXT: retq %b = add <8 x i64> %a, ret <8 x i64> %b } -; CHECK-LABEL: orq_broadcast -; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK: ret define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { +; CHECK-LABEL: orq_broadcast: +; CHECK: ## BB#0: +; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; CHECK-NEXT: retq %b = or <8 x i64> %a, ret <8 x i64> %b } -; CHECK-LABEL: andd512fold -; CHECK: vpandd (% -; CHECK: ret define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { +; CHECK-LABEL: andd512fold: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %a = load <16 x i32>* %x, align 4 %b = and <16 x i32> %y, %a ret <16 x i32> %b } -; CHECK-LABEL: andqbrst -; CHECK: vpandq (%rdi){1to8}, %zmm -; CHECK: ret define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { +; CHECK-LABEL: andqbrst: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0 +; CHECK-NEXT: retq entry: %a = load i64* %ap, align 8 %b = insertelement <8 x i64> undef, i64 %a, i32 0