; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-; CHECK-LABEL: addpd512
-; CHECK: vaddpd
-; CHECK: ret
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: addpd512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
entry:
%add.i = fadd <8 x double> %x, %y
ret <8 x double> %add.i
}
-; CHECK-LABEL: addpd512fold
-; CHECK: vaddpd LCP{{.*}}(%rip)
-; CHECK: ret
define <8 x double> @addpd512fold(<8 x double> %y) {
+; CHECK-LABEL: addpd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
ret <8 x double> %add.i
}
-; CHECK-LABEL: addps512
-; CHECK: vaddps
-; CHECK: ret
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: addps512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
entry:
%add.i = fadd <16 x float> %x, %y
ret <16 x float> %add.i
}
-; CHECK-LABEL: addps512fold
-; CHECK: vaddps LCP{{.*}}(%rip)
-; CHECK: ret
define <16 x float> @addps512fold(<16 x float> %y) {
+; CHECK-LABEL: addps512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
ret <16 x float> %add.i
}
-; CHECK-LABEL: subpd512
-; CHECK: vsubpd
-; CHECK: ret
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: subpd512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
entry:
%sub.i = fsub <8 x double> %x, %y
ret <8 x double> %sub.i
}
-; CHECK-LABEL: @subpd512fold
-; CHECK: vsubpd (%
-; CHECK: ret
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
+; CHECK-LABEL: subpd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%tmp2 = load <8 x double>* %x, align 8
%sub.i = fsub <8 x double> %y, %tmp2
ret <8 x double> %sub.i
}
-; CHECK-LABEL: @subps512
-; CHECK: vsubps
-; CHECK: ret
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: subps512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
entry:
%sub.i = fsub <16 x float> %x, %y
ret <16 x float> %sub.i
}
-; CHECK-LABEL: subps512fold
-; CHECK: vsubps (%
-; CHECK: ret
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
+; CHECK-LABEL: subps512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%tmp2 = load <16 x float>* %x, align 4
%sub.i = fsub <16 x float> %y, %tmp2
ret <16 x float> %sub.i
}
-; CHECK-LABEL: mulpd512
-; CHECK: vmulpd
-; CHECK: ret
+define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
+; CHECK-LABEL: imulq512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
+; CHECK-NEXT: vpsrlq $32, %zmm0, %zmm3
+; CHECK-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
+; CHECK-NEXT: vpsllq $32, %zmm3, %zmm3
+; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm2
+; CHECK-NEXT: vpsrlq $32, %zmm1, %zmm1
+; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %z = mul <8 x i64>%x, %y
+ ret <8 x i64>%z
+}
+
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: mulpd512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
entry:
%mul.i = fmul <8 x double> %x, %y
ret <8 x double> %mul.i
}
-; CHECK-LABEL: mulpd512fold
-; CHECK: vmulpd LCP{{.*}}(%rip)
-; CHECK: ret
define <8 x double> @mulpd512fold(<8 x double> %y) {
+; CHECK-LABEL: mulpd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
ret <8 x double> %mul.i
}
-; CHECK-LABEL: mulps512
-; CHECK: vmulps
-; CHECK: ret
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: mulps512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
entry:
%mul.i = fmul <16 x float> %x, %y
ret <16 x float> %mul.i
}
-; CHECK-LABEL: mulps512fold
-; CHECK: vmulps LCP{{.*}}(%rip)
-; CHECK: ret
define <16 x float> @mulps512fold(<16 x float> %y) {
+; CHECK-LABEL: mulps512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
ret <16 x float> %mul.i
}
-; CHECK-LABEL: divpd512
-; CHECK: vdivpd
-; CHECK: ret
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: divpd512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
entry:
%div.i = fdiv <8 x double> %x, %y
ret <8 x double> %div.i
}
-; CHECK-LABEL: divpd512fold
-; CHECK: vdivpd LCP{{.*}}(%rip)
-; CHECK: ret
define <8 x double> @divpd512fold(<8 x double> %y) {
+; CHECK-LABEL: divpd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
ret <8 x double> %div.i
}
-; CHECK-LABEL: divps512
-; CHECK: vdivps
-; CHECK: ret
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: divps512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
entry:
%div.i = fdiv <16 x float> %x, %y
ret <16 x float> %div.i
}
-; CHECK-LABEL: divps512fold
-; CHECK: vdivps LCP{{.*}}(%rip)
-; CHECK: ret
define <16 x float> @divps512fold(<16 x float> %y) {
+; CHECK-LABEL: divps512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
ret <16 x float> %div.i
}
-; CHECK-LABEL: vpaddq_test
-; CHECK: vpaddq %zmm
-; CHECK: ret
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
+; CHECK-LABEL: vpaddq_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%x = add <8 x i64> %i, %j
ret <8 x i64> %x
}
-; CHECK-LABEL: vpaddd_test
-; CHECK: vpaddd %zmm
-; CHECK: ret
+define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
+; CHECK-LABEL: vpaddq_fold_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %tmp = load <8 x i64>* %j, align 4
+ %x = add <8 x i64> %i, %tmp
+ ret <8 x i64> %x
+}
+
+define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
+; CHECK-LABEL: vpaddq_broadcast_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ ret <8 x i64> %x
+}
+
+define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
+; CHECK-LABEL: vpaddq_broadcast2_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %tmp = load i64* %j
+ %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
+ %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
+ %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
+ %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
+ %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
+ %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
+ %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
+ %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
+ %x = add <8 x i64> %i, %j.7
+ ret <8 x i64> %x
+}
+
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
+; CHECK-LABEL: vpaddd_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%x = add <16 x i32> %i, %j
ret <16 x i32> %x
}
-; CHECK-LABEL: vpsubq_test
-; CHECK: vpsubq %zmm
-; CHECK: ret
+define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
+; CHECK-LABEL: vpaddd_fold_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %tmp = load <16 x i32>* %j, align 4
+ %x = add <16 x i32> %i, %tmp
+ ret <16 x i32> %x
+}
+
+define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
+; CHECK-LABEL: vpaddd_broadcast_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i32> %x
+}
+
+define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %x = add <16 x i32> %i, %j
+ %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
+ ret <16 x i32> %r
+}
+
+define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %x = add <16 x i32> %i, %j
+ %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %r
+}
+
+define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_fold_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %j = load <16 x i32>* %j.ptr
+ %x = add <16 x i32> %i, %j
+ %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
+ ret <16 x i32> %r
+}
+
+define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_broadcast_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
+ ret <16 x i32> %r
+}
+
+define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_fold_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %j = load <16 x i32>* %j.ptr
+ %x = add <16 x i32> %i, %j
+ %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %r
+}
+
+define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_broadcast_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %r
+}
+
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
+; CHECK-LABEL: vpsubq_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%x = sub <8 x i64> %i, %j
ret <8 x i64> %x
}
-; CHECK-LABEL: vpsubd_test
-; CHECK: vpsubd
-; CHECK: ret
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
+; CHECK-LABEL: vpsubd_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%x = sub <16 x i32> %i, %j
ret <16 x i32> %x
}
-; CHECK-LABEL: vpmulld_test
-; CHECK: vpmulld %zmm
-; CHECK: ret
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
+; CHECK-LABEL: vpmulld_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%x = mul <16 x i32> %i, %j
ret <16 x i32> %x
}
-; CHECK-LABEL: addq_broadcast
-; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
-; CHECK: ret
+declare float @sqrtf(float) readnone
+define float @sqrtA(float %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: sqrtA:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %conv1 = tail call float @sqrtf(float %a) nounwind readnone
+ ret float %conv1
+}
+
+declare double @sqrt(double) readnone
+define double @sqrtB(double %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: sqrtB:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %call = tail call double @sqrt(double %a) nounwind readnone
+ ret double %call
+}
+
+declare float @llvm.sqrt.f32(float)
+define float @sqrtC(float %a) nounwind {
+; CHECK-LABEL: sqrtC:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %b = call float @llvm.sqrt.f32(float %a)
+ ret float %b
+}
+
+declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
+define <16 x float> @sqrtD(<16 x float> %a) nounwind {
+; CHECK-LABEL: sqrtD:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtps %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
+ ret <16 x float> %b
+}
+
+declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
+define <8 x double> @sqrtE(<8 x double> %a) nounwind {
+; CHECK-LABEL: sqrtE:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
+ ret <8 x double> %b
+}
+
+define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
+; CHECK-LABEL: fadd_broadcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+ ret <16 x float> %b
+}
+
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
+; CHECK-LABEL: addq_broadcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
ret <8 x i64> %b
}
-; CHECK-LABEL: orq_broadcast
-; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
-; CHECK: ret
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
+; CHECK-LABEL: orq_broadcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
ret <8 x i64> %b
}
-; CHECK-LABEL: andd512fold
-; CHECK: vpandd (%
-; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
+; CHECK-LABEL: andd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%a = load <16 x i32>* %x, align 4
%b = and <16 x i32> %y, %a
ret <16 x i32> %b
}
-; CHECK-LABEL: andqbrst
-; CHECK: vpandq (%rdi){1to8}, %zmm
-; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
+; CHECK-LABEL: andqbrst:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
entry:
%a = load i64* %ap, align 8
%b = insertelement <8 x i64> undef, i64 %a, i32 0
%c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%d = and <8 x i64> %p1, %c
ret <8 x i64>%d
-}
\ No newline at end of file
+}