1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=SKX %s
; NOTE(review): this dump appears truncated -- the embedded line numbers skip
; (entry: labels, retq CHECK lines and closing braces are missing); verify
; against the original test file before relying on exact block shape.
; addpd512/addps512: 512-bit vector fadd must lower to a single vaddpd/vaddps
; on zmm registers (operands commuted by ISel: %x+%y -> zmm0,zmm1).
8 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
9 ; CHECK-LABEL: addpd512:
10 ; CHECK: ## BB#0: ## %entry
11 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
14 %add.i = fadd <8 x double> %x, %y
15 ret <8 x double> %add.i
; *fold variants: fadd with a vector constant should fold the constant pool
; load into a RIP-relative memory operand of the add.
18 define <8 x double> @addpd512fold(<8 x double> %y) {
19 ; CHECK-LABEL: addpd512fold:
20 ; CHECK: ## BB#0: ## %entry
21 ; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
24 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
25 ret <8 x double> %add.i
28 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
29 ; CHECK-LABEL: addps512:
30 ; CHECK: ## BB#0: ## %entry
31 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
34 %add.i = fadd <16 x float> %x, %y
35 ret <16 x float> %add.i
38 define <16 x float> @addps512fold(<16 x float> %y) {
39 ; CHECK-LABEL: addps512fold:
40 ; CHECK: ## BB#0: ## %entry
41 ; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
44 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
45 ret <16 x float> %add.i
; subpd512/subps512: 512-bit vector fsub lowers to vsubpd/vsubps on zmm.
48 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
49 ; CHECK-LABEL: subpd512:
50 ; CHECK: ## BB#0: ## %entry
51 ; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
54 %sub.i = fsub <8 x double> %x, %y
55 ret <8 x double> %sub.i
; *fold variants: the loaded operand folds into the sub as (%rdi) -- note the
; load alignment (8 for pd, 4 for ps) still permits the folded memory form.
58 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
59 ; CHECK-LABEL: subpd512fold:
60 ; CHECK: ## BB#0: ## %entry
61 ; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
64 %tmp2 = load <8 x double>, <8 x double>* %x, align 8
65 %sub.i = fsub <8 x double> %y, %tmp2
66 ret <8 x double> %sub.i
69 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
70 ; CHECK-LABEL: subps512:
71 ; CHECK: ## BB#0: ## %entry
72 ; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
75 %sub.i = fsub <16 x float> %x, %y
76 ret <16 x float> %sub.i
79 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
80 ; CHECK-LABEL: subps512fold:
81 ; CHECK: ## BB#0: ## %entry
82 ; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
85 %tmp2 = load <16 x float>, <16 x float>* %x, align 4
86 %sub.i = fsub <16 x float> %y, %tmp2
87 ret <16 x float> %sub.i
; imulq512: <8 x i64> mul. Targets without AVX512DQ have no 64-bit vector
; multiply, so it is expanded into the classic 32x32 cross-product sequence
; (vpmuludq of the halves + shifts + adds). AVX512DQ/SKX select a single
; vpmullq instead.
90 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
91 ; AVX512F-LABEL: imulq512:
93 ; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
94 ; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
95 ; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
96 ; AVX512F-NEXT: vpsllq $32, %zmm3, %zmm3
97 ; AVX512F-NEXT: vpaddq %zmm3, %zmm2, %zmm2
98 ; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm1
99 ; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
100 ; AVX512F-NEXT: vpsllq $32, %zmm0, %zmm0
101 ; AVX512F-NEXT: vpaddq %zmm0, %zmm2, %zmm0
104 ; AVX512VL-LABEL: imulq512:
106 ; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
107 ; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
108 ; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
109 ; AVX512VL-NEXT: vpsllq $32, %zmm3, %zmm3
110 ; AVX512VL-NEXT: vpaddq %zmm3, %zmm2, %zmm2
111 ; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm1
112 ; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
113 ; AVX512VL-NEXT: vpsllq $32, %zmm0, %zmm0
114 ; AVX512VL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
115 ; AVX512VL-NEXT: retq
117 ; AVX512BW-LABEL: imulq512:
119 ; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
120 ; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
121 ; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
122 ; AVX512BW-NEXT: vpsllq $32, %zmm3, %zmm3
123 ; AVX512BW-NEXT: vpaddq %zmm3, %zmm2, %zmm2
124 ; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm1
125 ; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
126 ; AVX512BW-NEXT: vpsllq $32, %zmm0, %zmm0
127 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm2, %zmm0
128 ; AVX512BW-NEXT: retq
130 ; AVX512DQ-LABEL: imulq512:
132 ; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
133 ; AVX512DQ-NEXT: retq
135 ; SKX-LABEL: imulq512:
137 ; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
139 %z = mul <8 x i64>%x, %y
; mulpd512/mulps512: 512-bit vector fmul lowers to a single vmulpd/vmulps;
; the *fold variants check constant-pool folding into a RIP-relative operand.
143 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
144 ; CHECK-LABEL: mulpd512:
145 ; CHECK: ## BB#0: ## %entry
146 ; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
149 %mul.i = fmul <8 x double> %x, %y
150 ret <8 x double> %mul.i
153 define <8 x double> @mulpd512fold(<8 x double> %y) {
154 ; CHECK-LABEL: mulpd512fold:
155 ; CHECK: ## BB#0: ## %entry
156 ; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
159 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
160 ret <8 x double> %mul.i
163 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
164 ; CHECK-LABEL: mulps512:
165 ; CHECK: ## BB#0: ## %entry
166 ; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
169 %mul.i = fmul <16 x float> %x, %y
170 ret <16 x float> %mul.i
173 define <16 x float> @mulps512fold(<16 x float> %y) {
174 ; CHECK-LABEL: mulps512fold:
175 ; CHECK: ## BB#0: ## %entry
176 ; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
179 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
180 ret <16 x float> %mul.i
; divpd512/divps512: 512-bit vector fdiv lowers to vdivpd/vdivps; the *fold
; variants check the non-uniform constant vector folds as a memory operand
; (non-splat, so no {1to8}/{1to16} broadcast form is possible).
183 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
184 ; CHECK-LABEL: divpd512:
185 ; CHECK: ## BB#0: ## %entry
186 ; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
189 %div.i = fdiv <8 x double> %x, %y
190 ret <8 x double> %div.i
193 define <8 x double> @divpd512fold(<8 x double> %y) {
194 ; CHECK-LABEL: divpd512fold:
195 ; CHECK: ## BB#0: ## %entry
196 ; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
199 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
200 ret <8 x double> %div.i
203 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
204 ; CHECK-LABEL: divps512:
205 ; CHECK: ## BB#0: ## %entry
206 ; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
209 %div.i = fdiv <16 x float> %x, %y
210 ret <16 x float> %div.i
213 define <16 x float> @divps512fold(<16 x float> %y) {
214 ; CHECK-LABEL: divps512fold:
215 ; CHECK: ## BB#0: ## %entry
216 ; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
219 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
220 ret <16 x float> %div.i
; Integer add tests: register form, folded-load form, and broadcast forms.
; A splat-constant / splat-load operand should select the embedded-broadcast
; encoding {1to8}/{1to16} rather than materializing the full vector.
223 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
224 ; CHECK-LABEL: vpaddq_test:
226 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
228 %x = add <8 x i64> %i, %j
232 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
233 ; CHECK-LABEL: vpaddq_fold_test:
235 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
237 %tmp = load <8 x i64>, <8 x i64>* %j, align 4
238 %x = add <8 x i64> %i, %tmp
242 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
243 ; CHECK-LABEL: vpaddq_broadcast_test:
245 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
247 %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
; broadcast2: the splat is built explicitly with 8 insertelements from one
; scalar load; it must still be recognized as a {1to8} broadcast operand.
251 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
252 ; CHECK-LABEL: vpaddq_broadcast2_test:
254 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
256 %tmp = load i64, i64* %j
257 %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
258 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
259 %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
260 %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
261 %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
262 %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
263 %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
264 %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
265 %x = add <8 x i64> %i, %j.7
269 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
270 ; CHECK-LABEL: vpaddd_test:
272 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
274 %x = add <16 x i32> %i, %j
278 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
279 ; CHECK-LABEL: vpaddd_fold_test:
281 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
283 %tmp = load <16 x i32>, <16 x i32>* %j, align 4
284 %x = add <16 x i32> %i, %tmp
288 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
289 ; CHECK-LABEL: vpaddd_broadcast_test:
291 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
293 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Masked vpaddd tests: an icmp-ne-zero mask feeding a select should become a
; k-register (vpxord zero + vpcmpneqd -> %k1) and fold into the add as
; merge-masking {%k1} (select false-arm = %i) or zero-masking {%k1} {z}
; (select false-arm = zeroinitializer). Fold/broadcast memory operands must
; survive the masking.
297 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
298 ; CHECK-LABEL: vpaddd_mask_test:
300 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
301 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
302 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
304 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
305 %x = add <16 x i32> %i, %j
306 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
310 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
311 ; CHECK-LABEL: vpaddd_maskz_test:
313 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
314 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
315 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
317 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
318 %x = add <16 x i32> %i, %j
319 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
323 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
324 ; CHECK-LABEL: vpaddd_mask_fold_test:
326 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
327 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
328 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
330 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
331 %j = load <16 x i32>, <16 x i32>* %j.ptr
332 %x = add <16 x i32> %i, %j
333 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
337 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
338 ; CHECK-LABEL: vpaddd_mask_broadcast_test:
340 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
341 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
342 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
344 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
345 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
346 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
350 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
351 ; CHECK-LABEL: vpaddd_maskz_fold_test:
353 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
354 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
355 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
357 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
358 %j = load <16 x i32>, <16 x i32>* %j.ptr
359 %x = add <16 x i32> %i, %j
360 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
364 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
365 ; CHECK-LABEL: vpaddd_maskz_broadcast_test:
367 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
368 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
369 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
371 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
372 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
373 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Simple integer sub/mul lowering: sub <8 x i64> -> vpsubq, sub <16 x i32> ->
; vpsubd, mul <16 x i32> -> vpmulld (32-bit multiply exists in AVX512F).
377 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
378 ; CHECK-LABEL: vpsubq_test:
380 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
382 %x = sub <8 x i64> %i, %j
386 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
387 ; CHECK-LABEL: vpsubd_test:
389 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
391 %x = sub <16 x i32> %i, %j
395 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
396 ; CHECK-LABEL: vpmulld_test:
398 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
400 %x = mul <16 x i32> %i, %j
; Square-root lowering: readnone libcalls to sqrtf/sqrt and the llvm.sqrt
; intrinsics should all select the AVX-encoded scalar (vsqrtss/vsqrtsd) or
; packed (vsqrtps/vsqrtpd) instructions rather than an actual call.
404 declare float @sqrtf(float) readnone
405 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
406 ; CHECK-LABEL: sqrtA:
407 ; CHECK: ## BB#0: ## %entry
408 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
411 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
415 declare double @sqrt(double) readnone
416 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
417 ; CHECK-LABEL: sqrtB:
418 ; CHECK: ## BB#0: ## %entry
419 ; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
422 %call = tail call double @sqrt(double %a) nounwind readnone
426 declare float @llvm.sqrt.f32(float)
427 define float @sqrtC(float %a) nounwind {
428 ; CHECK-LABEL: sqrtC:
430 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
432 %b = call float @llvm.sqrt.f32(float %a)
436 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
437 define <16 x float> @sqrtD(<16 x float> %a) nounwind {
438 ; CHECK-LABEL: sqrtD:
440 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0
442 %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
446 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
447 define <8 x double> @sqrtE(<8 x double> %a) nounwind {
448 ; CHECK-LABEL: sqrtE:
450 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
452 %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
; Splat-operand folding for FP add, integer add, or, and and: splat constants
; use the {1to8}/{1to16} embedded-broadcast encoding; a full-width load folds
; as a plain (%rdi) operand; a scalar load splatted via shufflevector folds
; as (%rdi){1to8}.
456 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
457 ; CHECK-LABEL: fadd_broadcast:
459 ; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
461 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
465 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
466 ; CHECK-LABEL: addq_broadcast:
468 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
470 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
474 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
475 ; CHECK-LABEL: orq_broadcast:
477 ; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
479 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
483 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
484 ; CHECK-LABEL: andd512fold:
485 ; CHECK: ## BB#0: ## %entry
486 ; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
489 %a = load <16 x i32>, <16 x i32>* %x, align 4
490 %b = and <16 x i32> %y, %a
494 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
495 ; CHECK-LABEL: andqbrst:
496 ; CHECK: ## BB#0: ## %entry
497 ; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
500 %a = load i64, i64* %ap, align 8
501 %b = insertelement <8 x i64> undef, i64 %a, i32 0
502 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
503 %d = and <8 x i64> %p1, %c
; Masked FP arithmetic (<16 x float>): icmp-ne mask + select with %dst as
; the false arm should become merge-masked vaddps/vmulps/vminps into %dst
; (zmm0 {%k1}). For vminps, an fcmp-olt + select pair is matched as a min.
507 define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
508 ; CHECK-LABEL: test_mask_vaddps:
510 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
511 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
512 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
514 <16 x float> %j, <16 x i32> %mask1)
516 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
517 %x = fadd <16 x float> %i, %j
518 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
522 define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
523 ; CHECK-LABEL: test_mask_vmulps:
525 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
526 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
527 ; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
529 <16 x float> %j, <16 x i32> %mask1)
531 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
532 %x = fmul <16 x float> %i, %j
533 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
537 define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
538 ; CHECK-LABEL: test_mask_vminps:
540 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
541 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
542 ; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
544 <16 x float> %j, <16 x i32> %mask1)
546 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
547 %cmp_res = fcmp olt <16 x float> %i, %j
548 %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
549 %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
; Masked vminpd with a <8 x i32> mask: the mask zero-vector is built as a ymm
; (vpxor %ymm4). Targets with VLX (AVX512VL, SKX) compare it directly as a
; ymm (vpcmpneqd %ymm4); targets without VLX appear to widen the compare to
; zmm (vpcmpneqd %zmm4) -- NOTE(review): the ymm-zero/zmm-compare pairing on
; the non-VLX prefixes looks intentional (upper zmm bits are zero), but
; confirm against regenerated assertions.
553 define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
554 ; AVX512F-LABEL: test_mask_vminpd:
556 ; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
557 ; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
558 ; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
561 ; AVX512VL-LABEL: test_mask_vminpd:
563 ; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
564 ; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
565 ; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
566 ; AVX512VL-NEXT: retq
568 ; AVX512BW-LABEL: test_mask_vminpd:
570 ; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
571 ; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
572 ; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
573 ; AVX512BW-NEXT: retq
575 ; AVX512DQ-LABEL: test_mask_vminpd:
577 ; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
578 ; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
579 ; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
580 ; AVX512DQ-NEXT: retq
582 ; SKX-LABEL: test_mask_vminpd:
584 ; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
585 ; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
586 ; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
588 <8 x double> %j, <8 x i32> %mask1)
590 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
591 %cmp_res = fcmp olt <8 x double> %i, %j
592 %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
593 %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
; Masked max: fcmp-ogt + select is matched as vmaxps/vmaxpd with merge
; masking. vmaxpd mirrors the vminpd case above: <8 x i32> mask is zeroed as
; ymm and compared as ymm on VLX targets, as zmm elsewhere.
597 define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
598 ; CHECK-LABEL: test_mask_vmaxps:
600 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
601 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
602 ; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
604 <16 x float> %j, <16 x i32> %mask1)
606 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
607 %cmp_res = fcmp ogt <16 x float> %i, %j
608 %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
609 %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
613 define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
614 ; AVX512F-LABEL: test_mask_vmaxpd:
616 ; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
617 ; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
618 ; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
621 ; AVX512VL-LABEL: test_mask_vmaxpd:
623 ; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
624 ; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
625 ; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
626 ; AVX512VL-NEXT: retq
628 ; AVX512BW-LABEL: test_mask_vmaxpd:
630 ; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
631 ; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
632 ; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
633 ; AVX512BW-NEXT: retq
635 ; AVX512DQ-LABEL: test_mask_vmaxpd:
637 ; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
638 ; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
639 ; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
640 ; AVX512DQ-NEXT: retq
642 ; SKX-LABEL: test_mask_vmaxpd:
644 ; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
645 ; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
646 ; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
648 <8 x double> %j, <8 x i32> %mask1)
650 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
651 %cmp_res = fcmp ogt <8 x double> %i, %j
652 %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
653 %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
; Masked fsub/fdiv (<16 x float>): same mask-into-k1 pattern, merge-masked
; vsubps/vdivps into %dst (zmm0 {%k1}).
657 define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
658 ; CHECK-LABEL: test_mask_vsubps:
660 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
661 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
662 ; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
664 <16 x float> %j, <16 x i32> %mask1)
666 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
667 %x = fsub <16 x float> %i, %j
668 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
672 define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
673 ; CHECK-LABEL: test_mask_vdivps:
675 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
676 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
677 ; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
679 <16 x float> %j, <16 x i32> %mask1)
681 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
682 %x = fdiv <16 x float> %i, %j
683 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Masked vaddpd (<8 x double>, <8 x i64> mask -> vpcmpneqq): merge masking
; {%k1}, zero masking {%k1} {z}, and both combined with a folded (%rdi)
; memory operand.
687 define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
688 ; CHECK-LABEL: test_mask_vaddpd:
690 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
691 ; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
692 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
694 <8 x double> %j, <8 x i64> %mask1)
696 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
697 %x = fadd <8 x double> %i, %j
698 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
702 define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
703 ; CHECK-LABEL: test_maskz_vaddpd:
705 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
706 ; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
707 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
709 <8 x i64> %mask1) nounwind readnone {
710 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
711 %x = fadd <8 x double> %i, %j
712 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
716 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
717 ; CHECK-LABEL: test_mask_fold_vaddpd:
719 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
720 ; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
721 ; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
723 <8 x double>* %j, <8 x i64> %mask1)
725 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
726 %tmp = load <8 x double>, <8 x double>* %j, align 8
727 %x = fadd <8 x double> %i, %tmp
728 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
732 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
733 ; CHECK-LABEL: test_maskz_fold_vaddpd:
735 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
736 ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
737 ; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
739 <8 x i64> %mask1) nounwind {
740 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
741 %tmp = load <8 x double>, <8 x double>* %j, align 8
742 %x = fadd <8 x double> %i, %tmp
743 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; Broadcast vaddpd: a scalar double load splatted via insertelement +
; shufflevector folds as the (%rdi){1to8} embedded-broadcast operand, plain,
; merge-masked (note the extra vmovaps to put the merged result back in the
; return register), and zero-masked.
747 define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
748 ; CHECK-LABEL: test_broadcast_vaddpd:
750 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
752 %tmp = load double, double* %j
753 %b = insertelement <8 x double> undef, double %tmp, i32 0
754 %c = shufflevector <8 x double> %b, <8 x double> undef,
755 <8 x i32> zeroinitializer
756 %x = fadd <8 x double> %c, %i
760 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
761 ; CHECK-LABEL: test_mask_broadcast_vaddpd:
763 ; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
764 ; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
765 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
766 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
768 double* %j, <8 x i64> %mask1) nounwind {
769 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
770 %tmp = load double, double* %j
771 %b = insertelement <8 x double> undef, double %tmp, i32 0
772 %c = shufflevector <8 x double> %b, <8 x double> undef,
773 <8 x i32> zeroinitializer
774 %x = fadd <8 x double> %c, %i
775 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
779 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
780 ; CHECK-LABEL: test_maskz_broadcast_vaddpd:
782 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
783 ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
784 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
786 <8 x i64> %mask1) nounwind {
787 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
788 %tmp = load double, double* %j
789 %b = insertelement <8 x double> undef, double %tmp, i32 0
790 %c = shufflevector <8 x double> %b, <8 x double> undef,
791 <8 x i32> zeroinitializer
792 %x = fadd <8 x double> %c, %i
793 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; test_fxor: fsub from a splat of -0.0 is a sign-bit flip and should lower to
; an XOR with the sign-mask constant. Targets without AVX512DQ lack 512-bit
; FP logic ops and use the integer form vpxord; DQ/SKX use vxorps. The 256-bit
; case always has vxorps available.
797 define <16 x float> @test_fxor(<16 x float> %a) {
798 ; AVX512F-LABEL: test_fxor:
800 ; AVX512F-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
803 ; AVX512VL-LABEL: test_fxor:
805 ; AVX512VL-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
806 ; AVX512VL-NEXT: retq
808 ; AVX512BW-LABEL: test_fxor:
810 ; AVX512BW-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
811 ; AVX512BW-NEXT: retq
813 ; AVX512DQ-LABEL: test_fxor:
815 ; AVX512DQ-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
816 ; AVX512DQ-NEXT: retq
818 ; SKX-LABEL: test_fxor:
820 ; SKX-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
823 %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
827 define <8 x float> @test_fxor_8f32(<8 x float> %a) {
828 ; CHECK-LABEL: test_fxor_8f32:
830 ; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
832 %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
; fabs: llvm.fabs.* lowers to an AND with the clear-sign-bit mask constant.
; As with fxor above, targets without AVX512DQ use the integer forms
; vpandq/vpandd; DQ/SKX use the FP forms vandpd/vandps.
836 define <8 x double> @fabs_v8f64(<8 x double> %p)
837 ; AVX512F-LABEL: fabs_v8f64:
839 ; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
842 ; AVX512VL-LABEL: fabs_v8f64:
844 ; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
845 ; AVX512VL-NEXT: retq
847 ; AVX512BW-LABEL: fabs_v8f64:
849 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
850 ; AVX512BW-NEXT: retq
852 ; AVX512DQ-LABEL: fabs_v8f64:
854 ; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
855 ; AVX512DQ-NEXT: retq
857 ; SKX-LABEL: fabs_v8f64:
859 ; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
862 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
865 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
867 define <16 x float> @fabs_v16f32(<16 x float> %p)
868 ; AVX512F-LABEL: fabs_v16f32:
870 ; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
873 ; AVX512VL-LABEL: fabs_v16f32:
875 ; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
876 ; AVX512VL-NEXT: retq
878 ; AVX512BW-LABEL: fabs_v16f32:
880 ; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
881 ; AVX512BW-NEXT: retq
883 ; AVX512DQ-LABEL: fabs_v16f32:
885 ; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
886 ; AVX512DQ-NEXT: retq
888 ; SKX-LABEL: fabs_v16f32:
890 ; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
893 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
896 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)