1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; addpd512: fadd of two <8 x double> arguments (%x + %y).
; Expected output: one register-register vaddpd on zmm registers.
3 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
4 ; CHECK-LABEL: addpd512:
5 ; CHECK: ## BB#0: ## %entry
6 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
9 %add.i = fadd <8 x double> %x, %y
10 ret <8 x double> %add.i
; addpd512fold: fadd of %y with a constant <8 x double> whose elements are
; not all equal. Expected output: vaddpd taking the constant directly as a
; RIP-relative memory operand (no separate load, no broadcast).
13 define <8 x double> @addpd512fold(<8 x double> %y) {
14 ; CHECK-LABEL: addpd512fold:
15 ; CHECK: ## BB#0: ## %entry
16 ; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
19 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
20 ret <8 x double> %add.i
; addps512: fadd of two <16 x float> arguments (%x + %y).
; Expected output: one register-register vaddps on zmm registers.
23 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
24 ; CHECK-LABEL: addps512:
25 ; CHECK: ## BB#0: ## %entry
26 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
29 %add.i = fadd <16 x float> %x, %y
30 ret <16 x float> %add.i
; addps512fold: fadd of %y with a non-uniform constant <16 x float>.
; Expected output: vaddps with the constant as a RIP-relative memory operand.
33 define <16 x float> @addps512fold(<16 x float> %y) {
34 ; CHECK-LABEL: addps512fold:
35 ; CHECK: ## BB#0: ## %entry
36 ; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
39 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
40 ret <16 x float> %add.i
; subpd512: fsub of two <8 x double> arguments (%x - %y).
; Expected output: one register-register vsubpd on zmm registers.
43 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
44 ; CHECK-LABEL: subpd512:
45 ; CHECK: ## BB#0: ## %entry
46 ; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
49 %sub.i = fsub <8 x double> %x, %y
50 ret <8 x double> %sub.i
; subpd512fold: %y minus an <8 x double> loaded through pointer %x.
; Expected output: the load is folded into vsubpd as the (%rdi) memory operand.
53 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
54 ; CHECK-LABEL: subpd512fold:
55 ; CHECK: ## BB#0: ## %entry
56 ; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
59 %tmp2 = load <8 x double>, <8 x double>* %x, align 8
60 %sub.i = fsub <8 x double> %y, %tmp2
61 ret <8 x double> %sub.i
; subps512: fsub of two <16 x float> arguments (%x - %y).
; Expected output: one register-register vsubps on zmm registers.
64 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
65 ; CHECK-LABEL: subps512:
66 ; CHECK: ## BB#0: ## %entry
67 ; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
70 %sub.i = fsub <16 x float> %x, %y
71 ret <16 x float> %sub.i
; subps512fold: %y minus a <16 x float> loaded through pointer %x.
; Expected output: the load is folded into vsubps as the (%rdi) memory operand.
74 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
75 ; CHECK-LABEL: subps512fold:
76 ; CHECK: ## BB#0: ## %entry
77 ; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
80 %tmp2 = load <16 x float>, <16 x float>* %x, align 4
81 %sub.i = fsub <16 x float> %y, %tmp2
82 ret <16 x float> %sub.i
; imulq512: integer mul of two <8 x i64> arguments.
; Expected output is not a single multiply but a sequence of three vpmuludq
; (32x32->64 multiplies) combined with vpsrlq/vpsllq by 32 and vpaddq.
; NOTE(review): presumably the selected CPU has no single 64-bit element
; multiply instruction, hence the expansion -- confirm against the target.
85 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
86 ; CHECK-LABEL: imulq512:
88 ; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
89 ; CHECK-NEXT: vpsrlq $32, %zmm0, %zmm3
90 ; CHECK-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
91 ; CHECK-NEXT: vpsllq $32, %zmm3, %zmm3
92 ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm2
93 ; CHECK-NEXT: vpsrlq $32, %zmm1, %zmm1
94 ; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
95 ; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0
96 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
98 %z = mul <8 x i64>%x, %y
; mulpd512: fmul of two <8 x double> arguments.
; Expected output: one register-register vmulpd on zmm registers.
102 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
103 ; CHECK-LABEL: mulpd512:
104 ; CHECK: ## BB#0: ## %entry
105 ; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
108 %mul.i = fmul <8 x double> %x, %y
109 ret <8 x double> %mul.i
; mulpd512fold: fmul of %y with a non-uniform constant <8 x double>.
; Expected output: vmulpd with the constant as a RIP-relative memory operand.
112 define <8 x double> @mulpd512fold(<8 x double> %y) {
113 ; CHECK-LABEL: mulpd512fold:
114 ; CHECK: ## BB#0: ## %entry
115 ; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
118 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
119 ret <8 x double> %mul.i
; mulps512: fmul of two <16 x float> arguments.
; Expected output: one register-register vmulps on zmm registers.
122 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
123 ; CHECK-LABEL: mulps512:
124 ; CHECK: ## BB#0: ## %entry
125 ; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
128 %mul.i = fmul <16 x float> %x, %y
129 ret <16 x float> %mul.i
; mulps512fold: fmul of %y with a non-uniform constant <16 x float>.
; Expected output: vmulps with the constant as a RIP-relative memory operand.
132 define <16 x float> @mulps512fold(<16 x float> %y) {
133 ; CHECK-LABEL: mulps512fold:
134 ; CHECK: ## BB#0: ## %entry
135 ; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
138 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
139 ret <16 x float> %mul.i
; divpd512: fdiv of two <8 x double> arguments (%x / %y).
; Expected output: one register-register vdivpd on zmm registers.
142 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
143 ; CHECK-LABEL: divpd512:
144 ; CHECK: ## BB#0: ## %entry
145 ; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
148 %div.i = fdiv <8 x double> %x, %y
149 ret <8 x double> %div.i
; divpd512fold: fdiv of %y by a non-uniform constant <8 x double>.
; Expected output: vdivpd with the constant as a RIP-relative memory operand.
152 define <8 x double> @divpd512fold(<8 x double> %y) {
153 ; CHECK-LABEL: divpd512fold:
154 ; CHECK: ## BB#0: ## %entry
155 ; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
158 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
159 ret <8 x double> %div.i
; divps512: fdiv of two <16 x float> arguments (%x / %y).
; Expected output: one register-register vdivps on zmm registers.
162 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
163 ; CHECK-LABEL: divps512:
164 ; CHECK: ## BB#0: ## %entry
165 ; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
168 %div.i = fdiv <16 x float> %x, %y
169 ret <16 x float> %div.i
; divps512fold: fdiv of %y by a non-uniform constant <16 x float>.
; Expected output: vdivps with the constant as a RIP-relative memory operand.
172 define <16 x float> @divps512fold(<16 x float> %y) {
173 ; CHECK-LABEL: divps512fold:
174 ; CHECK: ## BB#0: ## %entry
175 ; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
178 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
179 ret <16 x float> %div.i
; vpaddq_test: integer add of two <8 x i64> arguments.
; Expected output: one register-register vpaddq on zmm registers.
182 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
183 ; CHECK-LABEL: vpaddq_test:
185 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
187 %x = add <8 x i64> %i, %j
; vpaddq_fold_test: %i plus an <8 x i64> loaded through pointer %j.
; Expected output: the load is folded into vpaddq as the (%rdi) memory operand.
191 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
192 ; CHECK-LABEL: vpaddq_fold_test:
194 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
196 %tmp = load <8 x i64>, <8 x i64>* %j, align 4
197 %x = add <8 x i64> %i, %tmp
; vpaddq_broadcast_test: %i plus a constant vector whose eight i64 lanes are
; all 1. Expected output: vpaddq with a RIP-relative {1to8} broadcast operand.
201 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
202 ; CHECK-LABEL: vpaddq_broadcast_test:
204 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
206 %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
; vpaddq_broadcast2_test: loads one i64 through %j, inserts it into all eight
; lanes with a chain of insertelement instructions, then adds the result to %i.
; Expected output: the scalar load and splat become the (%rdi){1to8}
; broadcast memory operand of a single vpaddq.
210 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
211 ; CHECK-LABEL: vpaddq_broadcast2_test:
213 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
215 %tmp = load i64, i64* %j
216 %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
217 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
218 %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
219 %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
220 %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
221 %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
222 %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
223 %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
224 %x = add <8 x i64> %i, %j.7
; vpaddd_test: integer add of two <16 x i32> arguments.
; Expected output: one register-register vpaddd on zmm registers.
228 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
229 ; CHECK-LABEL: vpaddd_test:
231 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
233 %x = add <16 x i32> %i, %j
; vpaddd_fold_test: %i plus a <16 x i32> loaded through pointer %j.
; Expected output: the load is folded into vpaddd as the (%rdi) memory operand.
237 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
238 ; CHECK-LABEL: vpaddd_fold_test:
240 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
242 %tmp = load <16 x i32>, <16 x i32>* %j, align 4
243 %x = add <16 x i32> %i, %tmp
; vpaddd_broadcast_test: %i plus a constant vector whose sixteen i32 lanes are
; all 1. Expected output: vpaddd with a RIP-relative {1to16} broadcast operand.
247 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
248 ; CHECK-LABEL: vpaddd_broadcast_test:
250 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
252 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; vpaddd_mask_test: merge-masked add. The lane mask is (%mask1 != 0); lanes
; where it is set take %i + %j, the others keep %i.
; Expected output: zero a register with vpxord, compare %mask1 against it
; into %k1 with vpcmpneqd, then a vpaddd predicated on {%k1}.
256 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
257 ; CHECK-LABEL: vpaddd_mask_test:
259 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
260 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
261 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
263 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
264 %x = add <16 x i32> %i, %j
265 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; vpaddd_maskz_test: zero-masked add. Lanes where (%mask1 != 0) take %i + %j,
; all other lanes are zero.
; Expected output: same mask setup as the merge-masked case, with the vpaddd
; carrying {%k1} {z}.
269 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
270 ; CHECK-LABEL: vpaddd_maskz_test:
272 ; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
273 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
274 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
276 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
277 %x = add <16 x i32> %i, %j
278 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; vpaddd_mask_fold_test: merge-masked add whose second operand is loaded
; through %j.ptr. Unselected lanes keep %i.
; Expected output: the load is folded into the {%k1}-predicated vpaddd as
; the (%rdi) memory operand.
282 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
283 ; CHECK-LABEL: vpaddd_mask_fold_test:
285 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
286 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
287 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
289 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
290 %j = load <16 x i32>, <16 x i32>* %j.ptr
291 %x = add <16 x i32> %i, %j
292 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; vpaddd_mask_broadcast_test: merge-masked add of %i and an all-ones-valued
; (every lane is i32 1) constant vector. Unselected lanes keep %i.
; Expected output: a {%k1}-predicated vpaddd with a RIP-relative {1to16}
; broadcast operand.
296 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
297 ; CHECK-LABEL: vpaddd_mask_broadcast_test:
299 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
300 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
301 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
303 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
304 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
305 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; vpaddd_maskz_fold_test: zero-masked add whose second operand is loaded
; through %j.ptr. Unselected lanes are zero.
; Expected output: the load is folded into a vpaddd carrying {%k1} {z}.
309 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
310 ; CHECK-LABEL: vpaddd_maskz_fold_test:
312 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
313 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
314 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
316 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
317 %j = load <16 x i32>, <16 x i32>* %j.ptr
318 %x = add <16 x i32> %i, %j
319 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; vpaddd_maskz_broadcast_test: zero-masked add of %i and a constant vector
; whose sixteen lanes are all i32 1. Unselected lanes are zero.
; Expected output: vpaddd with a RIP-relative {1to16} operand and {%k1} {z}.
323 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
324 ; CHECK-LABEL: vpaddd_maskz_broadcast_test:
326 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
327 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
328 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
330 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
331 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
332 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; vpsubq_test: integer sub of two <8 x i64> arguments (%i - %j).
; Expected output: one register-register vpsubq on zmm registers.
336 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
337 ; CHECK-LABEL: vpsubq_test:
339 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
341 %x = sub <8 x i64> %i, %j
; vpsubd_test: integer sub of two <16 x i32> arguments (%i - %j).
; Expected output: one register-register vpsubd on zmm registers.
345 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
346 ; CHECK-LABEL: vpsubd_test:
348 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
350 %x = sub <16 x i32> %i, %j
; vpmulld_test: integer mul of two <16 x i32> arguments.
; Expected output: one register-register vpmulld on zmm registers.
354 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
355 ; CHECK-LABEL: vpmulld_test:
357 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
359 %x = mul <16 x i32> %i, %j
; sqrtA: tail call to the external function @sqrtf (declared readnone) on a
; float argument. Expected output: the call is replaced by an inline vsqrtss.
363 declare float @sqrtf(float) readnone
364 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
365 ; CHECK-LABEL: sqrtA:
366 ; CHECK: ## BB#0: ## %entry
367 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
370 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
; sqrtB: tail call to the external function @sqrt (declared readnone) on a
; double argument. Expected output: the call is replaced by an inline vsqrtsd.
374 declare double @sqrt(double) readnone
375 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
376 ; CHECK-LABEL: sqrtB:
377 ; CHECK: ## BB#0: ## %entry
378 ; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
381 %call = tail call double @sqrt(double %a) nounwind readnone
; sqrtC: scalar float square root through the @llvm.sqrt.f32 intrinsic.
; Expected output: vsqrtss on xmm0.
385 declare float @llvm.sqrt.f32(float)
386 define float @sqrtC(float %a) nounwind {
387 ; CHECK-LABEL: sqrtC:
389 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
391 %b = call float @llvm.sqrt.f32(float %a)
; sqrtD: <16 x float> square root through the @llvm.sqrt.v16f32 intrinsic.
; Expected output: one vsqrtps on a zmm register.
395 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
396 define <16 x float> @sqrtD(<16 x float> %a) nounwind {
397 ; CHECK-LABEL: sqrtD:
399 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0
401 %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
; sqrtE: <8 x double> square root through the @llvm.sqrt.v8f64 intrinsic.
; Expected output: one vsqrtpd on a zmm register.
405 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
406 define <8 x double> @sqrtE(<8 x double> %a) nounwind {
407 ; CHECK-LABEL: sqrtE:
409 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
411 %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
; fadd_broadcast: fadd of %a with a constant <16 x float> whose lanes all hold
; the same value. Expected output: vaddps with a RIP-relative {1to16}
; broadcast operand.
415 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
416 ; CHECK-LABEL: fadd_broadcast:
418 ; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
420 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; addq_broadcast: %a plus a constant vector whose eight i64 lanes are all 2.
; Expected output: vpaddq with a RIP-relative {1to8} broadcast operand.
424 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
425 ; CHECK-LABEL: addq_broadcast:
427 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
429 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; orq_broadcast: bitwise or of %a with a constant vector whose eight i64
; lanes are all 2. Expected output: vporq with a RIP-relative {1to8} operand.
433 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
434 ; CHECK-LABEL: orq_broadcast:
436 ; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
438 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; andd512fold: bitwise and of %y with a <16 x i32> loaded through pointer %x.
; Expected output: the load is folded into vpandd as the (%rdi) memory operand.
442 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
443 ; CHECK-LABEL: andd512fold:
444 ; CHECK: ## BB#0: ## %entry
445 ; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
448 %a = load <16 x i32>, <16 x i32>* %x, align 4
449 %b = and <16 x i32> %y, %a
; andqbrst: loads one i64 through %ap, splats it to all eight lanes
; (insertelement into lane 0, then shufflevector with an all-zero mask), and
; ands the splat with %p1.
; Expected output: a single vpandq with the (%rdi){1to8} broadcast operand.
453 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
454 ; CHECK-LABEL: andqbrst:
455 ; CHECK: ## BB#0: ## %entry
456 ; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
459 %a = load i64, i64* %ap, align 8
460 %b = insertelement <8 x i64> undef, i64 %a, i32 0
461 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
462 %d = and <8 x i64> %p1, %c
; test_mask_vaddps: merge-masked <16 x float> add. Lanes where (%mask1 != 0)
; take %i + %j, the others take %dst.
; Expected output: a three-register vaddps predicated on some %k1..%k7
; (register numbers are matched by pattern, not fixed).
466 ; CHECK-LABEL: test_mask_vaddps
467 ; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
469 define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
470 <16 x float> %j, <16 x i32> %mask1)
472 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
473 %x = fadd <16 x float> %i, %j
474 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; test_mask_vmulps: merge-masked <16 x float> multiply. Lanes where
; (%mask1 != 0) take %i * %j, the others take %dst.
; Expected output: a three-register vmulps predicated on some %k1..%k7.
478 ; CHECK-LABEL: test_mask_vmulps
479 ; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
481 define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
482 <16 x float> %j, <16 x i32> %mask1)
484 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
485 %x = fmul <16 x float> %i, %j
486 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; test_mask_vminps: merge-masked <16 x float> minimum. The minimum is written
; as fcmp olt + select; a second select keyed on (%mask1 != 0) picks between
; that result and %dst.
; Expected output: a three-register vminps predicated on some %k1..%k7.
490 ; CHECK-LABEL: test_mask_vminps
491 ; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
493 define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
494 <16 x float> %j, <16 x i32> %mask1)
496 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
497 %cmp_res = fcmp olt <16 x float> %i, %j
498 %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
499 %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
; test_mask_vminpd: merge-masked <8 x double> minimum (fcmp olt + select),
; with the lane mask derived from an <8 x i32> vector (%mask1 != 0).
; Unselected lanes take %dst.
; Expected output: a three-register vminpd predicated on some %k1..%k7.
503 ; CHECK-LABEL: test_mask_vminpd
504 ; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
506 define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
507 <8 x double> %j, <8 x i32> %mask1)
509 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
510 %cmp_res = fcmp olt <8 x double> %i, %j
511 %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
512 %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
; test_mask_vmaxps: merge-masked <16 x float> maximum. The maximum is written
; as fcmp ogt + select; a second select keyed on (%mask1 != 0) picks between
; that result and %dst.
; Expected output: a three-register vmaxps predicated on some %k1..%k7.
516 ; CHECK-LABEL: test_mask_vmaxps
517 ; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
519 define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
520 <16 x float> %j, <16 x i32> %mask1)
522 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
523 %cmp_res = fcmp ogt <16 x float> %i, %j
524 %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
525 %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
; test_mask_vmaxpd: merge-masked <8 x double> maximum (fcmp ogt + select),
; with the lane mask derived from an <8 x i32> vector (%mask1 != 0).
; Unselected lanes take %dst.
; Expected output: a three-register vmaxpd predicated on some %k1..%k7.
529 ; CHECK-LABEL: test_mask_vmaxpd
530 ; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
532 define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
533 <8 x double> %j, <8 x i32> %mask1)
535 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
536 %cmp_res = fcmp ogt <8 x double> %i, %j
537 %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
538 %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
; test_mask_vsubps: merge-masked <16 x float> subtract. Lanes where
; (%mask1 != 0) take %i - %j, the others take %dst.
; Expected output: a three-register vsubps predicated on some %k1..%k7.
542 ; CHECK-LABEL: test_mask_vsubps
543 ; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
545 define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
546 <16 x float> %j, <16 x i32> %mask1)
548 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
549 %x = fsub <16 x float> %i, %j
550 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; test_mask_vdivps: merge-masked <16 x float> divide. Lanes where
; (%mask1 != 0) take %i / %j, the others take %dst.
; Expected output: a three-register vdivps predicated on some %k1..%k7.
554 ; CHECK-LABEL: test_mask_vdivps
555 ; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
557 define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
558 <16 x float> %j, <16 x i32> %mask1)
560 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
561 %x = fdiv <16 x float> %i, %j
562 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; test_mask_vaddpd: merge-masked <8 x double> add, with the lane mask derived
; from an <8 x i64> vector (%mask1 != 0). Unselected lanes take %dst.
; Expected output: a three-register vaddpd predicated on some %k1..%k7.
566 ; CHECK-LABEL: test_mask_vaddpd
567 ; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
569 define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
570 <8 x double> %j, <8 x i64> %mask1)
572 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
573 %x = fadd <8 x double> %i, %j
574 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
; test_maskz_vaddpd: zero-masked <8 x double> add. Lanes where (%mask1 != 0)
; take %i + %j, all other lanes are zero.
; Expected output: a three-register vaddpd carrying a mask register and {z}.
578 ; CHECK-LABEL: test_maskz_vaddpd
579 ; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}}
581 define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
582 <8 x i64> %mask1) nounwind readnone {
583 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
584 %x = fadd <8 x double> %i, %j
585 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; test_mask_fold_vaddpd: merge-masked <8 x double> add whose second operand
; is loaded through pointer %j. Unselected lanes take %dst.
; Expected output: a masked vaddpd with the load folded in as the (%rdi)
; memory operand.
589 ; CHECK-LABEL: test_mask_fold_vaddpd
590 ; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}}
592 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
593 <8 x double>* %j, <8 x i64> %mask1)
595 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
596 %tmp = load <8 x double>, <8 x double>* %j, align 8
597 %x = fadd <8 x double> %i, %tmp
598 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
; test_maskz_fold_vaddpd: zero-masked <8 x double> add whose second operand
; is loaded through pointer %j. Unselected lanes are zero.
; Expected output: a vaddpd with the (%rdi) memory operand, a mask register
; and {z}.
602 ; CHECK-LABEL: test_maskz_fold_vaddpd
603 ; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}}
605 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
606 <8 x i64> %mask1) nounwind {
607 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
608 %tmp = load <8 x double>, <8 x double>* %j, align 8
609 %x = fadd <8 x double> %i, %tmp
610 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; test_broadcast_vaddpd: loads one double through %j, splats it to all eight
; lanes (insertelement into lane 0, then shufflevector with an all-zero
; mask), and adds the splat to %i.
; Expected output: a vaddpd with the (%rdi){1to8} broadcast memory operand.
614 ; CHECK-LABEL: test_broadcast_vaddpd
615 ; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
617 define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
618 %tmp = load double, double* %j
619 %b = insertelement <8 x double> undef, double %tmp, i32 0
620 %c = shufflevector <8 x double> %b, <8 x double> undef,
621 <8 x i32> zeroinitializer
622 %x = fadd <8 x double> %c, %i
; test_mask_broadcast_vaddpd: merge-masked add of %i and a splat of the
; double loaded through %j. Lanes where (%mask1 != 0) take the sum.
; Expected output: a masked vaddpd with the (%rdi){1to8} broadcast operand.
; NOTE(review): the pass-through operand of the final select is %i, and the
; %dst parameter is not referenced in the visible body -- confirm this is
; intentional rather than a leftover from the other masked tests.
626 ; CHECK-LABEL: test_mask_broadcast_vaddpd
627 ; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}}
629 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
630 double* %j, <8 x i64> %mask1) nounwind {
631 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
632 %tmp = load double, double* %j
633 %b = insertelement <8 x double> undef, double %tmp, i32 0
634 %c = shufflevector <8 x double> %b, <8 x double> undef,
635 <8 x i32> zeroinitializer
636 %x = fadd <8 x double> %c, %i
637 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
; test_maskz_broadcast_vaddpd: zero-masked add of %i and a splat of the
; double loaded through %j. Lanes where (%mask1 != 0) take the sum, all
; other lanes are zero.
; Expected output: a vaddpd with the (%rdi){1to8} broadcast operand, a mask
; register and {z}.
641 ; CHECK-LABEL: test_maskz_broadcast_vaddpd
642 ; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}}
644 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
645 <8 x i64> %mask1) nounwind {
646 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
647 %tmp = load double, double* %j
648 %b = insertelement <8 x double> undef, double %tmp, i32 0
649 %c = shufflevector <8 x double> %b, <8 x double> undef,
650 <8 x i32> zeroinitializer
651 %x = fadd <8 x double> %c, %i
652 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer