; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
; fcmp ole + select folds into a single masked move (float max-like pattern).
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; KNL-LABEL: test1:
; KNL:       ## BB#0:
; KNL-NEXT: vcmpleps %zmm1, %zmm0, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask = fcmp ole <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
  ret <16 x float> %max
}
; Same as test1 but for v8f64: compare to mask, then masked move.
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## BB#0:
; KNL-NEXT: vcmplepd %zmm1, %zmm0, %k1
; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask = fcmp ole <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
  ret <8 x double> %max
}
; Integer compare with a memory operand: the load folds into vpcmpeqd.
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
; KNL-LABEL: test3:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %y = load <16 x i32>, <16 x i32>* %yp, align 4
  %mask = icmp eq <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}
; Unsigned uge compare selects the vpcmpnltud encoding.
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; KNL-LABEL: test4_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask = icmp uge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}
; v8i64 equality compare to mask plus masked 64-bit move.
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
  ret <8 x i64> %max
}
; Unsigned ugt on v8i64 selects the vpcmpnleuq encoding.
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
; KNL-LABEL: test6_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
; KNL-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask = icmp ugt <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
  ret <8 x i64> %max
}
; 128-bit compare+select: KNL (no AVX512VL) uses vblendvps; SKX uses a mask reg.
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; KNL-LABEL: test7:
; KNL:       ## BB#0:
; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
;
; SKX-LABEL: test7:
; SKX: vxorps %xmm2, %xmm2, %xmm2
; SKX: vcmpltps %xmm2, %xmm0, %k1
; SKX: vmovaps %xmm0, %xmm1 {%k1}
; SKX: vmovaps %zmm1, %zmm0
  %mask = fcmp olt <4 x float> %a, zeroinitializer
  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
  ret <4 x float> %c
}
; Same pattern as test7, v2f64 flavor (vblendvpd on KNL, mask move on SKX).
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
;
; SKX-LABEL: test8:
; SKX: vxorpd %xmm2, %xmm2, %xmm2
; SKX: vcmpltpd %xmm2, %xmm0, %k1
; SKX: vmovapd %xmm0, %xmm1 {%k1}
; SKX: vmovaps %zmm1, %zmm0
  %mask = fcmp olt <2 x double> %a, zeroinitializer
  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
  ret <2 x double> %c
}
; v8i32 on KNL is promoted to zmm ops; compare feeds a blendm.
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
  %mask = icmp eq <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}
; v8f32: KNL widens to zmm and uses vblendmps; SKX uses VL masked move on ymm.
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-LABEL: test10:
; KNL:       ## BB#0:
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
;
; SKX-LABEL: test10:
; SKX: vcmpeqps %ymm1, %ymm0, %k1
; SKX: vmovaps %ymm0, %ymm1 {%k1}
; SKX: vmovaps %zmm1, %zmm0
  %mask = fcmp oeq <8 x float> %x, %y
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
  ret <8 x float> %max
}
; Unsigned max pattern is recognized and emitted as a single vpmaxud.
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test11_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
  %mask = icmp ugt <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}
; <16 x i1> result of a split v16i64 compare is concatenated with kunpckbw
; and moved to a GPR as an i16 bitmask.
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-LABEL: test12:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; KNL-NEXT: kunpckbw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
  %res = icmp eq <16 x i64> %a, %b
  %res1 = bitcast <16 x i1> %res to i16
  ret i16 %res1
}
; 32-lane variant of test12: word-sized mask halves joined with kunpckwd.
define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; SKX-LABEL: test12_v32i32:
; SKX:       ## BB#0:
; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
; SKX-NEXT: kunpckwd %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
  %res = icmp eq <32 x i32> %a, %b
  %res1 = bitcast <32 x i1> %res to i32
  ret i32 %res1
}
; 64-lane variant: dword mask halves joined with kunpckdq into a 64-bit mask.
define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; SKX-LABEL: test12_v64i16:
; SKX:       ## BB#0:
; SKX-NEXT: vpcmpeqw %zmm2, %zmm0, %k0
; SKX-NEXT: vpcmpeqw %zmm3, %zmm1, %k1
; SKX-NEXT: kunpckdq %k0, %k1, %k0
; SKX-NEXT: kmovq %k0, %rax
  %res = icmp eq <64 x i16> %a, %b
  %res1 = bitcast <64 x i1> %res to i64
  ret i64 %res1
}
; zext of a compare mask becomes a zero-masked broadcast of constant 1.
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
{
; KNL-LABEL: test13:
; KNL:       ## BB#0:
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
  %cmpvector_i = fcmp oeq <16 x float> %a, %b
  %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
  ret <16 x i32> %conv
}
; sext-of-cmp compared against zero; currently emits a redundant double knot.
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test14:
; KNL:       ## BB#0:
; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm1
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: knotw %k0, %k1
; KNL-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
  %sub_r = sub <16 x i32> %a, %b
  %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
  %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
  %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
  %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
  ret <16 x i32> %res
}
; v8i64 flavor of test14 (same redundant double-knot pattern).
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT: vpsubq %zmm1, %zmm0, %zmm1
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: knotw %k0, %k1
; KNL-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
  %sub_r = sub <8 x i64> %a, %b
  %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
  %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
  %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
  %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
  ret <8 x i64> %res
}
; sge is commuted to vpcmpled with swapped operands.
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpled %zmm0, %zmm1, %k1
; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask = icmp sge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}
; Signed sgt with a folded memory operand.
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test17:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sgt <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}
; Signed sle with a folded memory operand.
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test18:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpled (%rdi), %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}
; Unsigned ule with a folded memory operand (vpcmpleud).
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test19:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp ule <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}
; AND of two i1 masks becomes a mask-predicated second compare ({%k1}).
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test20:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask1 = icmp eq <16 x i32> %x1, %y1
  %mask0 = icmp eq <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %max
}
; Chained sle/sge masks on v8i64; second compare predicated on the first.
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test21:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; KNL-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
; KNL-NEXT: vmovaps %zmm2, %zmm0
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}
; Predicated compare where the second operand comes from memory.
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test22:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
; KNL-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask1 = icmp sgt <8 x i64> %x1, %y1
  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
  %mask0 = icmp sgt <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}
; Mixed signed/unsigned predicated compares with a memory operand.
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test23:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask0 = icmp ule <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}
; Splat-from-memory compare folds into the {1to8} broadcast form.
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; KNL-LABEL: test24:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}
; {1to16} broadcast compare for a splatted i32 operand.
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; KNL-LABEL: test25:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}
; Predicated broadcast compare: mask1 gates the {1to16} sgt compare.
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test26:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
; KNL-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask0 = icmp sgt <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}
; Predicated {1to8} broadcast compare, v8i64 flavor.
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test27:
; KNL:       ## BB#0:
; KNL-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; KNL-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %zmm1, %zmm0
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}
; Equality of two i1 compare masks, sign-extended to v8i32.
define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
  %x_gt_y = icmp sgt <8 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
  %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <8 x i1>%res to <8 x i32>
  ret <8 x i32> %resse
}
; Inequality of two i1 compare masks, sign-extended to v16i8.
define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
  %x_gt_y = icmp sgt <16 x i32> %x, %y
  %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
  %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <16 x i1>%res to <16 x i8>
  ret <16 x i8> %resse
}
; SKX (AVX512VL) masked ymm move for a v4f64 compare+select.
define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; SKX-LABEL: test30:
; SKX: vcmpeqpd %ymm1, %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}
  %mask = fcmp oeq <4 x double> %x, %y
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
  ret <4 x double> %max
}
; VL masked xmm move with the load folded into the compare.
define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
; SKX-LABEL: test31:
; SKX: vcmpltpd (%rdi), %xmm0, %k1
; SKX: vmovapd %xmm0, %xmm1 {%k1}
  %y = load <2 x double>, <2 x double>* %yp, align 4
  %mask = fcmp olt <2 x double> %x, %y
  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %max
}
; ogt with swapped operands is commuted into a vcmpltpd against memory.
define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
; SKX-LABEL: test32:
; SKX: vcmpltpd (%rdi), %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}
  %y = load <4 x double>, <4 x double>* %yp, align 4
  %mask = fcmp ogt <4 x double> %y, %x
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}
; Full-width zmm version of the folded-load compare+masked-move.
define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; SKX-LABEL: test33:
; SKX: vcmpltpd (%rdi), %zmm0, %k1
; SKX: vmovapd %zmm0, %zmm1 {%k1}
  %y = load <8 x double>, <8 x double>* %yp, align 4
  %mask = fcmp olt <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}
; v4f32 folded-load compare with VL masked move.
define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
; SKX-LABEL: test34:
; SKX: vcmpltps (%rdi), %xmm0, %k1
; SKX: vmovaps %xmm0, %xmm1 {%k1}
  %y = load <4 x float>, <4 x float>* %yp, align 4
  %mask = fcmp olt <4 x float> %x, %y
  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %max
}
; v8f32 commuted ogt compare folded against memory.
define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; SKX-LABEL: test35:
; SKX: vcmpltps (%rdi), %ymm0, %k1
; SKX: vmovaps %ymm0, %ymm1 {%k1}
  %y = load <8 x float>, <8 x float>* %yp, align 4
  %mask = fcmp ogt <8 x float> %y, %x
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %max
}
; v16f32 zmm folded-load compare with masked move.
define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; SKX-LABEL: test36:
; SKX: vcmpltps (%rdi), %zmm0, %k1
; SKX: vmovaps %zmm0, %zmm1 {%k1}
  %y = load <16 x float>, <16 x float>* %yp, align 4
  %mask = fcmp olt <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %max
}
; Scalar splat from memory folds into a {1to8} broadcast FP compare.
define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test37:
; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1
; SKX: vmovapd %zmm0, %zmm1 {%k1}
  %a = load double, double* %ptr
  %v = insertelement <8 x double> undef, double %a, i32 0
  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
  %mask = fcmp ogt <8 x double> %shuffle, %x
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}
; {1to4} broadcast FP compare on ymm (AVX512VL).
define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test38:
; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
  %mask = fcmp ogt <4 x double> %shuffle, %x
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}
; {1to2} broadcast FP compare on xmm (AVX512VL).
define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test39:
; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1
; SKX: vmovapd %xmm0, %xmm1 {%k1}
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %mask = fcmp ogt <2 x double> %shuffle, %x
  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %max
}
; {1to16} broadcast single-precision compare on zmm.
define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test40:
; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1
; SKX: vmovaps %zmm0, %zmm1 {%k1}
  %a = load float, float* %ptr
  %v = insertelement <16 x float> undef, float %a, i32 0
  %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %mask = fcmp ogt <16 x float> %shuffle, %x
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %max
}
; {1to8} broadcast single-precision compare on ymm.
define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test41:
; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1
; SKX: vmovaps %ymm0, %ymm1 {%k1}
  %a = load float, float* %ptr
  %v = insertelement <8 x float> undef, float %a, i32 0
  %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %mask = fcmp ogt <8 x float> %shuffle, %x
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %max
}
; {1to4} broadcast single-precision compare on xmm.
define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test42:
; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1
; SKX: vmovaps %xmm0, %xmm1 {%k1}
  %a = load float, float* %ptr
  %v = insertelement <4 x float> undef, float %a, i32 0
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %mask = fcmp ogt <4 x float> %shuffle, %x
  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %max
}
; Incoming <8 x i1> mask (vpmovw2m) predicates the broadcast compare.
define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
; SKX-LABEL: test43:
; SKX: vpmovw2m %xmm2, %k1
; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX: vmovapd %zmm0, %zmm1 {%k1}
  %a = load double, double* %ptr
  %v = insertelement <8 x double> undef, double %a, i32 0
  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
  %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
  %mask = and <8 x i1> %mask_cmp, %mask_in
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}