1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
; test1: per-lane select of %x over %y on <16 x float>, driven by an
; `fcmp ole` mask. The KNL checks expect the compare result in %k1 and the
; select lowered to a masked vmovaps followed by a register copy.
4 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
7 ; KNL-NEXT: vcmpleps %zmm1, %zmm0, %k1
8 ; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
9 ; KNL-NEXT: vmovaps %zmm1, %zmm0
11 %mask = fcmp ole <16 x float> %x, %y
12 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
; test2: <8 x double> counterpart of the `fcmp ole` + select pattern.
; The KNL checks expect vcmplepd into %k1 and a masked vmovapd; note the
; final register copy is checked as vmovaps, not vmovapd.
16 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
19 ; KNL-NEXT: vcmplepd %zmm1, %zmm0, %k1
20 ; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
21 ; KNL-NEXT: vmovaps %zmm1, %zmm0
23 %mask = fcmp ole <8 x double> %x, %y
24 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
; test3: `icmp eq` of %x against a <16 x i32> loaded from %yp, then select
; between %x and %x1. The KNL checks expect the loaded value to appear as
; the (%rdi) memory operand of vpcmpeqd rather than as a separate load.
28 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
31 ; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
32 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
33 ; KNL-NEXT: vmovaps %zmm1, %zmm0
35 %y = load <16 x i32>, <16 x i32>* %yp, align 4
36 %mask = icmp eq <16 x i32> %x, %y
37 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test4_unsigned: unsigned `icmp uge` mask on <16 x i32>; the select picks
; the third argument %x1 where the mask is set and %y otherwise.
; The KNL checks expect the compare to be emitted as vpcmpnltud.
41 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
42 ; KNL-LABEL: test4_unsigned:
44 ; KNL-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
45 ; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
46 ; KNL-NEXT: vmovaps %zmm1, %zmm0
48 %mask = icmp uge <16 x i32> %x, %y
49 %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; test5: `icmp eq` + select on <8 x i64>. The KNL checks expect vpcmpeqq
; into %k1 and a masked vmovdqa64 for the select.
53 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
56 ; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
57 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
58 ; KNL-NEXT: vmovaps %zmm1, %zmm0
60 %mask = icmp eq <8 x i64> %x, %y
61 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
; test6_unsigned: unsigned `icmp ugt` mask on <8 x i64>; the select picks
; %x1 where the mask is set and %y otherwise.
; The KNL checks expect the compare to be emitted as vpcmpnleuq.
65 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
66 ; KNL-LABEL: test6_unsigned:
68 ; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
69 ; KNL-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
70 ; KNL-NEXT: vmovaps %zmm1, %zmm0
72 %mask = icmp ugt <8 x i64> %x, %y
73 %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
; test7: 128-bit case. `fcmp olt` of %a against zeroinitializer, then
; select between %a and %b.
; The KNL checks expect a vector-register compare result fed to vblendvps
; (no mask register), while the SKX checks expect the compare to write %k1
; and the select to become a masked vmovaps. Both materialize the zero
; operand with vxorps.
77 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
80 ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
81 ; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
82 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
86 ; SKX: vxorps %xmm2, %xmm2, %xmm2
87 ; SKX: vcmpltps %xmm2, %xmm0, %k1
88 ; SKX: vmovaps %xmm0, %xmm1 {%k1}
89 ; SKX: vmovaps %zmm1, %zmm0
92 %mask = fcmp olt <4 x float> %a, zeroinitializer
93 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
; test8: <2 x double> counterpart of the compare-against-zero select.
; The KNL checks expect vcmpltpd into an xmm register plus vblendvpd; the
; SKX checks expect vcmpltpd into %k1 plus a masked vmovapd.
97 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
100 ; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
101 ; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
102 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
106 ; SKX: vxorpd %xmm2, %xmm2, %xmm2
107 ; SKX: vcmpltpd %xmm2, %xmm0, %k1
108 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
109 ; SKX: vmovaps %zmm1, %zmm0
111 %mask = fcmp olt <2 x double> %a, zeroinitializer
112 %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
; test9: `icmp eq` + select on 256-bit <8 x i32>.
; The KNL checks expect the ymm arguments to be handled through their zmm
; super-registers (the "kill" annotations), with a 512-bit vpcmpeqd and a
; vpblendmd under %k1.
116 define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
119 ; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
120 ; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
121 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
122 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
123 ; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
125 %mask = icmp eq <8 x i32> %x, %y
126 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test10: `fcmp oeq` + select on 256-bit <8 x float>.
; The KNL checks expect 512-bit vcmpeqps / vblendmps on the zmm
; super-registers; the SKX checks expect the compare on ymm registers into
; %k1 followed by a masked vmovaps.
130 define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
133 ; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
134 ; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
135 ; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
136 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
137 ; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
141 ; SKX: vcmpeqps %ymm1, %ymm0, %k1
142 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
143 ; SKX: vmovaps %zmm1, %zmm0
146 %mask = fcmp oeq <8 x float> %x, %y
147 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
; test11_unsigned: `icmp ugt` followed by a select of the same two
; operands, i.e. an unsigned max written as compare + select.
; The KNL checks expect this to collapse into a single vpmaxud on ymm.
151 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
152 ; KNL-LABEL: test11_unsigned:
154 ; KNL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
156 %mask = icmp ugt <8 x i32> %x, %y
157 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test12: `icmp eq` on <16 x i64> (two zmm registers per operand), with the
; <16 x i1> result bitcast to an i16.
; The KNL checks expect one vpcmpeqq per half, the two masks combined with
; kunpckbw, and the result moved to %eax with kmovw.
162 define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
165 ; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
166 ; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
167 ; KNL-NEXT: kunpckbw %k0, %k1, %k0
168 ; KNL-NEXT: kmovw %k0, %eax
169 ; KNL-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
171 %res = icmp eq <16 x i64> %a, %b
172 %res1 = bitcast <16 x i1> %res to i16
; test13: `fcmp oeq` on <16 x float> with the <16 x i1> result zero-extended
; to <16 x i32>.
; The KNL checks expect vcmpeqps into %k1 and a zero-masking ({z})
; vpbroadcastd from a RIP-relative constant.
; NOTE(review): the opening brace of this definition is not on the define
; line in this excerpt.
176 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
179 ; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
180 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
183 %cmpvector_i = fcmp oeq <16 x float> %a, %b
184 %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
; test14: computes %a - %b, compares the difference `sgt` against %a,
; sign-extends that mask, tests the extended value for equality with zero,
; and selects zero where that holds and the difference otherwise.
; The KNL checks expect vpsubd, vpcmpgtd, two consecutive knotw
; instructions, and a zero-masking vmovdqu32.
188 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
191 ; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm1
192 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
193 ; KNL-NEXT: knotw %k0, %k0
194 ; KNL-NEXT: knotw %k0, %k1
195 ; KNL-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
197 %sub_r = sub <16 x i32> %a, %b
198 %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
199 %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
200 %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
201 %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
; test15: <8 x i64> counterpart of the sub / sgt / sext / eq-zero / select
; chain.
; The KNL checks expect vpsubq, vpcmpgtq, two consecutive knotw
; instructions, and a zero-masking vmovdqu64.
205 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
208 ; KNL-NEXT: vpsubq %zmm1, %zmm0, %zmm1
209 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
210 ; KNL-NEXT: knotw %k0, %k0
211 ; KNL-NEXT: knotw %k0, %k1
212 ; KNL-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
214 %sub_r = sub <8 x i64> %a, %b
215 %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
216 %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
217 %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
218 %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
; test16: signed `icmp sge` mask on <16 x i32>; the select picks %x1 where
; the mask is set and %y otherwise.
; The KNL checks expect the compare to be emitted as vpcmpled with the
; register operands in the order %zmm0, %zmm1.
222 define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
225 ; KNL-NEXT: vpcmpled %zmm0, %zmm1, %k1
226 ; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
227 ; KNL-NEXT: vmovaps %zmm1, %zmm0
229 %mask = icmp sge <16 x i32> %x, %y
230 %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; test17: signed `icmp sgt` of %x against a vector loaded from %y.ptr, then
; select between %x and %x1.
; The KNL checks expect vpcmpgtd with the (%rdi) memory operand.
234 define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
237 ; KNL-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
238 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
239 ; KNL-NEXT: vmovaps %zmm1, %zmm0
241 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
242 %mask = icmp sgt <16 x i32> %x, %y
243 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test18: signed `icmp sle` of %x against a vector loaded from %y.ptr, then
; select between %x and %x1.
; The KNL checks expect vpcmpled with the (%rdi) memory operand.
247 define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
250 ; KNL-NEXT: vpcmpled (%rdi), %zmm0, %k1
251 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
252 ; KNL-NEXT: vmovaps %zmm1, %zmm0
254 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
255 %mask = icmp sle <16 x i32> %x, %y
256 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test19: unsigned `icmp ule` of %x against a vector loaded from %y.ptr,
; then select between %x and %x1.
; The KNL checks expect vpcmpleud with the (%rdi) memory operand.
260 define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
263 ; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1
264 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
265 ; KNL-NEXT: vmovaps %zmm1, %zmm0
267 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
268 %mask = icmp ule <16 x i32> %x, %y
269 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test20: two `icmp eq` masks combined as select(%mask0, %mask1, zero),
; then used to select between %x and %y.
; The KNL checks expect the second vpcmpeqd to run under the %k1 write-mask
; produced by the first, so no separate mask-combining instruction appears.
273 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
276 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
277 ; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
278 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
279 ; KNL-NEXT: vmovaps %zmm1, %zmm0
281 %mask1 = icmp eq <16 x i32> %x1, %y1
282 %mask0 = icmp eq <16 x i32> %x, %y
283 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
284 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
; test21: an `icmp sle` mask on (%x, %y) combined with an `icmp sge` mask on
; (%x1, %y1) as select(%mask0, %mask1, zero), then used to select between
; %x and %x1.
; The KNL checks expect both compares as vpcmpleq, the second one executed
; under the %k1 write-mask from the first.
288 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
291 ; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
292 ; KNL-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
293 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
294 ; KNL-NEXT: vmovaps %zmm2, %zmm0
296 %mask1 = icmp sge <8 x i64> %x1, %y1
297 %mask0 = icmp sle <8 x i64> %x, %y
298 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
299 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test22: an `icmp sgt` mask on (%x1, %y1) combined with an `icmp sgt` mask
; of %x against a vector loaded from %y.ptr, then used to select between
; %x and %x1.
; The KNL checks expect the register compare first, then a vpcmpgtq with
; the (%rdi) memory operand executed under the resulting %k1.
303 define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
306 ; KNL-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
307 ; KNL-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
308 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
309 ; KNL-NEXT: vmovaps %zmm1, %zmm0
311 %mask1 = icmp sgt <8 x i64> %x1, %y1
312 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
313 %mask0 = icmp sgt <8 x i64> %x, %y
314 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
315 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test23: a signed `icmp sge` mask on (%x1, %y1) combined with an unsigned
; `icmp ule` mask of %x against a vector loaded from %y.ptr, then used to
; select between %x and %x1.
; The KNL checks expect vpcmpled on registers followed by vpcmpleud with
; the (%rdi) memory operand under %k1.
319 define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
322 ; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
323 ; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
324 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
325 ; KNL-NEXT: vmovaps %zmm1, %zmm0
327 %mask1 = icmp sge <16 x i32> %x1, %y1
328 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
329 %mask0 = icmp ule <16 x i32> %x, %y
330 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
331 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test24: a scalar i64 loaded from %yb.ptr is splatted to <8 x i64>
; (insertelement + shufflevector with a zero mask) and compared `icmp eq`
; against %x; the mask selects between %x and %x1.
; The KNL checks expect the splat to appear as the (%rdi){1to8} broadcast
; memory operand of vpcmpeqq.
335 define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
338 ; KNL-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
339 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
340 ; KNL-NEXT: vmovaps %zmm1, %zmm0
342 %yb = load i64, i64* %yb.ptr, align 4
343 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
344 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
345 %mask = icmp eq <8 x i64> %x, %y
346 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test25: a scalar i32 loaded from %yb.ptr is splatted to <16 x i32> and
; compared `icmp sle` against %x; the mask selects between %x and %x1.
; The KNL checks expect vpcmpled with the (%rdi){1to16} broadcast operand.
350 define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
353 ; KNL-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
354 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
355 ; KNL-NEXT: vmovaps %zmm1, %zmm0
357 %yb = load i32, i32* %yb.ptr, align 4
358 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
359 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
360 %mask = icmp sle <16 x i32> %x, %y
361 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test26: an `icmp sge` mask on (%x1, %y1) combined with an `icmp sgt` mask
; of %x against a splatted i32 loaded from %yb.ptr, then used to select
; between %x and %x1.
; The KNL checks expect vpcmpled on registers, then vpcmpgtd with the
; (%rdi){1to16} broadcast operand executed under %k1.
365 define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
368 ; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
369 ; KNL-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
370 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
371 ; KNL-NEXT: vmovaps %zmm1, %zmm0
373 %mask1 = icmp sge <16 x i32> %x1, %y1
374 %yb = load i32, i32* %yb.ptr, align 4
375 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
376 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
377 %mask0 = icmp sgt <16 x i32> %x, %y
378 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
379 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test27: an `icmp sge` mask on (%x1, %y1) combined with an `icmp sle` mask
; of %x against a splatted i64 loaded from %yb.ptr, then used to select
; between %x and %x1.
; The KNL checks expect both compares as vpcmpleq, the second using the
; (%rdi){1to8} broadcast operand under %k1.
383 define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
386 ; KNL-NEXT: vpcmpleq %zmm1, %zmm2, %k1
387 ; KNL-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
388 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
389 ; KNL-NEXT: vmovaps %zmm1, %zmm0
391 %mask1 = icmp sge <8 x i64> %x1, %y1
392 %yb = load i64, i64* %yb.ptr, align 4
393 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
394 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
395 %mask0 = icmp sle <8 x i64> %x, %y
396 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
397 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test28: builds two `icmp sgt` masks on <8 x i64>, compares the two
; <8 x i1> masks for equality, and sign-extends the result to <8 x i32>.
; NOTE(review): no FileCheck lines for this function are visible in this
; excerpt, so the expected codegen cannot be described from here.
405 define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
406 %x_gt_y = icmp sgt <8 x i64> %x, %y
407 %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
408 %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
409 %resse = sext <8 x i1>%res to <8 x i32>
; test29: builds two `icmp sgt` masks on <16 x i32>, compares the two
; <16 x i1> masks for inequality, and sign-extends the result to <16 x i8>.
; NOTE(review): no FileCheck lines for this function are visible in this
; excerpt, so the expected codegen cannot be described from here.
417 define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
418 %x_gt_y = icmp sgt <16 x i32> %x, %y
419 %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
420 %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
421 %resse = sext <16 x i1>%res to <16 x i8>
; test30: `fcmp oeq` + select on <4 x double>, returning the selected value.
; The SKX checks expect a ymm vcmpeqpd into %k1 and a masked vmovapd.
425 define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
427 ; SKX: vcmpeqpd %ymm1, %ymm0, %k1
428 ; SKX: vmovapd %ymm0, %ymm1 {%k1}
430 %mask = fcmp oeq <4 x double> %x, %y
431 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
432 ret <4 x double> %max
; test31: `fcmp olt` of %x against a <2 x double> loaded from %yp, then
; select between %x and %x1.
; The SKX checks expect vcmpltpd with the (%rdi) memory operand on xmm.
435 define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
437 ; SKX: vcmpltpd (%rdi), %xmm0, %k1
438 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
440 %y = load <2 x double>, <2 x double>* %yp, align 4
441 %mask = fcmp olt <2 x double> %x, %y
442 %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
443 ret <2 x double> %max
; test32: the compare is written as `fcmp ogt %y, %x`, with the loaded
; vector as the left operand.
; The SKX checks still expect a vcmpltpd with the (%rdi) memory operand,
; i.e. the same instruction shape as the `olt %x, %y` tests.
446 define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
448 ; SKX: vcmpltpd (%rdi), %ymm0, %k1
449 ; SKX: vmovapd %ymm0, %ymm1 {%k1}
451 %y = load <4 x double>, <4 x double>* %yp, align 4
452 %mask = fcmp ogt <4 x double> %y, %x
453 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
454 ret <4 x double> %max
; test33: `fcmp olt` of %x against an <8 x double> loaded from %yp, then
; select between %x and %x1.
; The SKX checks expect vcmpltpd with the (%rdi) memory operand on zmm.
457 define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
459 ; SKX: vcmpltpd (%rdi), %zmm0, %k1
460 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
461 %y = load <8 x double>, <8 x double>* %yp, align 4
462 %mask = fcmp olt <8 x double> %x, %y
463 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
464 ret <8 x double> %max
; test34: `fcmp olt` of %x against a <4 x float> loaded from %yp, then
; select between %x and %x1.
; The SKX checks expect vcmpltps with the (%rdi) memory operand on xmm.
467 define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
469 ; SKX: vcmpltps (%rdi), %xmm0, %k1
470 ; SKX: vmovaps %xmm0, %xmm1 {%k1}
471 %y = load <4 x float>, <4 x float>* %yp, align 4
472 %mask = fcmp olt <4 x float> %x, %y
473 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
; test35: the compare is written as `fcmp ogt %y, %x`, with the loaded
; <8 x float> as the left operand.
; The SKX checks expect a vcmpltps with the (%rdi) memory operand on ymm.
477 define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
479 ; SKX: vcmpltps (%rdi), %ymm0, %k1
480 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
482 %y = load <8 x float>, <8 x float>* %yp, align 4
483 %mask = fcmp ogt <8 x float> %y, %x
484 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
; test36: `fcmp olt` of %x against a <16 x float> loaded from %yp, then
; select between %x and %x1.
; The SKX checks expect vcmpltps with the (%rdi) memory operand on zmm.
488 define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
490 ; SKX: vcmpltps (%rdi), %zmm0, %k1
491 ; SKX: vmovaps %zmm0, %zmm1 {%k1}
492 %y = load <16 x float>, <16 x float>* %yp, align 4
493 %mask = fcmp olt <16 x float> %x, %y
494 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
495 ret <16 x float> %max
; test37: a scalar double loaded from %ptr is splatted to <8 x double> and
; used as the left operand of `fcmp ogt` against %x; the mask selects
; between %x and %x1.
; The SKX checks expect vcmpltpd with the (%rdi){1to8} broadcast operand.
498 define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
500 ; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1
501 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
503 %a = load double, double* %ptr
504 %v = insertelement <8 x double> undef, double %a, i32 0
505 %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
507 %mask = fcmp ogt <8 x double> %shuffle, %x
508 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
509 ret <8 x double> %max
; test38: <4 x double> version of the splatted-scalar `fcmp ogt` + select.
; The SKX checks expect vcmpltpd with the (%rdi){1to4} broadcast operand
; on ymm.
512 define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
514 ; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1
515 ; SKX: vmovapd %ymm0, %ymm1 {%k1}
517 %a = load double, double* %ptr
518 %v = insertelement <4 x double> undef, double %a, i32 0
519 %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
521 %mask = fcmp ogt <4 x double> %shuffle, %x
522 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
523 ret <4 x double> %max
; test39: <2 x double> version of the splatted-scalar `fcmp ogt` + select;
; here the splat mask is spelled out as <i32 0, i32 0>.
; The SKX checks expect vcmpltpd with the (%rdi){1to2} broadcast operand
; on xmm.
526 define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
528 ; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1
529 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
531 %a = load double, double* %ptr
532 %v = insertelement <2 x double> undef, double %a, i32 0
533 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
535 %mask = fcmp ogt <2 x double> %shuffle, %x
536 %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
537 ret <2 x double> %max
; test40: <16 x float> version of the splatted-scalar `fcmp ogt` + select,
; with an explicit all-zero shuffle mask.
; The SKX checks expect vcmpltps with the (%rdi){1to16} broadcast operand
; on zmm.
541 define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
543 ; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1
544 ; SKX: vmovaps %zmm0, %zmm1 {%k1}
546 %a = load float, float* %ptr
547 %v = insertelement <16 x float> undef, float %a, i32 0
548 %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
550 %mask = fcmp ogt <16 x float> %shuffle, %x
551 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
552 ret <16 x float> %max
; test41: <8 x float> version of the splatted-scalar `fcmp ogt` + select.
; The SKX checks expect vcmpltps with the (%rdi){1to8} broadcast operand
; on ymm.
555 define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
557 ; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1
558 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
560 %a = load float, float* %ptr
561 %v = insertelement <8 x float> undef, float %a, i32 0
562 %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
564 %mask = fcmp ogt <8 x float> %shuffle, %x
565 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
; test42: <4 x float> version of the splatted-scalar `fcmp ogt` + select.
; The SKX checks expect vcmpltps with the (%rdi){1to4} broadcast operand
; on xmm.
569 define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
571 ; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1
572 ; SKX: vmovaps %xmm0, %xmm1 {%k1}
574 %a = load float, float* %ptr
575 %v = insertelement <4 x float> undef, float %a, i32 0
576 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
578 %mask = fcmp ogt <4 x float> %shuffle, %x
579 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
; test43: splatted-scalar `fcmp ogt` on <8 x double> whose result is ANDed
; with an incoming <8 x i1> argument %mask_in before driving the select
; between %x and %x1.
; The SKX checks expect %mask_in to be converted to %k1 with vpmovw2m, and
; the broadcast vcmpltpd to execute under that %k1, so the AND is absorbed
; into the compare's write-mask.
583 define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
585 ; SKX: vpmovw2m %xmm2, %k1
586 ; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
587 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
589 %a = load double, double* %ptr
590 %v = insertelement <8 x double> undef, double %a, i32 0
591 %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
593 %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
594 %mask = and <8 x i1> %mask_cmp, %mask_in
595 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
596 ret <8 x double> %max