; Check AVX2 instructions that should be disabled when AVX512VL/AVX512BW are present.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
; 256-bit vector AND; the preceding add forces the integer execution domain.
define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 128-bit vector AND; the preceding add forces the integer execution domain.
define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; 256-bit AND-NOT pattern: and(%a, xor(%a2, -1)).
define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}
; 128-bit AND-NOT pattern: and(%a, xor(%a2, -1)).
define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}
; 256-bit vector OR; the preceding add forces the integer execution domain.
define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 256-bit vector XOR; the preceding add forces the integer execution domain.
define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}
; 128-bit vector OR; the preceding add forces the integer execution domain.
define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; 128-bit vector XOR; the preceding add forces the integer execution domain.
define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}
; <4 x i64> add.
define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}
; <8 x i32> add.
define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}
; <16 x i16> add.
define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}
; <32 x i8> add.
define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}
; <4 x i64> sub.
define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}
; <8 x i32> sub.
define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}
; <16 x i16> sub.
define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}
; <32 x i8> sub.
define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}
; <16 x i16> mul.
define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}
; slt compare + sext of <8 x i32>; per the name, intended to select vpcmpgtd.
define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}
; eq compare + sext of <32 x i8>; per the name, intended to select vpcmpeqb.
define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}
; eq compare + sext of <16 x i16>; per the name, intended to select vpcmpeqw.
define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}
; slt compare + sext of <32 x i8>; per the name, intended to select vpcmpgtb.
define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}
; slt compare + sext of <16 x i16>; per the name, intended to select vpcmpgtw.
define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}
; eq compare + sext of <8 x i32>; per the name, intended to select vpcmpeqd.
define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}
; <2 x i64> add.
define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}
; <4 x i32> add.
define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}
; <8 x i16> add.
define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}
; <16 x i8> add.
define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}
; <2 x i64> sub.
define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}
; <4 x i32> sub.
define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}
; <8 x i16> sub.
define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}
; <16 x i8> sub.
define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}
; <8 x i16> mul.
define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}
; slt compare + sext of <8 x i16>; per the name, intended to select vpcmpgtw.
define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}
; slt compare + sext of <16 x i8>; per the name, intended to select vpcmpgtb.
define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}
; eq compare + sext of <8 x i16>; per the name, intended to select vpcmpeqw.
define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}
; eq compare + sext of <16 x i8>; per the name, intended to select vpcmpeqb.
define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}
; Cross-operand <8 x i16> shuffle; per the name, intended to select vpalignr.
define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}
; Cross-operand <16 x i16> shuffle; per the name, intended to select vpalignr.
define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}
; Cross-operand <16 x i8> shuffle; per the name, intended to select vpalignr.
define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}
; Cross-operand <32 x i8> shuffle with undef lanes; per the name, intended to select vpalignr.
define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}
; Cross-operand <2 x i64> shuffle; per the name, intended to select vpalignr.
define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
; Cross-operand <4 x i32> shuffle; per the name, intended to select vpalignr.
define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}
; Cross-operand <8 x i32> shuffle; per the name, intended to select vpalignr.
define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}
; Two-operand <4 x double> shuffle with mask 5,1,6,3.
define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}
; f64 shuffle with a zero operand, round-tripped through a <4 x float> bitcast/swap.
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}
; Shuffle of a zero vector with %a, placing %a's elements 0 and 8 at lanes 7 and 15.
define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}
; Extracts both i64 elements: stores element 1, returns element 0.
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}
; Extracts two i32 elements: stores element 3, returns element 1.
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}
; Extracts two i16 elements: stores element 3, returns element 1.
define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}
; Extracts two i8 elements: stores element 3, returns element 1.
define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}
; Inserts a loaded value and %y into %x.
; NOTE(review): index 3 is out of range for <2 x i64> (result lane is poison) — verify
; this matches the upstream test's intent.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3
  ret <2 x i64> %r2
}
; Inserts a loaded i32 at lane 1 and %y at lane 3.
define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}
; Inserts a loaded i16 at lane 1 and %y at lane 5.
define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}
; Inserts a loaded i8 at lane 3 and %y at lane 10.
define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}
; Two-operand <4 x i32> shuffle with mask 0,4,5,1.
define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %shuffle
}
; Two-operand <4 x i32> shuffle with mask 0,1,4,2.
define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %shuffle
}
; Broadcast of the low two i8 elements of %a across all lanes.
define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}
; Broadcast of element 0 of %a to every lane.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}
; Duplicates the odd float lanes of %a (mask 1,1,3,3,5,5,7,7).
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}
; Duplicates the odd float lanes of %a (mask 1,1,3,3).
define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}
; Duplicates the even float lanes of %a (mask 0,0,2,2,4,4,6,6).
define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}
; Duplicates the even float lanes of %a (mask 0,0,2,2).
define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}
; Loads a double into the low lane, keeping %b's high lane.
define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
; Loads a double into the high lane, keeping %b's low lane in lane 0... note the
; mask <2,0> places %b's lane 0 first and the loaded value second.
define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}
; Stores the low two floats of an fadd result as a single i64.
define void @store_floats(<4 x float> %x, i64* %p) {
  %a = fadd <4 x float> %x, %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = bitcast <2 x float> %b to i64
  store i64 %c, i64* %p
  ret void
}
; Stores the low double of an fadd result as an i64.
define void @store_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 0
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}
; Stores the high double of an fadd result as an i64.
define void @store_h_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 1
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}
; Broadcast of a scalar double loaded from memory.
define <2 x double> @test39(double* %ptr) nounwind {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
; Broadcast of lane 0 of a vector loaded from memory.
define <2 x double> @test40(<2 x double>* %ptr) nounwind {
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
; Duplicates lane 0 of %a.
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
427 define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
428 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
429 ret <4 x double> %shuffle