1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s
; Calls @llvm.x86.aesni.aesdec on %a0 and %a1; the FileCheck directive expects `vaesdec %xmm1, %xmm0, %xmm0`.
3 define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
4 ; CHECK-LABEL: test_x86_aesni_aesdec:
6 ; CHECK-NEXT: vaesdec %xmm1, %xmm0, %xmm0
8 %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
11 declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
; Calls @llvm.x86.aesni.aesdeclast on %a0 and %a1; the FileCheck directive expects `vaesdeclast %xmm1, %xmm0, %xmm0`.
14 define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
15 ; CHECK-LABEL: test_x86_aesni_aesdeclast:
17 ; CHECK-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0
19 %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
22 declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
; Calls @llvm.x86.aesni.aesenc on %a0 and %a1; the FileCheck directive expects `vaesenc %xmm1, %xmm0, %xmm0`.
25 define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
26 ; CHECK-LABEL: test_x86_aesni_aesenc:
28 ; CHECK-NEXT: vaesenc %xmm1, %xmm0, %xmm0
30 %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
33 declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
; Calls @llvm.x86.aesni.aesenclast on %a0 and %a1; the FileCheck directive expects `vaesenclast %xmm1, %xmm0, %xmm0`.
36 define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
37 ; CHECK-LABEL: test_x86_aesni_aesenclast:
39 ; CHECK-NEXT: vaesenclast %xmm1, %xmm0, %xmm0
41 %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
44 declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
; Calls the single-operand @llvm.x86.aesni.aesimc on %a0; the FileCheck directive expects `vaesimc %xmm0, %xmm0`.
47 define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
48 ; CHECK-LABEL: test_x86_aesni_aesimc:
50 ; CHECK-NEXT: vaesimc %xmm0, %xmm0
52 %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
55 declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
; Calls @llvm.x86.aesni.aeskeygenassist on %a0 with the constant i8 7; the expected
; instruction carries that constant as the `$7` immediate of vaeskeygenassist.
58 define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
59 ; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
61 ; CHECK-NEXT: vaeskeygenassist $7, %xmm0, %xmm0
63 %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
66 declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
; Calls @llvm.x86.sse2.add.sd on %a0 and %a1; the FileCheck directive expects `vaddsd %xmm1, %xmm0, %xmm0`.
69 define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
70 ; CHECK-LABEL: test_x86_sse2_add_sd:
72 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0
74 %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
77 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.cmp.pd with the constant predicate operand i8 7; the expected
; output spells that predicate in the mnemonic itself: `vcmpordpd %xmm1, %xmm0, %xmm0`.
80 define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
81 ; CHECK-LABEL: test_x86_sse2_cmp_pd:
83 ; CHECK-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
85 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
88 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
; Calls @llvm.x86.sse2.cmp.sd with the constant predicate operand i8 7; the expected
; output spells that predicate in the mnemonic itself: `vcmpordsd %xmm1, %xmm0, %xmm0`.
91 define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
92 ; CHECK-LABEL: test_x86_sse2_cmp_sd:
94 ; CHECK-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0
96 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
99 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
; Calls @llvm.x86.sse2.comieq.sd (i32 result); expected sequence is `vcomisd %xmm1, %xmm0`,
; then `sete %al`, then `movzbl %al, %eax` to widen the flag byte into the i32 return register.
102 define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
103 ; CHECK-LABEL: test_x86_sse2_comieq_sd:
105 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
106 ; CHECK-NEXT: sete %al
107 ; CHECK-NEXT: movzbl %al, %eax
109 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
112 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.comige.sd (i32 result); expected sequence is `vcomisd %xmm1, %xmm0`,
; then `setae %al`, then `movzbl %al, %eax`.
115 define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
116 ; CHECK-LABEL: test_x86_sse2_comige_sd:
118 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
119 ; CHECK-NEXT: setae %al
120 ; CHECK-NEXT: movzbl %al, %eax
122 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
125 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.comigt.sd (i32 result); expected sequence is `vcomisd %xmm1, %xmm0`,
; then `seta %al`, then `movzbl %al, %eax`.
128 define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
129 ; CHECK-LABEL: test_x86_sse2_comigt_sd:
131 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
132 ; CHECK-NEXT: seta %al
133 ; CHECK-NEXT: movzbl %al, %eax
135 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
138 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.comile.sd (i32 result); expected sequence is `vcomisd %xmm1, %xmm0`,
; then `setbe %al`, then `movzbl %al, %eax`.
141 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
142 ; CHECK-LABEL: test_x86_sse2_comile_sd:
144 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
145 ; CHECK-NEXT: setbe %al
146 ; CHECK-NEXT: movzbl %al, %eax
148 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
151 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.comilt.sd (i32 result). Unlike the sibling comi* tests, the expected
; sequence after `vcomisd %xmm1, %xmm0` is `sbbl %eax, %eax` followed by `andl $1, %eax`
; rather than a setcc/movzbl pair.
154 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
155 ; CHECK-LABEL: test_x86_sse2_comilt_sd:
157 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
158 ; CHECK-NEXT: sbbl %eax, %eax
159 ; CHECK-NEXT: andl $1, %eax
161 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
164 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.comineq.sd (i32 result); expected sequence is `vcomisd %xmm1, %xmm0`,
; then `setne %al`, then `movzbl %al, %eax`.
167 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
168 ; CHECK-LABEL: test_x86_sse2_comineq_sd:
170 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
171 ; CHECK-NEXT: setne %al
172 ; CHECK-NEXT: movzbl %al, %eax
174 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
177 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.cvtdq2pd (<4 x i32> in, <2 x double> out) and returns the result;
; the FileCheck directive expects `vcvtdq2pd %xmm0, %xmm0`.
180 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
181 ; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
183 ; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
185 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
186 ret <2 x double> %res
188 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.cvtdq2ps (<4 x i32> in, <4 x float> out); the FileCheck directive expects `vcvtdq2ps %xmm0, %xmm0`.
191 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
192 ; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
194 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
196 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
199 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.cvtpd2dq (<2 x double> in, <4 x i32> out); the FileCheck directive expects `vcvtpd2dq %xmm0, %xmm0`.
202 define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
203 ; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
205 ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
207 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
210 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.cvtpd2ps (<2 x double> in, <4 x float> out); the FileCheck directive expects `vcvtpd2ps %xmm0, %xmm0`.
213 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
214 ; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
216 ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
218 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
221 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.cvtps2dq (<4 x float> in, <4 x i32> out); the FileCheck directive expects `vcvtps2dq %xmm0, %xmm0`.
224 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
225 ; CHECK-LABEL: test_x86_sse2_cvtps2dq:
227 ; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
229 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
232 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
; Calls @llvm.x86.sse2.cvtps2pd (<4 x float> in, <2 x double> out) and returns the result;
; the FileCheck directive expects `vcvtps2pd %xmm0, %xmm0`.
235 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
236 ; CHECK-LABEL: test_x86_sse2_cvtps2pd:
238 ; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
240 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
241 ret <2 x double> %res
243 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
; Calls @llvm.x86.sse2.cvtsd2si (<2 x double> in, i32 out); the expected instruction writes
; a general-purpose register: `vcvtsd2si %xmm0, %eax`.
246 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
247 ; CHECK-LABEL: test_x86_sse2_cvtsd2si:
249 ; CHECK-NEXT: vcvtsd2si %xmm0, %eax
251 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
254 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.cvtsd2ss with a <4 x float> %a0 and a <2 x double> %a1;
; the FileCheck directive expects `vcvtsd2ss %xmm1, %xmm0, %xmm0`.
257 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
258 ; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
260 ; CHECK-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
262 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
265 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.cvtsi2sd with the constant i32 7 and returns the result. The expected
; output first materialises the constant (`movl $7, %eax`) and then converts from that
; register with `vcvtsi2sdl %eax, %xmm0, %xmm0`.
268 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
269 ; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
271 ; CHECK-NEXT: movl $7, %eax
272 ; CHECK-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
274 %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
275 ret <2 x double> %res
277 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
; Calls @llvm.x86.sse2.cvtss2sd with a <2 x double> %a0 and a <4 x float> %a1 and returns
; the result; the FileCheck directive expects `vcvtss2sd %xmm1, %xmm0, %xmm0`.
280 define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
281 ; CHECK-LABEL: test_x86_sse2_cvtss2sd:
283 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
285 %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
286 ret <2 x double> %res
288 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
; Calls @llvm.x86.sse2.cvttpd2dq (<2 x double> in, <4 x i32> out); the FileCheck directive expects `vcvttpd2dq %xmm0, %xmm0`.
291 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
292 ; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
294 ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
296 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
299 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.cvttps2dq (<4 x float> in, <4 x i32> out); the FileCheck directive expects `vcvttps2dq %xmm0, %xmm0`.
302 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
303 ; CHECK-LABEL: test_x86_sse2_cvttps2dq:
305 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
307 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
310 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
; Calls @llvm.x86.sse2.cvttsd2si (<2 x double> in, i32 out); the expected instruction writes
; a general-purpose register: `vcvttsd2si %xmm0, %eax`.
313 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
314 ; CHECK-LABEL: test_x86_sse2_cvttsd2si:
316 ; CHECK-NEXT: vcvttsd2si %xmm0, %eax
318 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
321 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.div.sd on %a0 and %a1 and returns the result; the FileCheck directive expects `vdivsd %xmm1, %xmm0, %xmm0`.
324 define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
325 ; CHECK-LABEL: test_x86_sse2_div_sd:
327 ; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0
329 %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
330 ret <2 x double> %res
332 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.max.pd on %a0 and %a1 and returns the result; the FileCheck directive expects `vmaxpd %xmm1, %xmm0, %xmm0`.
336 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
337 ; CHECK-LABEL: test_x86_sse2_max_pd:
339 ; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
341 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
342 ret <2 x double> %res
344 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.max.sd on %a0 and %a1 and returns the result; the FileCheck directive expects `vmaxsd %xmm1, %xmm0, %xmm0`.
347 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
348 ; CHECK-LABEL: test_x86_sse2_max_sd:
350 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
352 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
353 ret <2 x double> %res
355 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.min.pd on %a0 and %a1 and returns the result; the FileCheck directive expects `vminpd %xmm1, %xmm0, %xmm0`.
358 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
359 ; CHECK-LABEL: test_x86_sse2_min_pd:
361 ; CHECK-NEXT: vminpd %xmm1, %xmm0, %xmm0
363 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
364 ret <2 x double> %res
366 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.min.sd on %a0 and %a1 and returns the result; the FileCheck directive expects `vminsd %xmm1, %xmm0, %xmm0`.
369 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
370 ; CHECK-LABEL: test_x86_sse2_min_sd:
372 ; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0
374 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
375 ret <2 x double> %res
377 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.movmsk.pd (<2 x double> in, i32 out); the FileCheck directive expects `vmovmskpd %xmm0, %eax`.
380 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
381 ; CHECK-LABEL: test_x86_sse2_movmsk_pd:
383 ; CHECK-NEXT: vmovmskpd %xmm0, %eax
385 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
388 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.mul.sd on %a0 and %a1 and returns the result; the FileCheck directive expects `vmulsd %xmm1, %xmm0, %xmm0`.
393 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
394 ; CHECK-LABEL: test_x86_sse2_mul_sd:
396 ; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0
398 %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
399 ret <2 x double> %res
401 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.packssdw.128 (two <4 x i32> in, <8 x i16> out); the FileCheck directive expects `vpackssdw %xmm1, %xmm0, %xmm0`.
404 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
405 ; CHECK-LABEL: test_x86_sse2_packssdw_128:
407 ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
409 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
412 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.packsswb.128 (two <8 x i16> in, <16 x i8> out); the FileCheck directive expects `vpacksswb %xmm1, %xmm0, %xmm0`.
415 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
416 ; CHECK-LABEL: test_x86_sse2_packsswb_128:
418 ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
420 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
423 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.packuswb.128 (two <8 x i16> in, <16 x i8> out); the FileCheck directive expects `vpackuswb %xmm1, %xmm0, %xmm0`.
426 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
427 ; CHECK-LABEL: test_x86_sse2_packuswb_128:
429 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
431 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
434 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.padds.b on two <16 x i8> values; the FileCheck directive expects `vpaddsb %xmm1, %xmm0, %xmm0`.
437 define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
438 ; CHECK-LABEL: test_x86_sse2_padds_b:
440 ; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
442 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
445 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.padds.w on two <8 x i16> values; the FileCheck directive expects `vpaddsw %xmm1, %xmm0, %xmm0`.
448 define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
449 ; CHECK-LABEL: test_x86_sse2_padds_w:
451 ; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
453 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
456 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.paddus.b on two <16 x i8> values; the FileCheck directive expects `vpaddusb %xmm1, %xmm0, %xmm0`.
459 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
460 ; CHECK-LABEL: test_x86_sse2_paddus_b:
462 ; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
464 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
467 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.paddus.w on two <8 x i16> values; the FileCheck directive expects `vpaddusw %xmm1, %xmm0, %xmm0`.
470 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
471 ; CHECK-LABEL: test_x86_sse2_paddus_w:
473 ; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
475 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
478 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pavg.b on two <16 x i8> values; the FileCheck directive expects `vpavgb %xmm1, %xmm0, %xmm0`.
481 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
482 ; CHECK-LABEL: test_x86_sse2_pavg_b:
484 ; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0
486 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
489 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.pavg.w on two <8 x i16> values; the FileCheck directive expects `vpavgw %xmm1, %xmm0, %xmm0`.
492 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
493 ; CHECK-LABEL: test_x86_sse2_pavg_w:
495 ; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0
497 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
500 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pmadd.wd (two <8 x i16> in, <4 x i32> out); the FileCheck directive expects `vpmaddwd %xmm1, %xmm0, %xmm0`.
503 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
504 ; CHECK-LABEL: test_x86_sse2_pmadd_wd:
506 ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
508 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
511 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pmaxs.w on two <8 x i16> values; the FileCheck directive expects `vpmaxsw %xmm1, %xmm0, %xmm0`.
514 define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
515 ; CHECK-LABEL: test_x86_sse2_pmaxs_w:
517 ; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
519 %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
522 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pmaxu.b on two <16 x i8> values; the FileCheck directive expects `vpmaxub %xmm1, %xmm0, %xmm0`.
525 define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
526 ; CHECK-LABEL: test_x86_sse2_pmaxu_b:
528 ; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
530 %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
533 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.pmins.w on two <8 x i16> values; the FileCheck directive expects `vpminsw %xmm1, %xmm0, %xmm0`.
536 define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
537 ; CHECK-LABEL: test_x86_sse2_pmins_w:
539 ; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0
541 %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
544 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pminu.b on two <16 x i8> values; the FileCheck directive expects `vpminub %xmm1, %xmm0, %xmm0`.
547 define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
548 ; CHECK-LABEL: test_x86_sse2_pminu_b:
550 ; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0
552 %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
555 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.pmovmskb.128 (<16 x i8> in, i32 out); the FileCheck directive expects `vpmovmskb %xmm0, %eax`.
558 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
559 ; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
561 ; CHECK-NEXT: vpmovmskb %xmm0, %eax
563 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
566 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.pmulh.w on two <8 x i16> values; the FileCheck directive expects `vpmulhw %xmm1, %xmm0, %xmm0`.
569 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
570 ; CHECK-LABEL: test_x86_sse2_pmulh_w:
572 ; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
574 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
577 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pmulhu.w on two <8 x i16> values; the FileCheck directive expects `vpmulhuw %xmm1, %xmm0, %xmm0`.
580 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
581 ; CHECK-LABEL: test_x86_sse2_pmulhu_w:
583 ; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
585 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
588 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pmulu.dq (two <4 x i32> in, <2 x i64> out); the FileCheck directive expects `vpmuludq %xmm1, %xmm0, %xmm0`.
591 define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
592 ; CHECK-LABEL: test_x86_sse2_pmulu_dq:
594 ; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
596 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
599 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.psad.bw (two <16 x i8> in, <2 x i64> out); the FileCheck directive expects `vpsadbw %xmm1, %xmm0, %xmm0`.
602 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
603 ; CHECK-LABEL: test_x86_sse2_psad_bw:
605 ; CHECK-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
607 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
610 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.psll.d with a vector second operand %a1; the FileCheck directive expects
; the register form `vpslld %xmm1, %xmm0, %xmm0`.
613 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
614 ; CHECK-LABEL: test_x86_sse2_psll_d:
616 ; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
618 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
621 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.psll.q with a vector second operand %a1; the FileCheck directive expects
; the register form `vpsllq %xmm1, %xmm0, %xmm0`.
624 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
625 ; CHECK-LABEL: test_x86_sse2_psll_q:
627 ; CHECK-NEXT: vpsllq %xmm1, %xmm0, %xmm0
629 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
632 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
; Calls @llvm.x86.sse2.psll.w with a vector second operand %a1; the FileCheck directive expects
; the register form `vpsllw %xmm1, %xmm0, %xmm0`.
635 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
636 ; CHECK-LABEL: test_x86_sse2_psll_w:
638 ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0
640 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
643 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.pslli.d with the constant i32 7; the expected instruction is the
; immediate form `vpslld $7, %xmm0, %xmm0`.
646 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
647 ; CHECK-LABEL: test_x86_sse2_pslli_d:
649 ; CHECK-NEXT: vpslld $7, %xmm0, %xmm0
651 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
654 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
; Calls @llvm.x86.sse2.pslli.q with the constant i32 7; the expected instruction is the
; immediate form `vpsllq $7, %xmm0, %xmm0`.
657 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
658 ; CHECK-LABEL: test_x86_sse2_pslli_q:
660 ; CHECK-NEXT: vpsllq $7, %xmm0, %xmm0
662 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
665 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
; Calls @llvm.x86.sse2.pslli.w with the constant i32 7; the expected instruction is the
; immediate form `vpsllw $7, %xmm0, %xmm0`.
668 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
669 ; CHECK-LABEL: test_x86_sse2_pslli_w:
671 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
673 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
676 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psra.d with a vector second operand %a1; the FileCheck directive expects
; the register form `vpsrad %xmm1, %xmm0, %xmm0`.
679 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
680 ; CHECK-LABEL: test_x86_sse2_psra_d:
682 ; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
684 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
687 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.psra.w with a vector second operand %a1; the FileCheck directive expects
; the register form `vpsraw %xmm1, %xmm0, %xmm0`.
690 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
691 ; CHECK-LABEL: test_x86_sse2_psra_w:
693 ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0
695 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
698 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.psrai.d with the constant i32 7; the expected instruction is the
; immediate form `vpsrad $7, %xmm0, %xmm0`.
701 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
702 ; CHECK-LABEL: test_x86_sse2_psrai_d:
704 ; CHECK-NEXT: vpsrad $7, %xmm0, %xmm0
706 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
709 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psrai.w with the constant i32 7; the expected instruction is the
; immediate form `vpsraw $7, %xmm0, %xmm0`.
712 define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
713 ; CHECK-LABEL: test_x86_sse2_psrai_w:
715 ; CHECK-NEXT: vpsraw $7, %xmm0, %xmm0
717 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
720 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psrl.d with a vector second operand %a1; the FileCheck directive expects
; the register form `vpsrld %xmm1, %xmm0, %xmm0`.
723 define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
724 ; CHECK-LABEL: test_x86_sse2_psrl_d:
726 ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
728 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
731 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
; Calls @llvm.x86.sse2.psrl.q with a vector second operand %a1; the FileCheck directive expects
; the register form `vpsrlq %xmm1, %xmm0, %xmm0`.
734 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
735 ; CHECK-LABEL: test_x86_sse2_psrl_q:
737 ; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
739 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
742 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
; Calls @llvm.x86.sse2.psrl.w with a vector second operand %a1; the FileCheck directive expects
; the register form `vpsrlw %xmm1, %xmm0, %xmm0`.
745 define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
746 ; CHECK-LABEL: test_x86_sse2_psrl_w:
748 ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
750 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
753 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.psrli.d with the constant i32 7; the expected instruction is the
; immediate form `vpsrld $7, %xmm0, %xmm0`.
756 define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
757 ; CHECK-LABEL: test_x86_sse2_psrli_d:
759 ; CHECK-NEXT: vpsrld $7, %xmm0, %xmm0
761 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
764 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psrli.q with the constant i32 7; the expected instruction is the
; immediate form `vpsrlq $7, %xmm0, %xmm0`.
767 define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
768 ; CHECK-LABEL: test_x86_sse2_psrli_q:
770 ; CHECK-NEXT: vpsrlq $7, %xmm0, %xmm0
772 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
775 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psrli.w with the constant i32 7; the expected instruction is the
; immediate form `vpsrlw $7, %xmm0, %xmm0`.
778 define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
779 ; CHECK-LABEL: test_x86_sse2_psrli_w:
781 ; CHECK-NEXT: vpsrlw $7, %xmm0, %xmm0
783 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
786 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
; Calls @llvm.x86.sse2.psubs.b on two <16 x i8> values; the FileCheck directive expects `vpsubsb %xmm1, %xmm0, %xmm0`.
789 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
790 ; CHECK-LABEL: test_x86_sse2_psubs_b:
792 ; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
794 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
797 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.psubs.w on two <8 x i16> values; the FileCheck directive expects `vpsubsw %xmm1, %xmm0, %xmm0`.
800 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
801 ; CHECK-LABEL: test_x86_sse2_psubs_w:
803 ; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
805 %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
808 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.psubus.b on two <16 x i8> values; the FileCheck directive expects `vpsubusb %xmm1, %xmm0, %xmm0`.
811 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
812 ; CHECK-LABEL: test_x86_sse2_psubus_b:
814 ; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
816 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
819 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
; Calls @llvm.x86.sse2.psubus.w on two <8 x i16> values; the FileCheck directive expects `vpsubusw %xmm1, %xmm0, %xmm0`.
822 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
823 ; CHECK-LABEL: test_x86_sse2_psubus_w:
825 ; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
827 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
830 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.sse2.sqrt.pd on %a0 and returns the result; the FileCheck directive expects `vsqrtpd %xmm0, %xmm0`.
833 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
834 ; CHECK-LABEL: test_x86_sse2_sqrt_pd:
836 ; CHECK-NEXT: vsqrtpd %xmm0, %xmm0
838 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
839 ret <2 x double> %res
841 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
; Calls the single-operand @llvm.x86.sse2.sqrt.sd on %a0 and returns the result; the expected
; instruction is the three-operand form with %xmm0 in every position: `vsqrtsd %xmm0, %xmm0, %xmm0`.
844 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
845 ; CHECK-LABEL: test_x86_sse2_sqrt_sd:
847 ; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
849 %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
850 ret <2 x double> %res
852 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
; Calls the void @llvm.x86.sse2.storel.dq with pointer %a0 and vector %a1. The expected output
; loads the pointer argument from the stack (`movl <offset>(%esp), %eax`, offset matched by
; regex) and then stores through it with `vmovlps %xmm0, (%eax)`.
855 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
856 ; CHECK-LABEL: test_x86_sse2_storel_dq:
858 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
859 ; CHECK-NEXT: vmovlps %xmm0, (%eax)
861 call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
864 declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
; Adds an all-ones <16 x i8> constant to %a1 and passes the sum to @llvm.x86.sse2.storeu.dq.
; Expected output: load the pointer argument from the stack, `vpaddb` with a constant-pool
; operand, then an unaligned integer store `vmovdqu %xmm0, (%eax)`.
; NOTE(review): the expected constant-pool label LCPI77_0 is hard-coded; the 77 appears to be
; this function's zero-based position among the defines in this file, so inserting or removing
; an earlier function would presumably break the match -- confirm before reordering tests.
867 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
868 ; add operation forces the execution domain.
869 ; CHECK-LABEL: test_x86_sse2_storeu_dq:
871 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
872 ; CHECK-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0
873 ; CHECK-NEXT: vmovdqu %xmm0, (%eax)
875 %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
876 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
879 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
; Adds the constant <double 0x0, double 0x4200000000000000> to %a1 with fadd and passes the
; sum to @llvm.x86.sse2.storeu.pd. Expected output: load the pointer argument from the stack,
; build the constant in %xmm1 (a `vmovsd` load from memory followed by a `vpslldq` that moves
; it into the upper half, per the shuffle comments matched below), `vaddpd`, then an unaligned
; floating-point store `vmovupd %xmm0, (%eax)`.
882 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
883 ; fadd operation forces the execution domain.
884 ; CHECK-LABEL: test_x86_sse2_storeu_pd:
886 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
887 ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
888 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
889 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
890 ; CHECK-NEXT: vmovupd %xmm0, (%eax)
892 %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
893 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
896 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
; Calls @llvm.x86.sse2.sub.sd on %a0 and %a1 and returns the result; the FileCheck directive expects `vsubsd %xmm1, %xmm0, %xmm0`.
899 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
900 ; CHECK-LABEL: test_x86_sse2_sub_sd:
902 ; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0
904 %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
905 ret <2 x double> %res
907 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomieq.sd (i32 result); expected sequence is `vucomisd %xmm1, %xmm0`,
; then `sete %al`, then `movzbl %al, %eax`.
910 define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
911 ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
913 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
914 ; CHECK-NEXT: sete %al
915 ; CHECK-NEXT: movzbl %al, %eax
917 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
920 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomige.sd (i32 result); expected sequence is `vucomisd %xmm1, %xmm0`,
; then `setae %al`, then `movzbl %al, %eax`.
923 define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
924 ; CHECK-LABEL: test_x86_sse2_ucomige_sd:
926 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
927 ; CHECK-NEXT: setae %al
928 ; CHECK-NEXT: movzbl %al, %eax
930 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
933 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomigt.sd (i32 result); expected sequence is `vucomisd %xmm1, %xmm0`,
; then `seta %al`, then `movzbl %al, %eax`.
936 define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
937 ; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
939 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
940 ; CHECK-NEXT: seta %al
941 ; CHECK-NEXT: movzbl %al, %eax
943 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
946 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomile.sd (i32 result); expected sequence is `vucomisd %xmm1, %xmm0`,
; then `setbe %al`, then `movzbl %al, %eax`.
949 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
950 ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
952 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
953 ; CHECK-NEXT: setbe %al
954 ; CHECK-NEXT: movzbl %al, %eax
956 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
959 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomilt.sd (i32 result). Unlike the sibling ucomi* tests, the expected
; sequence after `vucomisd %xmm1, %xmm0` is `sbbl %eax, %eax` followed by `andl $1, %eax`
; rather than a setcc/movzbl pair.
962 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
963 ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
965 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
966 ; CHECK-NEXT: sbbl %eax, %eax
967 ; CHECK-NEXT: andl $1, %eax
969 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
972 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse2.ucomineq.sd (i32 result); expected sequence is `vucomisd %xmm1, %xmm0`,
; then `setne %al`, then `movzbl %al, %eax`.
975 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
976 ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
978 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
979 ; CHECK-NEXT: setne %al
980 ; CHECK-NEXT: movzbl %al, %eax
982 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
985 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse3.addsub.pd on %a0 and %a1 and returns the result; the FileCheck directive expects `vaddsubpd %xmm1, %xmm0, %xmm0`.
988 define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
989 ; CHECK-LABEL: test_x86_sse3_addsub_pd:
991 ; CHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
993 %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
994 ret <2 x double> %res
996 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
; Calls @llvm.x86.sse3.addsub.ps on %a0 and %a1 and returns the result; the FileCheck directive expects `vaddsubps %xmm1, %xmm0, %xmm0`.
999 define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
1000 ; CHECK-LABEL: test_x86_sse3_addsub_ps:
1002 ; CHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
1004 %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1005 ret <4 x float> %res
1007 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse3.hadd.pd: expects a single
; vhaddpd %xmm1, %xmm0, %xmm0.
1010 define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
1011 ; CHECK-LABEL: test_x86_sse3_hadd_pd:
1013 ; CHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
1015 %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1016 ret <2 x double> %res
1018 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
; Codegen test for @llvm.x86.sse3.hadd.ps: expects a single
; vhaddps %xmm1, %xmm0, %xmm0.
1021 define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
1022 ; CHECK-LABEL: test_x86_sse3_hadd_ps:
1024 ; CHECK-NEXT: vhaddps %xmm1, %xmm0, %xmm0
1026 %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1027 ret <4 x float> %res
1029 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse3.hsub.pd: expects a single
; vhsubpd %xmm1, %xmm0, %xmm0.
1032 define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
1033 ; CHECK-LABEL: test_x86_sse3_hsub_pd:
1035 ; CHECK-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
1037 %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1038 ret <2 x double> %res
1040 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
; Codegen test for @llvm.x86.sse3.hsub.ps: expects a single
; vhsubps %xmm1, %xmm0, %xmm0.
1043 define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
1044 ; CHECK-LABEL: test_x86_sse3_hsub_ps:
1046 ; CHECK-NEXT: vhsubps %xmm1, %xmm0, %xmm0
1048 %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1049 ret <4 x float> %res
1051 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse3.ldu.dq on an i8* argument: expects a movl
; from a stack slot into %eax, then vlddqu (%eax), %xmm0.
1054 define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
1055 ; CHECK-LABEL: test_x86_sse3_ldu_dq:
1057 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1058 ; CHECK-NEXT: vlddqu (%eax), %xmm0
1060 %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
1063 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
; Codegen test for @llvm.x86.sse41.blendvpd with three vector arguments:
; expects vblendvpd %xmm2, %xmm1, %xmm0, %xmm0.
1066 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
1067 ; CHECK-LABEL: test_x86_sse41_blendvpd:
1069 ; CHECK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1071 %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
1072 ret <2 x double> %res
1074 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Codegen test for @llvm.x86.sse41.blendvps with three vector arguments:
; expects vblendvps %xmm2, %xmm1, %xmm0, %xmm0.
1077 define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
1078 ; CHECK-LABEL: test_x86_sse41_blendvps:
1080 ; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1082 %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
1083 ret <4 x float> %res
1085 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse41.dppd called with i8 7 as its last operand:
; expects vdppd $7, %xmm1, %xmm0, %xmm0.
1088 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
1089 ; CHECK-LABEL: test_x86_sse41_dppd:
1091 ; CHECK-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0
1093 %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
1094 ret <2 x double> %res
1096 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse41.dpps called with i8 7 as its last operand:
; expects vdpps $7, %xmm1, %xmm0, %xmm0.
1099 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
1100 ; CHECK-LABEL: test_x86_sse41_dpps:
1102 ; CHECK-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0
1104 %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1105 ret <4 x float> %res
1107 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse41.insertps called with i8 7: expects a
; vinsertps whose printed shuffle comment is xmm0 = zero,zero,zero,xmm0[3].
1110 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
1111 ; CHECK-LABEL: test_x86_sse41_insertps:
1113 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
1115 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1116 ret <4 x float> %res
1118 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse41.mpsadbw called with i8 7 as its last
; operand: expects vmpsadbw $7, %xmm1, %xmm0, %xmm0.
1122 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
1123 ; CHECK-LABEL: test_x86_sse41_mpsadbw:
1125 ; CHECK-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0
1127 %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
1130 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse41.packusdw: expects a single
; vpackusdw %xmm1, %xmm0, %xmm0.
1133 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
1134 ; CHECK-LABEL: test_x86_sse41_packusdw:
1136 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1138 %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
1141 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pblendvb with three vector arguments:
; expects vpblendvb %xmm2, %xmm1, %xmm0, %xmm0.
1144 define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
1145 ; CHECK-LABEL: test_x86_sse41_pblendvb:
1147 ; CHECK-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
1149 %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
1152 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.phminposuw: expects a single
; vphminposuw %xmm0, %xmm0.
1155 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
1156 ; CHECK-LABEL: test_x86_sse41_phminposuw:
1158 ; CHECK-NEXT: vphminposuw %xmm0, %xmm0
1160 %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
1163 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmaxsb: expects a single
; vpmaxsb %xmm1, %xmm0, %xmm0.
1166 define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
1167 ; CHECK-LABEL: test_x86_sse41_pmaxsb:
1169 ; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1171 %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1174 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmaxsd: expects a single
; vpmaxsd %xmm1, %xmm0, %xmm0.
1177 define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
1178 ; CHECK-LABEL: test_x86_sse41_pmaxsd:
1180 ; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
1182 %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1185 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmaxud: expects a single
; vpmaxud %xmm1, %xmm0, %xmm0.
1188 define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
1189 ; CHECK-LABEL: test_x86_sse41_pmaxud:
1191 ; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
1193 %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1196 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmaxuw: expects a single
; vpmaxuw %xmm1, %xmm0, %xmm0.
1199 define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
1200 ; CHECK-LABEL: test_x86_sse41_pmaxuw:
1202 ; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
1204 %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1207 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pminsb: expects a single
; vpminsb %xmm1, %xmm0, %xmm0.
1210 define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
1211 ; CHECK-LABEL: test_x86_sse41_pminsb:
1213 ; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1215 %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1218 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pminsd: expects a single
; vpminsd %xmm1, %xmm0, %xmm0.
1221 define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
1222 ; CHECK-LABEL: test_x86_sse41_pminsd:
1224 ; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1226 %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1229 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pminud: expects a single
; vpminud %xmm1, %xmm0, %xmm0.
1232 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
1233 ; CHECK-LABEL: test_x86_sse41_pminud:
1235 ; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0
1237 %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1240 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pminuw: expects a single
; vpminuw %xmm1, %xmm0, %xmm0.
1243 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
1244 ; CHECK-LABEL: test_x86_sse41_pminuw:
1246 ; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1248 %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1251 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovsxbd (<16 x i8> in, <4 x i32> out):
; expects a single vpmovsxbd %xmm0, %xmm0.
1254 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
1255 ; CHECK-LABEL: test_x86_sse41_pmovsxbd:
1257 ; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
1259 %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
1262 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovsxbq (<16 x i8> in, <2 x i64> out):
; expects a single vpmovsxbq %xmm0, %xmm0.
1265 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
1266 ; CHECK-LABEL: test_x86_sse41_pmovsxbq:
1268 ; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
1270 %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
1273 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovsxbw (<16 x i8> in, <8 x i16> out):
; expects a single vpmovsxbw %xmm0, %xmm0.
1276 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
1277 ; CHECK-LABEL: test_x86_sse41_pmovsxbw:
1279 ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
1281 %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
1284 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovsxdq (<4 x i32> in, <2 x i64> out):
; expects a single vpmovsxdq %xmm0, %xmm0.
1287 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
1288 ; CHECK-LABEL: test_x86_sse41_pmovsxdq:
1290 ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
1292 %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
1295 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovsxwd (<8 x i16> in, <4 x i32> out):
; expects a single vpmovsxwd %xmm0, %xmm0.
1298 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
1299 ; CHECK-LABEL: test_x86_sse41_pmovsxwd:
1301 ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
1303 %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
1306 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovsxwq (<8 x i16> in, <2 x i64> out):
; expects a single vpmovsxwq %xmm0, %xmm0.
1309 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
1310 ; CHECK-LABEL: test_x86_sse41_pmovsxwq:
1312 ; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
1314 %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
1317 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovzxbd: expects vpmovzxbd; the printed
; shuffle comment (xmm0[0..3], each followed by three zero entries) is matched too.
1320 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
1321 ; CHECK-LABEL: test_x86_sse41_pmovzxbd:
1323 ; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1325 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
1328 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovzxbq: expects vpmovzxbq; the printed
; shuffle comment (xmm0[0..1], each followed by seven zero entries) is matched too.
1331 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
1332 ; CHECK-LABEL: test_x86_sse41_pmovzxbq:
1334 ; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1336 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
1339 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovzxbw: expects vpmovzxbw; the printed
; shuffle comment (xmm0[0..7], each followed by one zero entry) is matched too.
1342 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
1343 ; CHECK-LABEL: test_x86_sse41_pmovzxbw:
1345 ; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1347 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
1350 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovzxdq: expects vpmovzxdq; the printed
; shuffle comment xmm0[0],zero,xmm0[1],zero is matched too.
1353 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
1354 ; CHECK-LABEL: test_x86_sse41_pmovzxdq:
1356 ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1358 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
1361 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovzxwd: expects vpmovzxwd; the printed
; shuffle comment (xmm0[0..3], each followed by one zero entry) is matched too.
1364 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
1365 ; CHECK-LABEL: test_x86_sse41_pmovzxwd:
1367 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1369 %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
1372 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmovzxwq: expects vpmovzxwq; the printed
; shuffle comment (xmm0[0..1], each followed by three zero entries) is matched too.
1375 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
1376 ; CHECK-LABEL: test_x86_sse41_pmovzxwq:
1378 ; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1380 %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
1383 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
; Codegen test for @llvm.x86.sse41.pmuldq (<4 x i32> operands, <2 x i64>
; result): expects a single vpmuldq %xmm1, %xmm0, %xmm0.
1386 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
1387 ; CHECK-LABEL: test_x86_sse41_pmuldq:
1389 ; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
1391 %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
1394 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test for @llvm.x86.sse41.ptestc: expects vptest %xmm1, %xmm0,
; followed by sbbl %eax, %eax and andl $1, %eax.
1397 define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
1398 ; CHECK-LABEL: test_x86_sse41_ptestc:
1400 ; CHECK-NEXT: vptest %xmm1, %xmm0
1401 ; CHECK-NEXT: sbbl %eax, %eax
1402 ; CHECK-NEXT: andl $1, %eax
1404 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1407 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
; Codegen test for @llvm.x86.sse41.ptestnzc: expects vptest %xmm1, %xmm0,
; followed by seta %al and movzbl %al, %eax.
1410 define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
1411 ; CHECK-LABEL: test_x86_sse41_ptestnzc:
1413 ; CHECK-NEXT: vptest %xmm1, %xmm0
1414 ; CHECK-NEXT: seta %al
1415 ; CHECK-NEXT: movzbl %al, %eax
1417 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1420 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
; Codegen test for @llvm.x86.sse41.ptestz: expects vptest %xmm1, %xmm0,
; followed by sete %al and movzbl %al, %eax.
1423 define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
1424 ; CHECK-LABEL: test_x86_sse41_ptestz:
1426 ; CHECK-NEXT: vptest %xmm1, %xmm0
1427 ; CHECK-NEXT: sete %al
1428 ; CHECK-NEXT: movzbl %al, %eax
1430 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1433 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
; Codegen test for @llvm.x86.sse41.round.pd called with i32 7 as its last
; operand: expects vroundpd $7, %xmm0, %xmm0.
1436 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
1437 ; CHECK-LABEL: test_x86_sse41_round_pd:
1439 ; CHECK-NEXT: vroundpd $7, %xmm0, %xmm0
1441 %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
1442 ret <2 x double> %res
1444 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
; Codegen test for @llvm.x86.sse41.round.ps called with i32 7 as its last
; operand: expects vroundps $7, %xmm0, %xmm0.
1447 define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
1448 ; CHECK-LABEL: test_x86_sse41_round_ps:
1450 ; CHECK-NEXT: vroundps $7, %xmm0, %xmm0
1452 %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1453 ret <4 x float> %res
1455 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
; Codegen test for @llvm.x86.sse41.round.sd called with i32 7 as its last
; operand: expects vroundsd $7, %xmm1, %xmm0, %xmm0.
1458 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
1459 ; CHECK-LABEL: test_x86_sse41_round_sd:
1461 ; CHECK-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0
1463 %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
1464 ret <2 x double> %res
1466 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
; Codegen test for @llvm.x86.sse41.round.ss called with i32 7 as its last
; operand: expects vroundss $7, %xmm1, %xmm0, %xmm0.
1469 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
1470 ; CHECK-LABEL: test_x86_sse41_round_ss:
1472 ; CHECK-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0
1474 %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
1475 ret <4 x float> %res
1477 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpestri128 called with i32 7, i32 7 and
; i8 7: expects movl $7 into %eax and %edx, vpcmpestri $7, %xmm1, %xmm0, then
; movl %ecx, %eax.
1480 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
1481 ; CHECK-LABEL: test_x86_sse42_pcmpestri128:
1483 ; CHECK-NEXT: movl $7, %eax
1484 ; CHECK-NEXT: movl $7, %edx
1485 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1486 ; CHECK-NEXT: movl %ecx, %eax
1488 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1491 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Variant of the pcmpestri128 test where both vector operands are loaded from
; pointer arguments: expects one operand to be loaded with vmovdqa into %xmm0
; and the other to appear as the (%ecx) memory operand of vpcmpestri.
1494 define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
1495 ; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
1497 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1498 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1499 ; CHECK-NEXT: vmovdqa (%eax), %xmm0
1500 ; CHECK-NEXT: movl $7, %eax
1501 ; CHECK-NEXT: movl $7, %edx
1502 ; CHECK-NEXT: vpcmpestri $7, (%ecx), %xmm0
1503 ; CHECK-NEXT: movl %ecx, %eax
1505 %1 = load <16 x i8>, <16 x i8>* %a0
1506 %2 = load <16 x i8>, <16 x i8>* %a2
1507 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
; Codegen test for @llvm.x86.sse42.pcmpestria128: expects movl $7 into %eax and
; %edx, vpcmpestri $7, %xmm1, %xmm0, then seta %al and movzbl %al, %eax.
1512 define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
1513 ; CHECK-LABEL: test_x86_sse42_pcmpestria128:
1515 ; CHECK-NEXT: movl $7, %eax
1516 ; CHECK-NEXT: movl $7, %edx
1517 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1518 ; CHECK-NEXT: seta %al
1519 ; CHECK-NEXT: movzbl %al, %eax
1521 %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1524 declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpestric128: expects movl $7 into %eax and
; %edx, vpcmpestri $7, %xmm1, %xmm0, then sbbl %eax, %eax and andl $1, %eax.
1527 define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
1528 ; CHECK-LABEL: test_x86_sse42_pcmpestric128:
1530 ; CHECK-NEXT: movl $7, %eax
1531 ; CHECK-NEXT: movl $7, %edx
1532 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1533 ; CHECK-NEXT: sbbl %eax, %eax
1534 ; CHECK-NEXT: andl $1, %eax
1536 %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1539 declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpestrio128: expects movl $7 into %eax and
; %edx, vpcmpestri $7, %xmm1, %xmm0, then seto %al and movzbl %al, %eax.
1542 define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
1543 ; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
1545 ; CHECK-NEXT: movl $7, %eax
1546 ; CHECK-NEXT: movl $7, %edx
1547 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1548 ; CHECK-NEXT: seto %al
1549 ; CHECK-NEXT: movzbl %al, %eax
1551 %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1554 declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpestris128: expects movl $7 into %eax and
; %edx, vpcmpestri $7, %xmm1, %xmm0, then sets %al and movzbl %al, %eax.
1557 define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
1558 ; CHECK-LABEL: test_x86_sse42_pcmpestris128:
1560 ; CHECK-NEXT: movl $7, %eax
1561 ; CHECK-NEXT: movl $7, %edx
1562 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1563 ; CHECK-NEXT: sets %al
1564 ; CHECK-NEXT: movzbl %al, %eax
1566 %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1569 declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpestriz128: expects movl $7 into %eax and
; %edx, vpcmpestri $7, %xmm1, %xmm0, then sete %al and movzbl %al, %eax.
1572 define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
1573 ; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
1575 ; CHECK-NEXT: movl $7, %eax
1576 ; CHECK-NEXT: movl $7, %edx
1577 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1578 ; CHECK-NEXT: sete %al
1579 ; CHECK-NEXT: movzbl %al, %eax
1581 %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1584 declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpestrm128 (vector result): expects
; movl $7 into %eax and %edx, then vpcmpestrm $7, %xmm1, %xmm0.
1587 define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
1588 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
1590 ; CHECK-NEXT: movl $7, %eax
1591 ; CHECK-NEXT: movl $7, %edx
1592 ; CHECK-NEXT: vpcmpestrm $7, %xmm1, %xmm0
1594 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
1597 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Variant of the pcmpestrm128 test where the second vector operand is loaded
; from a pointer argument: expects it to appear as the (%ecx) memory operand
; of vpcmpestrm.
1600 define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
1601 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
1603 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1604 ; CHECK-NEXT: movl $7, %eax
1605 ; CHECK-NEXT: movl $7, %edx
1606 ; CHECK-NEXT: vpcmpestrm $7, (%ecx), %xmm0
1608 %1 = load <16 x i8>, <16 x i8>* %a2
1609 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
; Codegen test for @llvm.x86.sse42.pcmpistri128 called with i8 7: expects
; vpcmpistri $7, %xmm1, %xmm0, then movl %ecx, %eax.
1614 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
1615 ; CHECK-LABEL: test_x86_sse42_pcmpistri128:
1617 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1618 ; CHECK-NEXT: movl %ecx, %eax
1620 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1623 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Variant of the pcmpistri128 test where both vector operands are loaded from
; pointer arguments: expects one operand to be loaded with vmovdqa into %xmm0
; and the other to appear as the (%eax) memory operand of vpcmpistri.
1626 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
1627 ; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
1629 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1630 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1631 ; CHECK-NEXT: vmovdqa (%ecx), %xmm0
1632 ; CHECK-NEXT: vpcmpistri $7, (%eax), %xmm0
1633 ; CHECK-NEXT: movl %ecx, %eax
1635 %1 = load <16 x i8>, <16 x i8>* %a0
1636 %2 = load <16 x i8>, <16 x i8>* %a1
1637 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
; Codegen test for @llvm.x86.sse42.pcmpistria128: expects
; vpcmpistri $7, %xmm1, %xmm0, then seta %al and movzbl %al, %eax.
1642 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
1643 ; CHECK-LABEL: test_x86_sse42_pcmpistria128:
1645 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1646 ; CHECK-NEXT: seta %al
1647 ; CHECK-NEXT: movzbl %al, %eax
1649 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1652 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpistric128: expects
; vpcmpistri $7, %xmm1, %xmm0, then sbbl %eax, %eax and andl $1, %eax.
1655 define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
1656 ; CHECK-LABEL: test_x86_sse42_pcmpistric128:
1658 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1659 ; CHECK-NEXT: sbbl %eax, %eax
1660 ; CHECK-NEXT: andl $1, %eax
1662 %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1665 declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpistrio128: expects
; vpcmpistri $7, %xmm1, %xmm0, then seto %al and movzbl %al, %eax.
1668 define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
1669 ; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
1671 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1672 ; CHECK-NEXT: seto %al
1673 ; CHECK-NEXT: movzbl %al, %eax
1675 %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1678 declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpistris128: expects
; vpcmpistri $7, %xmm1, %xmm0, then sets %al and movzbl %al, %eax.
1681 define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
1682 ; CHECK-LABEL: test_x86_sse42_pcmpistris128:
1684 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1685 ; CHECK-NEXT: sets %al
1686 ; CHECK-NEXT: movzbl %al, %eax
1688 %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1691 declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpistriz128: expects
; vpcmpistri $7, %xmm1, %xmm0, then sete %al and movzbl %al, %eax.
1694 define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
1695 ; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
1697 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1698 ; CHECK-NEXT: sete %al
1699 ; CHECK-NEXT: movzbl %al, %eax
1701 %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1704 declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse42.pcmpistrm128 (vector result) called with
; i8 7: expects a single vpcmpistrm $7, %xmm1, %xmm0.
1707 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
1708 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
1710 ; CHECK-NEXT: vpcmpistrm $7, %xmm1, %xmm0
1712 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
1715 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Variant of the pcmpistrm128 test where the second vector operand is loaded
; from a pointer argument: expects it to appear as the (%eax) memory operand
; of vpcmpistrm.
1718 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
1719 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
1721 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1722 ; CHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0
1724 %1 = load <16 x i8>, <16 x i8>* %a1
1725 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
; Codegen test for @llvm.x86.sse.add.ss: expects a single
; vaddss %xmm1, %xmm0, %xmm0.
1730 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
1731 ; CHECK-LABEL: test_x86_sse_add_ss:
1733 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
1735 %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1736 ret <4 x float> %res
1738 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.cmp.ps called with i8 7 as its last operand:
; the expected mnemonic for that value is vcmpordps.
1741 define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
1742 ; CHECK-LABEL: test_x86_sse_cmp_ps:
1744 ; CHECK-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
1746 %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1747 ret <4 x float> %res
1749 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse.cmp.ss called with i8 7 as its last operand:
; the expected mnemonic for that value is vcmpordss.
1752 define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
1753 ; CHECK-LABEL: test_x86_sse_cmp_ss:
1755 ; CHECK-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
1757 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1758 ret <4 x float> %res
1760 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
; Codegen test for @llvm.x86.sse.comieq.ss: expects vcomiss %xmm1, %xmm0,
; followed by sete %al and movzbl %al, %eax.
1763 define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
1764 ; CHECK-LABEL: test_x86_sse_comieq_ss:
1766 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1767 ; CHECK-NEXT: sete %al
1768 ; CHECK-NEXT: movzbl %al, %eax
1770 %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1773 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.comige.ss: expects vcomiss %xmm1, %xmm0,
; followed by setae %al and movzbl %al, %eax.
1776 define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
1777 ; CHECK-LABEL: test_x86_sse_comige_ss:
1779 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1780 ; CHECK-NEXT: setae %al
1781 ; CHECK-NEXT: movzbl %al, %eax
1783 %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1786 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.comigt.ss: expects vcomiss %xmm1, %xmm0,
; followed by seta %al and movzbl %al, %eax.
1789 define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
1790 ; CHECK-LABEL: test_x86_sse_comigt_ss:
1792 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1793 ; CHECK-NEXT: seta %al
1794 ; CHECK-NEXT: movzbl %al, %eax
1796 %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1799 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.comile.ss: expects vcomiss %xmm1, %xmm0,
; followed by setbe %al and movzbl %al, %eax.
1802 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
1803 ; CHECK-LABEL: test_x86_sse_comile_ss:
1805 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1806 ; CHECK-NEXT: setbe %al
1807 ; CHECK-NEXT: movzbl %al, %eax
1809 %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1812 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.comilt.ss: expects vcomiss %xmm1, %xmm0,
; followed by sbbl %eax, %eax and andl $1, %eax.
1815 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
1816 ; CHECK-LABEL: test_x86_sse_comilt_ss:
1818 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1819 ; CHECK-NEXT: sbbl %eax, %eax
1820 ; CHECK-NEXT: andl $1, %eax
1822 %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1825 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.comineq.ss: expects vcomiss %xmm1, %xmm0,
; followed by setne %al and movzbl %al, %eax.
1828 define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
1829 ; CHECK-LABEL: test_x86_sse_comineq_ss:
1831 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1832 ; CHECK-NEXT: setne %al
1833 ; CHECK-NEXT: movzbl %al, %eax
1835 %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1838 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.cvtsi2ss called with the constant i32 7:
; expects movl $7, %eax followed by vcvtsi2ssl %eax, %xmm0, %xmm0.
1841 define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
1842 ; CHECK-LABEL: test_x86_sse_cvtsi2ss:
1844 ; CHECK-NEXT: movl $7, %eax
1845 ; CHECK-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
1847 %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1848 ret <4 x float> %res
1850 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
; Codegen test for @llvm.x86.sse.cvtss2si: expects a single
; vcvtss2si %xmm0, %eax.
1853 define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
1854 ; CHECK-LABEL: test_x86_sse_cvtss2si:
1856 ; CHECK-NEXT: vcvtss2si %xmm0, %eax
1858 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
1861 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.cvttss2si: expects a single
; vcvttss2si %xmm0, %eax.
1864 define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
1865 ; CHECK-LABEL: test_x86_sse_cvttss2si:
1867 ; CHECK-NEXT: vcvttss2si %xmm0, %eax
1869 %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
1872 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.div.ss: expects a single
; vdivss %xmm1, %xmm0, %xmm0.
1875 define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
1876 ; CHECK-LABEL: test_x86_sse_div_ss:
1878 ; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
1880 %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1881 ret <4 x float> %res
1883 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.ldmxcsr on an i8* argument (void result):
; expects a movl from a stack slot into %eax, then vldmxcsr (%eax).
1886 define void @test_x86_sse_ldmxcsr(i8* %a0) {
1887 ; CHECK-LABEL: test_x86_sse_ldmxcsr:
1889 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1890 ; CHECK-NEXT: vldmxcsr (%eax)
1892 call void @llvm.x86.sse.ldmxcsr(i8* %a0)
1895 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
; Codegen test for @llvm.x86.sse.max.ps: expects a single
; vmaxps %xmm1, %xmm0, %xmm0.
1899 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
1900 ; CHECK-LABEL: test_x86_sse_max_ps:
1902 ; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0
1904 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1905 ret <4 x float> %res
1907 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.max.ss: expects a single
; vmaxss %xmm1, %xmm0, %xmm0.
1910 define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
1911 ; CHECK-LABEL: test_x86_sse_max_ss:
1913 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
1915 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1916 ret <4 x float> %res
1918 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.min.ps: expects a single
; vminps %xmm1, %xmm0, %xmm0.
1921 define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
1922 ; CHECK-LABEL: test_x86_sse_min_ps:
1924 ; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0
1926 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1927 ret <4 x float> %res
1929 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.min.ss: expects a single
; vminss %xmm1, %xmm0, %xmm0.
1932 define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
1933 ; CHECK-LABEL: test_x86_sse_min_ss:
1935 ; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0
1937 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1938 ret <4 x float> %res
1940 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test for @llvm.x86.sse.movmsk.ps (i32 result): expects a single
; vmovmskps %xmm0, %eax.
1943 define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
1944 ; CHECK-LABEL: test_x86_sse_movmsk_ps:
1946 ; CHECK-NEXT: vmovmskps %xmm0, %eax
1948 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
1951 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
; Test: llvm.x86.sse.mul.ss is expected to be emitted as
; `vmulss %xmm1, %xmm0, %xmm0`.
1955 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
1956 ; CHECK-LABEL: test_x86_sse_mul_ss:
1958 ; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
1960 %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1961 ret <4 x float> %res
1963 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
; Test: the one-operand intrinsic llvm.x86.sse.rcp.ps is expected to be
; emitted as the two-operand instruction `vrcpps %xmm0, %xmm0`.
1966 define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
1967 ; CHECK-LABEL: test_x86_sse_rcp_ps:
1969 ; CHECK-NEXT: vrcpps %xmm0, %xmm0
1971 %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1972 ret <4 x float> %res
1974 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
; Test: the one-operand intrinsic llvm.x86.sse.rcp.ss is expected to be
; emitted in three-operand form with %xmm0 used for every operand:
; `vrcpss %xmm0, %xmm0, %xmm0`.
1977 define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
1978 ; CHECK-LABEL: test_x86_sse_rcp_ss:
1980 ; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0
1982 %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1983 ret <4 x float> %res
1985 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
; Test: llvm.x86.sse.rsqrt.ps is expected to be emitted as
; `vrsqrtps %xmm0, %xmm0`.
1988 define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
1989 ; CHECK-LABEL: test_x86_sse_rsqrt_ps:
1991 ; CHECK-NEXT: vrsqrtps %xmm0, %xmm0
1993 %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1994 ret <4 x float> %res
1996 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
; Test: the one-operand intrinsic llvm.x86.sse.rsqrt.ss is expected to be
; emitted in three-operand form: `vrsqrtss %xmm0, %xmm0, %xmm0`.
1999 define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
2000 ; CHECK-LABEL: test_x86_sse_rsqrt_ss:
2002 ; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
2004 %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2005 ret <4 x float> %res
2007 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
; Test: llvm.x86.sse.sqrt.ps is expected to be emitted as
; `vsqrtps %xmm0, %xmm0`.
2010 define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
2011 ; CHECK-LABEL: test_x86_sse_sqrt_ps:
2013 ; CHECK-NEXT: vsqrtps %xmm0, %xmm0
2015 %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2016 ret <4 x float> %res
2018 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
; Test: the one-operand intrinsic llvm.x86.sse.sqrt.ss is expected to be
; emitted in three-operand form: `vsqrtss %xmm0, %xmm0, %xmm0`.
2021 define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
2022 ; CHECK-LABEL: test_x86_sse_sqrt_ss:
2024 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
2026 %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2027 ret <4 x float> %res
2029 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
; Test: llvm.x86.sse.stmxcsr with an i8* argument. The expected output first
; loads a value from the stack into %eax (the regex accepts any decimal offset
; from %esp) and then issues `vstmxcsr (%eax)`.
2032 define void @test_x86_sse_stmxcsr(i8* %a0) {
2033 ; CHECK-LABEL: test_x86_sse_stmxcsr:
2035 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2036 ; CHECK-NEXT: vstmxcsr (%eax)
2038 call void @llvm.x86.sse.stmxcsr(i8* %a0)
2041 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
; Test: llvm.x86.sse.storeu.ps with an i8* and a <4 x float>. The expected
; output loads a value from the stack into %eax (any decimal offset from %esp
; matches) and then stores with `vmovups %xmm0, (%eax)`.
2044 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
2045 ; CHECK-LABEL: test_x86_sse_storeu_ps:
2047 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2048 ; CHECK-NEXT: vmovups %xmm0, (%eax)
2050 call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
2053 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
; Test: llvm.x86.sse.sub.ss is expected to be emitted as
; `vsubss %xmm1, %xmm0, %xmm0`.
2056 define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
2057 ; CHECK-LABEL: test_x86_sse_sub_ss:
2059 ; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0
2061 %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2062 ret <4 x float> %res
2064 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.sse.ucomieq.ss returns an i32. The expected sequence is
; `vucomiss %xmm1, %xmm0`, then `sete %al`, then `movzbl %al, %eax` to widen
; the byte into the i32 result register.
2067 define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
2068 ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
2070 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2071 ; CHECK-NEXT: sete %al
2072 ; CHECK-NEXT: movzbl %al, %eax
2074 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2077 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.sse.ucomige.ss returns an i32. The expected sequence is
; `vucomiss %xmm1, %xmm0`, then `setae %al`, then `movzbl %al, %eax`.
2080 define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
2081 ; CHECK-LABEL: test_x86_sse_ucomige_ss:
2083 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2084 ; CHECK-NEXT: setae %al
2085 ; CHECK-NEXT: movzbl %al, %eax
2087 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2090 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.sse.ucomigt.ss returns an i32. The expected sequence is
; `vucomiss %xmm1, %xmm0`, then `seta %al`, then `movzbl %al, %eax`.
2093 define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
2094 ; CHECK-LABEL: test_x86_sse_ucomigt_ss:
2096 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2097 ; CHECK-NEXT: seta %al
2098 ; CHECK-NEXT: movzbl %al, %eax
2100 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2103 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.sse.ucomile.ss returns an i32. The expected sequence is
; `vucomiss %xmm1, %xmm0`, then `setbe %al`, then `movzbl %al, %eax`.
2106 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
2107 ; CHECK-LABEL: test_x86_sse_ucomile_ss:
2109 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2110 ; CHECK-NEXT: setbe %al
2111 ; CHECK-NEXT: movzbl %al, %eax
2113 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2116 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.sse.ucomilt.ss returns an i32. Unlike the sibling ucomi tests,
; the expected sequence after `vucomiss %xmm1, %xmm0` is not a setcc/movzbl
; pair but `sbbl %eax, %eax` followed by `andl $1, %eax`.
2119 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
2120 ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
2122 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2123 ; CHECK-NEXT: sbbl %eax, %eax
2124 ; CHECK-NEXT: andl $1, %eax
2126 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2129 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.sse.ucomineq.ss returns an i32. The expected sequence is
; `vucomiss %xmm1, %xmm0`, then `setne %al`, then `movzbl %al, %eax`.
2132 define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
2133 ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
2135 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2136 ; CHECK-NEXT: setne %al
2137 ; CHECK-NEXT: movzbl %al, %eax
2139 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2142 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.ssse3.pabs.b.128 on a <16 x i8> is expected to be emitted as
; `vpabsb %xmm0, %xmm0`.
2145 define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
2146 ; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
2148 ; CHECK-NEXT: vpabsb %xmm0, %xmm0
2150 %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
2153 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
; Test: llvm.x86.ssse3.pabs.d.128 on a <4 x i32> is expected to be emitted as
; `vpabsd %xmm0, %xmm0`.
2156 define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
2157 ; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
2159 ; CHECK-NEXT: vpabsd %xmm0, %xmm0
2161 %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
2164 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
; Test: llvm.x86.ssse3.pabs.w.128 on an <8 x i16> is expected to be emitted as
; `vpabsw %xmm0, %xmm0`.
2167 define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
2168 ; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
2170 ; CHECK-NEXT: vpabsw %xmm0, %xmm0
2172 %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
2175 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
; Test: llvm.x86.ssse3.phadd.d.128 on two <4 x i32> values is expected to be
; emitted as `vphaddd %xmm1, %xmm0, %xmm0`.
2178 define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2179 ; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
2181 ; CHECK-NEXT: vphaddd %xmm1, %xmm0, %xmm0
2183 %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2186 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; Test: llvm.x86.ssse3.phadd.sw.128 on two <8 x i16> values is expected to be
; emitted as `vphaddsw %xmm1, %xmm0, %xmm0`.
2189 define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2190 ; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
2192 ; CHECK-NEXT: vphaddsw %xmm1, %xmm0, %xmm0
2194 %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2197 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; Test: llvm.x86.ssse3.phadd.w.128 on two <8 x i16> values is expected to be
; emitted as `vphaddw %xmm1, %xmm0, %xmm0`.
2200 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2201 ; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
2203 ; CHECK-NEXT: vphaddw %xmm1, %xmm0, %xmm0
2205 %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2208 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; Test: llvm.x86.ssse3.phsub.d.128 on two <4 x i32> values is expected to be
; emitted as `vphsubd %xmm1, %xmm0, %xmm0`.
2211 define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2212 ; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
2214 ; CHECK-NEXT: vphsubd %xmm1, %xmm0, %xmm0
2216 %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2219 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; Test: llvm.x86.ssse3.phsub.sw.128 on two <8 x i16> values is expected to be
; emitted as `vphsubsw %xmm1, %xmm0, %xmm0`.
2222 define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2223 ; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
2225 ; CHECK-NEXT: vphsubsw %xmm1, %xmm0, %xmm0
2227 %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2230 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; Test: llvm.x86.ssse3.phsub.w.128 on two <8 x i16> values is expected to be
; emitted as `vphsubw %xmm1, %xmm0, %xmm0`.
2233 define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2234 ; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
2236 ; CHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0
2238 %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2241 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; Test: llvm.x86.ssse3.pmadd.ub.sw.128 takes two <16 x i8> values and returns
; an <8 x i16>; it is expected to be emitted as
; `vpmaddubsw %xmm1, %xmm0, %xmm0`.
2244 define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
2245 ; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
2247 ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
2249 %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
2252 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
; Test: llvm.x86.ssse3.pmul.hr.sw.128 on two <8 x i16> values is expected to
; be emitted as `vpmulhrsw %xmm1, %xmm0, %xmm0`.
2255 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2256 ; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
2258 ; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0
2260 %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2263 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; Test: llvm.x86.ssse3.pshuf.b.128 on two <16 x i8> values is expected to be
; emitted as `vpshufb %xmm1, %xmm0, %xmm0`.
2266 define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
2267 ; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
2269 ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
2271 %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
2274 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
; Test: llvm.x86.ssse3.psign.b.128 on two <16 x i8> values is expected to be
; emitted as `vpsignb %xmm1, %xmm0, %xmm0`.
2277 define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
2278 ; CHECK-LABEL: test_x86_ssse3_psign_b_128:
2280 ; CHECK-NEXT: vpsignb %xmm1, %xmm0, %xmm0
2282 %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
2285 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
; Test: llvm.x86.ssse3.psign.d.128 on two <4 x i32> values is expected to be
; emitted as `vpsignd %xmm1, %xmm0, %xmm0`.
2288 define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2289 ; CHECK-LABEL: test_x86_ssse3_psign_d_128:
2291 ; CHECK-NEXT: vpsignd %xmm1, %xmm0, %xmm0
2293 %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2296 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; Test: llvm.x86.ssse3.psign.w.128 on two <8 x i16> values is expected to be
; emitted as `vpsignw %xmm1, %xmm0, %xmm0`.
2299 define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2300 ; CHECK-LABEL: test_x86_ssse3_psign_w_128:
2302 ; CHECK-NEXT: vpsignw %xmm1, %xmm0, %xmm0
2304 %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2307 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; Test: llvm.x86.avx.addsub.pd.256 on two <4 x double> values is expected to
; be emitted on 256-bit registers as `vaddsubpd %ymm1, %ymm0, %ymm0`.
2310 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
2311 ; CHECK-LABEL: test_x86_avx_addsub_pd_256:
2313 ; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
2315 %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2316 ret <4 x double> %res
2318 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test: llvm.x86.avx.addsub.ps.256 on two <8 x float> values is expected to
; be emitted as `vaddsubps %ymm1, %ymm0, %ymm0`.
2321 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2322 ; CHECK-LABEL: test_x86_avx_addsub_ps_256:
2324 ; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
2326 %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2327 ret <8 x float> %res
2329 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test: the three-operand intrinsic llvm.x86.avx.blendv.pd.256 is expected to
; be emitted as the four-operand `vblendvpd %ymm2, %ymm1, %ymm0, %ymm0`.
2332 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
2333 ; CHECK-LABEL: test_x86_avx_blendv_pd_256:
2335 ; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
2337 %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
2338 ret <4 x double> %res
2340 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Test: the three-operand intrinsic llvm.x86.avx.blendv.ps.256 is expected to
; be emitted as the four-operand `vblendvps %ymm2, %ymm1, %ymm0, %ymm0`.
2343 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
2344 ; CHECK-LABEL: test_x86_avx_blendv_ps_256:
2346 ; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
2348 %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
2349 ret <8 x float> %res
2351 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Test: llvm.x86.avx.cmp.pd.256 called with the immediate i8 7. The expected
; output spells that immediate as a predicate mnemonic rather than a numeric
; operand: `vcmpordpd %ymm1, %ymm0, %ymm0`.
2354 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
2355 ; CHECK-LABEL: test_x86_avx_cmp_pd_256:
2357 ; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0
2359 %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
2360 ret <4 x double> %res
2362 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
; Test: llvm.x86.avx.cmp.ps.256 called with the immediate i8 7, expected to be
; emitted as `vcmpordps %ymm1, %ymm0, %ymm0`. The intrinsic's declaration is
; not next to this function; it follows the pseudo-op test further down.
2365 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
2366 ; CHECK-LABEL: test_x86_avx_cmp_ps_256:
2368 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0
2370 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2371 ret <8 x float> %res
; Test: exercises every immediate of llvm.x86.avx.cmp.ps.256 from 0 through 31.
; The body is a chain of 32 calls: each call compares %a0 against the result of
; the previous call (the first one against %a1) and uses the next immediate.
; The expected output is 32 consecutive compare instructions, one mnemonic per
; immediate in the same order, each reading %ymm1/%ymm0 and writing %ymm1,
; except the last one, which writes %ymm0.
2374 define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
2375 ; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
2377 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1
2378 ; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1
2379 ; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1
2380 ; CHECK-NEXT: vcmpunordps %ymm1, %ymm0, %ymm1
2381 ; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %ymm1
2382 ; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %ymm1
2383 ; CHECK-NEXT: vcmpnleps %ymm1, %ymm0, %ymm1
2384 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm1
2385 ; CHECK-NEXT: vcmpeq_uqps %ymm1, %ymm0, %ymm1
2386 ; CHECK-NEXT: vcmpngeps %ymm1, %ymm0, %ymm1
2387 ; CHECK-NEXT: vcmpngtps %ymm1, %ymm0, %ymm1
2388 ; CHECK-NEXT: vcmpfalseps %ymm1, %ymm0, %ymm1
2389 ; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %ymm1
2390 ; CHECK-NEXT: vcmpgeps %ymm1, %ymm0, %ymm1
2391 ; CHECK-NEXT: vcmpgtps %ymm1, %ymm0, %ymm1
2392 ; CHECK-NEXT: vcmptrueps %ymm1, %ymm0, %ymm1
2393 ; CHECK-NEXT: vcmpeq_osps %ymm1, %ymm0, %ymm1
2394 ; CHECK-NEXT: vcmplt_oqps %ymm1, %ymm0, %ymm1
2395 ; CHECK-NEXT: vcmple_oqps %ymm1, %ymm0, %ymm1
2396 ; CHECK-NEXT: vcmpunord_sps %ymm1, %ymm0, %ymm1
2397 ; CHECK-NEXT: vcmpneq_usps %ymm1, %ymm0, %ymm1
2398 ; CHECK-NEXT: vcmpnlt_uqps %ymm1, %ymm0, %ymm1
2399 ; CHECK-NEXT: vcmpnle_uqps %ymm1, %ymm0, %ymm1
2400 ; CHECK-NEXT: vcmpord_sps %ymm1, %ymm0, %ymm1
2401 ; CHECK-NEXT: vcmpeq_usps %ymm1, %ymm0, %ymm1
2402 ; CHECK-NEXT: vcmpnge_uqps %ymm1, %ymm0, %ymm1
2403 ; CHECK-NEXT: vcmpngt_uqps %ymm1, %ymm0, %ymm1
2404 ; CHECK-NEXT: vcmpfalse_osps %ymm1, %ymm0, %ymm1
2405 ; CHECK-NEXT: vcmpneq_osps %ymm1, %ymm0, %ymm1
2406 ; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1
2407 ; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1
2408 ; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0
2410 %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
2411 %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
2412 %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
2413 %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
2414 %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
2415 %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
2416 %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
2417 %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
2418 %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
2419 %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
2420 %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
2421 %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
2422 %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
2423 %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
2424 %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
2425 %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
2426 %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
2427 %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
2428 %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
2429 %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
2430 %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
2431 %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
2432 %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
2433 %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
2434 %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
2435 %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
2436 %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
2437 %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
2438 %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
2439 %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
2440 %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
2441 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
2442 ret <8 x float> %res
2444 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Test: llvm.x86.avx.cvt.pd2.ps.256 converts a <4 x double> argument to a
; <4 x float> result. Expected output is `vcvtpd2psy %ymm0, %xmm0` (ymm source,
; xmm destination) immediately followed by `vzeroupper`.
2447 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
2448 ; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
2450 ; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
2451 ; CHECK-NEXT: vzeroupper
2453 %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
2454 ret <4 x float> %res
2456 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
; Test: llvm.x86.avx.cvt.pd2dq.256 takes a <4 x double> and returns a
; <4 x i32>. Expected output is `vcvtpd2dqy %ymm0, %xmm0` immediately followed
; by `vzeroupper`.
2459 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
2460 ; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
2462 ; CHECK-NEXT: vcvtpd2dqy %ymm0, %xmm0
2463 ; CHECK-NEXT: vzeroupper
2465 %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
2468 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
; Test: llvm.x86.avx.cvt.ps2.pd.256 takes a <4 x float> and returns a
; <4 x double>; expected output is `vcvtps2pd %xmm0, %ymm0` (xmm source, ymm
; destination).
2471 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
2472 ; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
2474 ; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
2476 %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
2477 ret <4 x double> %res
2479 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
; Test: llvm.x86.avx.cvt.ps2dq.256 takes an <8 x float> and returns an
; <8 x i32>; expected output is `vcvtps2dq %ymm0, %ymm0`.
2482 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
2483 ; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
2485 ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
2487 %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
2490 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
; Test: llvm.x86.avx.cvtdq2.pd.256 takes a <4 x i32> and returns a
; <4 x double>; expected output is `vcvtdq2pd %xmm0, %ymm0`.
2493 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
2494 ; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
2496 ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
2498 %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
2499 ret <4 x double> %res
2501 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
; Test: llvm.x86.avx.cvtdq2.ps.256 takes an <8 x i32> and returns an
; <8 x float>; expected output is `vcvtdq2ps %ymm0, %ymm0`.
2504 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
2505 ; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
2507 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
2509 %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
2510 ret <8 x float> %res
2512 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
; Test: llvm.x86.avx.cvtt.pd2dq.256 takes a <4 x double> and returns a
; <4 x i32>. Expected output is `vcvttpd2dqy %ymm0, %xmm0` immediately followed
; by `vzeroupper`.
2515 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
2516 ; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
2518 ; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
2519 ; CHECK-NEXT: vzeroupper
2521 %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
2524 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
; Test: llvm.x86.avx.cvtt.ps2dq.256 takes an <8 x float> and returns an
; <8 x i32>; expected output is `vcvttps2dq %ymm0, %ymm0`.
2527 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
2528 ; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
2530 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
2532 %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
2535 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
; Test: llvm.x86.avx.dp.ps.256 called with the immediate i8 7. The immediate
; is expected to appear verbatim as the first operand:
; `vdpps $7, %ymm1, %ymm0, %ymm0`.
2538 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
2539 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
2541 ; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
2543 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2544 ret <8 x float> %res
2546 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Test: llvm.x86.avx.hadd.pd.256 on two <4 x double> values is expected to be
; emitted as `vhaddpd %ymm1, %ymm0, %ymm0`.
2549 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
2550 ; CHECK-LABEL: test_x86_avx_hadd_pd_256:
2552 ; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
2554 %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2555 ret <4 x double> %res
2557 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test: llvm.x86.avx.hadd.ps.256 on two <8 x float> values is expected to be
; emitted as `vhaddps %ymm1, %ymm0, %ymm0`.
2560 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
2561 ; CHECK-LABEL: test_x86_avx_hadd_ps_256:
2563 ; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
2565 %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2566 ret <8 x float> %res
2568 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test: llvm.x86.avx.hsub.pd.256 on two <4 x double> values is expected to be
; emitted as `vhsubpd %ymm1, %ymm0, %ymm0`.
2571 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
2572 ; CHECK-LABEL: test_x86_avx_hsub_pd_256:
2574 ; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
2576 %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2577 ret <4 x double> %res
2579 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test: llvm.x86.avx.hsub.ps.256 on two <8 x float> values is expected to be
; emitted as `vhsubps %ymm1, %ymm0, %ymm0`.
2582 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2583 ; CHECK-LABEL: test_x86_avx_hsub_ps_256:
2585 ; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0
2587 %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2588 ret <8 x float> %res
2590 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test: llvm.x86.avx.ldu.dq.256 takes an i8* and returns a <32 x i8>. The
; expected output loads a value from the stack into %eax (any decimal offset
; from %esp matches) and then issues `vlddqu (%eax), %ymm0`.
2593 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
2594 ; CHECK-LABEL: test_x86_avx_ldu_dq_256:
2596 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2597 ; CHECK-NEXT: vlddqu (%eax), %ymm0
2599 %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
2602 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
; Test: llvm.x86.avx.maskload.pd with an i8* and a <2 x double>. The expected
; output loads a value from the stack into %eax (any decimal offset from %esp
; matches) and then issues `vmaskmovpd (%eax), %xmm0, %xmm0`.
2605 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
2606 ; CHECK-LABEL: test_x86_avx_maskload_pd:
2608 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2609 ; CHECK-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0
2611 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
2612 ret <2 x double> %res
2614 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
; Test: llvm.x86.avx.maskload.pd.256 with an i8* and a <4 x double>. The
; expected output loads a value from the stack into %eax and then issues the
; 256-bit form `vmaskmovpd (%eax), %ymm0, %ymm0`.
2617 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
2618 ; CHECK-LABEL: test_x86_avx_maskload_pd_256:
2620 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2621 ; CHECK-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0
2623 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2624 ret <4 x double> %res
2626 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
; Test: llvm.x86.avx.maskload.ps with an i8* and a <4 x float>. The expected
; output loads a value from the stack into %eax and then issues
; `vmaskmovps (%eax), %xmm0, %xmm0`.
2629 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
2630 ; CHECK-LABEL: test_x86_avx_maskload_ps:
2632 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2633 ; CHECK-NEXT: vmaskmovps (%eax), %xmm0, %xmm0
2635 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2636 ret <4 x float> %res
2638 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
; Test: llvm.x86.avx.maskload.ps.256 with an i8* and an <8 x float>. The
; expected output loads a value from the stack into %eax and then issues the
; 256-bit form `vmaskmovps (%eax), %ymm0, %ymm0`.
2641 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
2642 ; CHECK-LABEL: test_x86_avx_maskload_ps_256:
2644 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2645 ; CHECK-NEXT: vmaskmovps (%eax), %ymm0, %ymm0
2647 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2648 ret <8 x float> %res
2650 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
; Test: llvm.x86.avx.maskstore.pd with an i8* and two <2 x double> values.
; The expected output loads a value from the stack into %eax and then issues
; the store form `vmaskmovpd %xmm1, %xmm0, (%eax)`.
2653 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
2654 ; CHECK-LABEL: test_x86_avx_maskstore_pd:
2656 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2657 ; CHECK-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax)
2659 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
2662 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
; Test: llvm.x86.avx.maskstore.pd.256 with an i8* and two <4 x double> values.
; The expected output loads a value from the stack into %eax, issues
; `vmaskmovpd %ymm1, %ymm0, (%eax)`, and then `vzeroupper`.
2665 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
2666 ; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
2668 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2669 ; CHECK-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax)
2670 ; CHECK-NEXT: vzeroupper
2672 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
2675 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
; Test: llvm.x86.avx.maskstore.ps with an i8* and two <4 x float> values.
; The expected output loads a value from the stack into %eax and then issues
; `vmaskmovps %xmm1, %xmm0, (%eax)`.
2678 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
2679 ; CHECK-LABEL: test_x86_avx_maskstore_ps:
2681 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2682 ; CHECK-NEXT: vmaskmovps %xmm1, %xmm0, (%eax)
2684 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
2687 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
; Test: llvm.x86.avx.maskstore.ps.256 with an i8* and two <8 x float> values.
; The expected output loads a value from the stack into %eax, issues
; `vmaskmovps %ymm1, %ymm0, (%eax)`, and then `vzeroupper`.
2690 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
2691 ; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
2693 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2694 ; CHECK-NEXT: vmaskmovps %ymm1, %ymm0, (%eax)
2695 ; CHECK-NEXT: vzeroupper
2697 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
2700 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
; Test: llvm.x86.avx.max.pd.256 on two <4 x double> values is expected to be
; emitted as `vmaxpd %ymm1, %ymm0, %ymm0`.
2703 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
2704 ; CHECK-LABEL: test_x86_avx_max_pd_256:
2706 ; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
2708 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2709 ret <4 x double> %res
2711 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test: llvm.x86.avx.max.ps.256 on two <8 x float> values is expected to be
; emitted as `vmaxps %ymm1, %ymm0, %ymm0`.
2714 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
2715 ; CHECK-LABEL: test_x86_avx_max_ps_256:
2717 ; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0
2719 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2720 ret <8 x float> %res
2722 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test: llvm.x86.avx.min.pd.256 on two <4 x double> values is expected to be
; emitted as `vminpd %ymm1, %ymm0, %ymm0`.
2725 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
2726 ; CHECK-LABEL: test_x86_avx_min_pd_256:
2728 ; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0
2730 %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2731 ret <4 x double> %res
2733 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test: llvm.x86.avx.min.ps.256 on two <8 x float> values is expected to be
; emitted as `vminps %ymm1, %ymm0, %ymm0`.
2736 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
2737 ; CHECK-LABEL: test_x86_avx_min_ps_256:
2739 ; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0
2741 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2742 ret <8 x float> %res
2744 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test: llvm.x86.avx.movmsk.pd.256 takes a <4 x double> and returns an i32.
; Expected output is `vmovmskpd %ymm0, %eax` immediately followed by
; `vzeroupper`.
2747 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
2748 ; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
2750 ; CHECK-NEXT: vmovmskpd %ymm0, %eax
2751 ; CHECK-NEXT: vzeroupper
2753 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
2756 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
; Test: llvm.x86.avx.movmsk.ps.256 takes an <8 x float> and returns an i32.
; Expected output is `vmovmskps %ymm0, %eax` immediately followed by
; `vzeroupper`.
2759 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
2760 ; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
2762 ; CHECK-NEXT: vmovmskps %ymm0, %eax
2763 ; CHECK-NEXT: vzeroupper
2765 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
2768 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
; Test: llvm.x86.avx.ptestc.256 on two <4 x i64> values returns an i32. The
; expected sequence is `vptest %ymm1, %ymm0`, then `sbbl %eax, %eax` and
; `andl $1, %eax` to form the result, then `vzeroupper`.
2776 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
2777 ; CHECK-LABEL: test_x86_avx_ptestc_256:
2779 ; CHECK-NEXT: vptest %ymm1, %ymm0
2780 ; CHECK-NEXT: sbbl %eax, %eax
2781 ; CHECK-NEXT: andl $1, %eax
2782 ; CHECK-NEXT: vzeroupper
2784 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2787 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
; Test: llvm.x86.avx.ptestnzc.256 on two <4 x i64> values returns an i32. The
; expected sequence is `vptest %ymm1, %ymm0`, then `seta %al` and
; `movzbl %al, %eax` to form the result, then `vzeroupper`.
2790 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
2791 ; CHECK-LABEL: test_x86_avx_ptestnzc_256:
2793 ; CHECK-NEXT: vptest %ymm1, %ymm0
2794 ; CHECK-NEXT: seta %al
2795 ; CHECK-NEXT: movzbl %al, %eax
2796 ; CHECK-NEXT: vzeroupper
2798 %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2801 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
; Test: llvm.x86.avx.ptestz.256 is expected to select vptest on %ymm registers,
; with the i32 result materialized by sete/movzbl, followed by vzeroupper.
2804 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
2805 ; CHECK-LABEL: test_x86_avx_ptestz_256:
2807 ; CHECK-NEXT: vptest %ymm1, %ymm0
2808 ; CHECK-NEXT: sete %al
2809 ; CHECK-NEXT: movzbl %al, %eax
2810 ; CHECK-NEXT: vzeroupper
2812 %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2815 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
; Test: llvm.x86.avx.rcp.ps.256 is expected to select vrcpps on %ymm0.
2818 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
2819 ; CHECK-LABEL: test_x86_avx_rcp_ps_256:
2821 ; CHECK-NEXT: vrcpps %ymm0, %ymm0
2823 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2824 ret <8 x float> %res
2826 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
; Test: llvm.x86.avx.round.pd.256 with the i32 operand 7 is expected to select
; vroundpd with immediate $7 on %ymm0.
2829 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
2830 ; CHECK-LABEL: test_x86_avx_round_pd_256:
2832 ; CHECK-NEXT: vroundpd $7, %ymm0, %ymm0
2834 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
2835 ret <4 x double> %res
2837 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
; Test: llvm.x86.avx.round.ps.256 with the i32 operand 7 is expected to select
; vroundps with immediate $7 on %ymm0.
2840 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
2841 ; CHECK-LABEL: test_x86_avx_round_ps_256:
2843 ; CHECK-NEXT: vroundps $7, %ymm0, %ymm0
2845 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
2846 ret <8 x float> %res
2848 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
; Test: llvm.x86.avx.rsqrt.ps.256 is expected to select vrsqrtps on %ymm0.
2851 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
2852 ; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
2854 ; CHECK-NEXT: vrsqrtps %ymm0, %ymm0
2856 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2857 ret <8 x float> %res
2859 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
; Test: llvm.x86.avx.sqrt.pd.256 is expected to select vsqrtpd on %ymm0.
2862 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
2863 ; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
2865 ; CHECK-NEXT: vsqrtpd %ymm0, %ymm0
2867 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
2868 ret <4 x double> %res
2870 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
; Test: llvm.x86.avx.sqrt.ps.256 is expected to select vsqrtps on %ymm0.
2873 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
2874 ; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
2876 ; CHECK-NEXT: vsqrtps %ymm0, %ymm0
2878 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2879 ret <8 x float> %res
2881 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
; Test: llvm.x86.avx.storeu.dq.256 of a <32 x i8> value. The value is first
; incremented by a splat of 1; the checks expect that add to be done as two
; 128-bit vpaddb halves recombined with vinsertf128, and the store itself to be
; emitted as vmovups.
2884 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
2885 ; FIXME: Unfortunately the execution domain fix pass changes this to vmovups, and it's hard to force the integer domain because there are no 256-bit integer instructions.
2886 ; The add operation is intended to force the execution domain.
2887 ; CHECK-LABEL: test_x86_avx_storeu_dq_256:
2889 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2890 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
2891 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2892 ; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
2893 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2894 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2895 ; CHECK-NEXT: vmovups %ymm0, (%eax)
2896 ; CHECK-NEXT: vzeroupper
2898 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2899 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
2902 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
; Test: llvm.x86.avx.storeu.pd.256 is expected to select vmovupd of %ymm0 to
; the address in %eax. The stored value is produced by an fadd with zero, which
; the checks expect as vxorpd + vaddpd ahead of the store.
2905 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
2906 ; add operation forces the execution domain.
2907 ; CHECK-LABEL: test_x86_avx_storeu_pd_256:
2909 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2910 ; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
2911 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
2912 ; CHECK-NEXT: vmovupd %ymm0, (%eax)
2913 ; CHECK-NEXT: vzeroupper
2915 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
2916 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
2919 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
; Test: llvm.x86.avx.storeu.ps.256 is expected to select vmovups of %ymm0 to
; the address in %eax, followed by vzeroupper.
2922 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
2923 ; CHECK-LABEL: test_x86_avx_storeu_ps_256:
2925 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2926 ; CHECK-NEXT: vmovups %ymm0, (%eax)
2927 ; CHECK-NEXT: vzeroupper
2929 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
2932 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
; Test: llvm.x86.avx.vbroadcastf128.pd.256 on an i8* operand is expected to
; select vbroadcastf128 with a memory operand at (%eax) into %ymm0.
2935 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
2936 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
2938 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2939 ; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
2941 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
2942 ret <4 x double> %res
2944 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
; Test: llvm.x86.avx.vbroadcastf128.ps.256 on an i8* operand is expected to
; select vbroadcastf128 with a memory operand at (%eax) into %ymm0.
2947 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
2948 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
2950 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2951 ; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
2953 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
2954 ret <8 x float> %res
2956 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
; Test: llvm.x86.avx.vperm2f128.pd.256 with immediate 7 is expected to select
; vperm2f128 producing ymm0 = ymm1[2,3],ymm0[0,1].
2959 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
2960 ; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
2962 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2964 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
2965 ret <4 x double> %res
2967 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
; Test: llvm.x86.avx.vperm2f128.ps.256 with immediate 7 is expected to select
; vperm2f128 producing ymm0 = ymm1[2,3],ymm0[0,1].
2970 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
2971 ; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
2973 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2975 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2976 ret <8 x float> %res
2978 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Test: llvm.x86.avx.vperm2f128.si.256 (the <8 x i32> variant) with immediate 7
; is expected to select vperm2f128 producing ymm0 = ymm1[2,3],ymm0[0,1].
2981 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
2982 ; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
2984 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2986 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
2989 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
; Test: llvm.x86.avx.vpermil.pd with immediate 1 is expected to select
; vpermilpd producing xmm0 = xmm0[1,0].
2992 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
2993 ; CHECK-LABEL: test_x86_avx_vpermil_pd:
2995 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
2997 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
2998 ret <2 x double> %res
3000 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
; Test: llvm.x86.avx.vpermil.pd.256 with immediate 7 is expected to select
; vpermilpd producing ymm0 = ymm0[1,1,3,2].
3003 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
3004 ; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
3006 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
3008 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
3009 ret <4 x double> %res
3011 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
; Test: llvm.x86.avx.vpermil.ps with immediate 7 is expected to select
; vpermilps producing xmm0 = xmm0[3,1,0,0].
3014 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
3015 ; CHECK-LABEL: test_x86_avx_vpermil_ps:
3017 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
3019 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
3020 ret <4 x float> %res
3022 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
; Test: llvm.x86.avx.vpermil.ps.256 with immediate 7 is expected to select
; vpermilps producing ymm0 = ymm0[3,1,0,0,7,5,4,4].
3025 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
3026 ; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
3028 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
3030 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
3031 ret <8 x float> %res
3033 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
; Test: llvm.x86.avx.vpermilvar.pd with a <2 x i64> control vector is expected
; to select the register-controlled form of vpermilpd on %xmm registers.
3036 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
3037 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
3039 ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
3041 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
3042 ret <2 x double> %res
3044 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
; Test: llvm.x86.avx.vpermilvar.pd.256 with a <4 x i64> control vector is
; expected to select the register-controlled form of vpermilpd on %ymm
; registers.
3047 define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
3048 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
3050 ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
3052 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
3053 ret <4 x double> %res
3055 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
; Test: llvm.x86.avx.vpermilvar.ps with a <4 x i32> control vector in a
; register is expected to select vpermilps on %xmm registers.
3058 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
3059 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
3061 ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
3063 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
3064 ret <4 x float> %res
; Test: when the <4 x i32> control vector of llvm.x86.avx.vpermilvar.ps comes
; from a load, the checks expect the load to be folded into vpermilps as a
; memory operand at (%eax).
3066 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
3067 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
3069 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3070 ; CHECK-NEXT: vpermilps (%eax), %xmm0, %xmm0
3072 %a2 = load <4 x i32>, <4 x i32>* %a1
3073 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
3074 ret <4 x float> %res
3076 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
; Test: llvm.x86.avx.vpermilvar.ps.256 with an <8 x i32> control vector is
; expected to select the register-controlled form of vpermilps on %ymm
; registers.
3079 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
3080 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
3082 ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
3084 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
3085 ret <8 x float> %res
3087 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
; Test: llvm.x86.avx.vtestc.pd is expected to select vtestpd on %xmm registers,
; with the i32 result materialized by sbbl/andl $1.
3090 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
3091 ; CHECK-LABEL: test_x86_avx_vtestc_pd:
3093 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
3094 ; CHECK-NEXT: sbbl %eax, %eax
3095 ; CHECK-NEXT: andl $1, %eax
3097 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3100 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
; Test: llvm.x86.avx.vtestc.pd.256 is expected to select vtestpd on %ymm
; registers, with the i32 result materialized by sbbl/andl $1, followed by
; vzeroupper.
3103 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
3104 ; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
3106 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
3107 ; CHECK-NEXT: sbbl %eax, %eax
3108 ; CHECK-NEXT: andl $1, %eax
3109 ; CHECK-NEXT: vzeroupper
3111 %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3114 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test: llvm.x86.avx.vtestc.ps is expected to select vtestps on %xmm registers,
; with the i32 result materialized by sbbl/andl $1.
3117 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
3118 ; CHECK-LABEL: test_x86_avx_vtestc_ps:
3120 ; CHECK-NEXT: vtestps %xmm1, %xmm0
3121 ; CHECK-NEXT: sbbl %eax, %eax
3122 ; CHECK-NEXT: andl $1, %eax
3124 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3127 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.avx.vtestc.ps.256 is expected to select vtestps on %ymm
; registers, with the i32 result materialized by sbbl/andl $1, followed by
; vzeroupper.
3130 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
3131 ; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
3133 ; CHECK-NEXT: vtestps %ymm1, %ymm0
3134 ; CHECK-NEXT: sbbl %eax, %eax
3135 ; CHECK-NEXT: andl $1, %eax
3136 ; CHECK-NEXT: vzeroupper
3138 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3141 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test: llvm.x86.avx.vtestnzc.pd is expected to select vtestpd on %xmm
; registers, with the i32 result materialized by seta/movzbl.
3144 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
3145 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
3147 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
3148 ; CHECK-NEXT: seta %al
3149 ; CHECK-NEXT: movzbl %al, %eax
3151 %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3154 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
; Test: llvm.x86.avx.vtestnzc.pd.256 is expected to select vtestpd on %ymm
; registers, with the i32 result materialized by seta/movzbl, followed by
; vzeroupper.
3157 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
3158 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
3160 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
3161 ; CHECK-NEXT: seta %al
3162 ; CHECK-NEXT: movzbl %al, %eax
3163 ; CHECK-NEXT: vzeroupper
3165 %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3168 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test: llvm.x86.avx.vtestnzc.ps is expected to select vtestps on %xmm
; registers, with the i32 result materialized by seta/movzbl.
3171 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
3172 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
3174 ; CHECK-NEXT: vtestps %xmm1, %xmm0
3175 ; CHECK-NEXT: seta %al
3176 ; CHECK-NEXT: movzbl %al, %eax
3178 %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3181 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.avx.vtestnzc.ps.256 is expected to select vtestps on %ymm
; registers, with the i32 result materialized by seta/movzbl, followed by
; vzeroupper.
3184 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
3185 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
3187 ; CHECK-NEXT: vtestps %ymm1, %ymm0
3188 ; CHECK-NEXT: seta %al
3189 ; CHECK-NEXT: movzbl %al, %eax
3190 ; CHECK-NEXT: vzeroupper
3192 %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3195 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test: llvm.x86.avx.vtestz.pd is expected to select vtestpd on %xmm registers,
; with the i32 result materialized by sete/movzbl.
3198 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
3199 ; CHECK-LABEL: test_x86_avx_vtestz_pd:
3201 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
3202 ; CHECK-NEXT: sete %al
3203 ; CHECK-NEXT: movzbl %al, %eax
3205 %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3208 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
; Test: llvm.x86.avx.vtestz.pd.256 is expected to select vtestpd on %ymm
; registers, with the i32 result materialized by sete/movzbl, followed by
; vzeroupper.
3211 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
3212 ; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
3214 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
3215 ; CHECK-NEXT: sete %al
3216 ; CHECK-NEXT: movzbl %al, %eax
3217 ; CHECK-NEXT: vzeroupper
3219 %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3222 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Test: llvm.x86.avx.vtestz.ps is expected to select vtestps on %xmm registers,
; with the i32 result materialized by sete/movzbl.
3225 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
3226 ; CHECK-LABEL: test_x86_avx_vtestz_ps:
3228 ; CHECK-NEXT: vtestps %xmm1, %xmm0
3229 ; CHECK-NEXT: sete %al
3230 ; CHECK-NEXT: movzbl %al, %eax
3232 %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3235 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
; Test: llvm.x86.avx.vtestz.ps.256 is expected to select vtestps on %ymm
; registers, with the i32 result materialized by sete/movzbl, followed by
; vzeroupper.
3238 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
3239 ; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
3241 ; CHECK-NEXT: vtestps %ymm1, %ymm0
3242 ; CHECK-NEXT: sete %al
3243 ; CHECK-NEXT: movzbl %al, %eax
3244 ; CHECK-NEXT: vzeroupper
3246 %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3249 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Test: llvm.x86.avx.vzeroall is expected to select vzeroall; the checks also
; expect a vzeroupper immediately after it.
3252 define void @test_x86_avx_vzeroall() {
3253 ; CHECK-LABEL: test_x86_avx_vzeroall:
3255 ; CHECK-NEXT: vzeroall
3256 ; CHECK-NEXT: vzeroupper
3258 call void @llvm.x86.avx.vzeroall()
3261 declare void @llvm.x86.avx.vzeroall() nounwind
; Test: llvm.x86.avx.vzeroupper is expected to select vzeroupper.
; NOTE(review): the checks pin two consecutive vzeroupper instructions for a
; single intrinsic call; presumably the second one is inserted automatically
; before the return -- confirm whether the duplicate is intended.
3264 define void @test_x86_avx_vzeroupper() {
3265 ; CHECK-LABEL: test_x86_avx_vzeroupper:
3267 ; CHECK-NEXT: vzeroupper
3268 ; CHECK-NEXT: vzeroupper
3270 call void @llvm.x86.avx.vzeroupper()
3273 declare void @llvm.x86.avx.vzeroupper() nounwind
3275 ; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work.
; Test: llvm.x86.sse3.monitor is expected to select monitor after three stack
; loads into %edx, %ecx and %eax and a leal (%eax), %eax.
3277 define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
3278 ; CHECK-LABEL: monitor:
3280 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
3281 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
3282 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3283 ; CHECK-NEXT: leal (%eax), %eax
3284 ; CHECK-NEXT: monitor
3286 tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
3289 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
; Test: lowering of llvm.x86.sse3.mwait. The checks visible here expect two
; stack loads, into %ecx and %eax.
3291 define void @mwait(i32 %E, i32 %H) nounwind {
3292 ; CHECK-LABEL: mwait:
3294 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
3295 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3298 tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
3301 declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
; Test: llvm.x86.sse.sfence is expected to select sfence.
3303 define void @sfence() nounwind {
3304 ; CHECK-LABEL: sfence:
3306 ; CHECK-NEXT: sfence
3308 tail call void @llvm.x86.sse.sfence()
3311 declare void @llvm.x86.sse.sfence() nounwind
; Test: llvm.x86.sse2.lfence is expected to select lfence.
3313 define void @lfence() nounwind {
3314 ; CHECK-LABEL: lfence:
3316 ; CHECK-NEXT: lfence
3318 tail call void @llvm.x86.sse2.lfence()
3321 declare void @llvm.x86.sse2.lfence() nounwind
; Test: llvm.x86.sse2.mfence is expected to select mfence.
3323 define void @mfence() nounwind {
3324 ; CHECK-LABEL: mfence:
3326 ; CHECK-NEXT: mfence
3328 tail call void @llvm.x86.sse2.mfence()
3331 declare void @llvm.x86.sse2.mfence() nounwind
; Test: llvm.x86.sse2.clflush on an i8* operand is expected to select clflush
; with a memory operand at (%eax).
3333 define void @clflush(i8* %p) nounwind {
3334 ; CHECK-LABEL: clflush:
3336 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3337 ; CHECK-NEXT: clflush (%eax)
3339 tail call void @llvm.x86.sse2.clflush(i8* %p)
3342 declare void @llvm.x86.sse2.clflush(i8*) nounwind
; Test: llvm.x86.sse42.crc32.32.8 is expected to select crc32b with a stack
; memory operand accumulating into %eax.
3344 define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
3345 ; CHECK-LABEL: crc32_32_8:
3347 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3348 ; CHECK-NEXT: crc32b {{[0-9]+}}(%esp), %eax
3350 %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
3353 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
; Test: llvm.x86.sse42.crc32.32.16 is expected to select crc32w with a stack
; memory operand accumulating into %eax.
3355 define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
3356 ; CHECK-LABEL: crc32_32_16:
3358 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3359 ; CHECK-NEXT: crc32w {{[0-9]+}}(%esp), %eax
3361 %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
3364 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
; Test: llvm.x86.sse42.crc32.32.32 is expected to select crc32l with a stack
; memory operand accumulating into %eax.
3366 define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
3367 ; CHECK-LABEL: crc32_32_32:
3369 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3370 ; CHECK-NEXT: crc32l {{[0-9]+}}(%esp), %eax
3372 %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
3375 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
; Test: llvm.x86.avx.movnt.dq.256 is expected to select vmovntdq of %ymm0 to
; the address in %eax. The stored <4 x i64> is built by adding a splat of 1 to
; a <2 x i64> value and widening it with a shufflevector whose upper lanes are
; undef (presumably the add is there to force the integer execution domain, as
; in the storeu tests -- confirm).
; The add's constant operand is a constant-pool label whose number depends on
; this function's position in the file, so it is matched with a regex instead
; of a hard-coded LCPI index; otherwise adding or removing any earlier test
; would break this check.
3377 define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
3378 ; CHECK-LABEL: movnt_dq:
3380 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3381 ; CHECK-NEXT: vpaddq {{LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
3382 ; CHECK-NEXT: vmovntdq %ymm0, (%eax)
3383 ; CHECK-NEXT: vzeroupper
3385 %a2 = add <2 x i64> %a1, <i64 1, i64 1>
3386 %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
3387 tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
3390 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
; Test: llvm.x86.avx.movnt.ps.256 is expected to select vmovntps of %ymm0 to
; the address in %eax, followed by vzeroupper.
3392 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
3393 ; CHECK-LABEL: movnt_ps:
3395 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3396 ; CHECK-NEXT: vmovntps %ymm0, (%eax)
3397 ; CHECK-NEXT: vzeroupper
3399 tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
3402 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
; Test: llvm.x86.avx.movnt.pd.256 is expected to select vmovntpd of %ymm0 to
; the address in %eax. The stored value is produced by an fadd with zero, which
; the checks expect as vxorpd + vaddpd ahead of the store.
3404 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
3405 ; add operation forces the execution domain.
3406 ; CHECK-LABEL: movnt_pd:
3408 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3409 ; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
3410 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
3411 ; CHECK-NEXT: vmovntpd %ymm0, (%eax)
3412 ; CHECK-NEXT: vzeroupper
3414 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
3415 tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
3418 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
3421 ; Check for pclmulqdq: llvm.x86.pclmulqdq with immediate 0 is expected to select vpclmulqdq $0 on %xmm registers.
3422 define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
3423 ; CHECK-LABEL: test_x86_pclmulqdq:
3425 ; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0
3427 %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
3430 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone