1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s
; Lowering test: @llvm.x86.aesni.aesdec on two <2 x i64> operands is expected to emit `vaesdec`.
3 define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
4 ; CHECK-LABEL: test_x86_aesni_aesdec:
6 ; CHECK-NEXT: vaesdec %xmm1, %xmm0, %xmm0
8 %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
11 declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
; Lowering test: @llvm.x86.aesni.aesdeclast on two <2 x i64> operands is expected to emit `vaesdeclast`.
14 define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
15 ; CHECK-LABEL: test_x86_aesni_aesdeclast:
17 ; CHECK-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0
19 %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
22 declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
; Lowering test: @llvm.x86.aesni.aesenc on two <2 x i64> operands is expected to emit `vaesenc`.
25 define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
26 ; CHECK-LABEL: test_x86_aesni_aesenc:
28 ; CHECK-NEXT: vaesenc %xmm1, %xmm0, %xmm0
30 %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
33 declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
; Lowering test: @llvm.x86.aesni.aesenclast on two <2 x i64> operands is expected to emit `vaesenclast`.
36 define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
37 ; CHECK-LABEL: test_x86_aesni_aesenclast:
39 ; CHECK-NEXT: vaesenclast %xmm1, %xmm0, %xmm0
41 %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
44 declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
; Lowering test: the single-operand @llvm.x86.aesni.aesimc is expected to emit `vaesimc` in place on %xmm0.
47 define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
48 ; CHECK-LABEL: test_x86_aesni_aesimc:
50 ; CHECK-NEXT: vaesimc %xmm0, %xmm0
52 %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
55 declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
; Lowering test: @llvm.x86.aesni.aeskeygenassist with the constant i8 7 is expected to emit
; `vaeskeygenassist` carrying that constant as the `$7` immediate.
58 define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
59 ; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
61 ; CHECK-NEXT: vaeskeygenassist $7, %xmm0, %xmm0
63 %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
66 declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
; Lowering test: @llvm.x86.sse2.add.sd on two <2 x double> operands is expected to emit `vaddsd`.
69 define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
70 ; CHECK-LABEL: test_x86_sse2_add_sd:
72 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0
74 %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
77 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cmp.pd called with predicate operand i8 7 is expected to be
; printed with the predicate folded into the mnemonic, i.e. `vcmpordpd`.
80 define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
81 ; CHECK-LABEL: test_x86_sse2_cmp_pd:
83 ; CHECK-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
85 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
88 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
; Lowering test: @llvm.x86.sse2.cmp.sd called with predicate operand i8 7 is expected to be
; printed with the predicate folded into the mnemonic, i.e. `vcmpordsd`.
91 define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
92 ; CHECK-LABEL: test_x86_sse2_cmp_sd:
94 ; CHECK-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0
96 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
99 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
; Lowering test: @llvm.x86.sse2.comieq.sd is expected to emit `vcomisd`, then build the i32
; result from the flags with `sete` and a zero-extending `movzbl`.
102 define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
103 ; CHECK-LABEL: test_x86_sse2_comieq_sd:
105 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
106 ; CHECK-NEXT: sete %al
107 ; CHECK-NEXT: movzbl %al, %eax
109 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
112 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.comige.sd is expected to emit `vcomisd`, then build the i32
; result from the flags with `setae` and a zero-extending `movzbl`.
115 define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
116 ; CHECK-LABEL: test_x86_sse2_comige_sd:
118 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
119 ; CHECK-NEXT: setae %al
120 ; CHECK-NEXT: movzbl %al, %eax
122 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
125 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.comigt.sd is expected to emit `vcomisd`, then build the i32
; result from the flags with `seta` and a zero-extending `movzbl`.
128 define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
129 ; CHECK-LABEL: test_x86_sse2_comigt_sd:
131 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
132 ; CHECK-NEXT: seta %al
133 ; CHECK-NEXT: movzbl %al, %eax
135 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
138 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.comile.sd is expected to emit `vcomisd`, then build the i32
; result from the flags with `setbe` and a zero-extending `movzbl`.
141 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
142 ; CHECK-LABEL: test_x86_sse2_comile_sd:
144 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
145 ; CHECK-NEXT: setbe %al
146 ; CHECK-NEXT: movzbl %al, %eax
148 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
151 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.comilt.sd is expected to emit `vcomisd`. Unlike the sibling
; comi* tests, the expected result sequence here is `sbbl %eax, %eax` followed by
; `andl $1, %eax` rather than a setcc/movzbl pair.
154 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
155 ; CHECK-LABEL: test_x86_sse2_comilt_sd:
157 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
158 ; CHECK-NEXT: sbbl %eax, %eax
159 ; CHECK-NEXT: andl $1, %eax
161 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
164 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.comineq.sd is expected to emit `vcomisd`, then build the i32
; result from the flags with `setne` and a zero-extending `movzbl`.
167 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
168 ; CHECK-LABEL: test_x86_sse2_comineq_sd:
170 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
171 ; CHECK-NEXT: setne %al
172 ; CHECK-NEXT: movzbl %al, %eax
174 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
177 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtdq2pd (<4 x i32> in, <2 x double> out) is expected to emit `vcvtdq2pd`.
180 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
181 ; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
183 ; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
185 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
186 ret <2 x double> %res
188 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtdq2ps (<4 x i32> in, <4 x float> out) is expected to emit `vcvtdq2ps`.
191 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
192 ; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
194 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
196 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
199 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtpd2dq (<2 x double> in, <4 x i32> out) is expected to emit `vcvtpd2dq`.
202 define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
203 ; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
205 ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
207 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
210 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtpd2ps (<2 x double> in, <4 x float> out) is expected to emit `vcvtpd2ps`.
213 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
214 ; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
216 ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
218 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
221 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtps2dq (<4 x float> in, <4 x i32> out) is expected to emit `vcvtps2dq`.
224 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
225 ; CHECK-LABEL: test_x86_sse2_cvtps2dq:
227 ; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
229 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
232 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtps2pd (<4 x float> in, <2 x double> out) is expected to emit `vcvtps2pd`.
235 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
236 ; CHECK-LABEL: test_x86_sse2_cvtps2pd:
238 ; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
240 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
241 ret <2 x double> %res
243 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtsd2si is expected to emit `vcvtsd2si` writing the i32 result to %eax.
246 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
247 ; CHECK-LABEL: test_x86_sse2_cvtsd2si:
249 ; CHECK-NEXT: vcvtsd2si %xmm0, %eax
251 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
254 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtsd2ss (<4 x float>, <2 x double>) is expected to emit the
; three-operand `vcvtsd2ss %xmm1, %xmm0, %xmm0`.
257 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
258 ; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
260 ; CHECK-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
262 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
265 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtsi2sd with the constant i32 7. The expected output first
; loads the constant into %eax (`movl $7, %eax`) and then converts it with `vcvtsi2sdl`.
268 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
269 ; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
271 ; CHECK-NEXT: movl $7, %eax
272 ; CHECK-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
274 %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
275 ret <2 x double> %res
277 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvtss2sd (<2 x double>, <4 x float>) is expected to emit the
; three-operand `vcvtss2sd %xmm1, %xmm0, %xmm0`.
280 define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
281 ; CHECK-LABEL: test_x86_sse2_cvtss2sd:
283 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
285 %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
286 ret <2 x double> %res
288 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvttpd2dq (<2 x double> in, <4 x i32> out) is expected to emit `vcvttpd2dq`.
291 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
292 ; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
294 ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
296 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
299 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvttps2dq (<4 x float> in, <4 x i32> out) is expected to emit `vcvttps2dq`.
302 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
303 ; CHECK-LABEL: test_x86_sse2_cvttps2dq:
305 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
307 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
310 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
; Lowering test: @llvm.x86.sse2.cvttsd2si is expected to emit `vcvttsd2si` writing the i32 result to %eax.
313 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
314 ; CHECK-LABEL: test_x86_sse2_cvttsd2si:
316 ; CHECK-NEXT: vcvttsd2si %xmm0, %eax
318 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
321 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.div.sd on two <2 x double> operands is expected to emit `vdivsd`.
324 define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
325 ; CHECK-LABEL: test_x86_sse2_div_sd:
327 ; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0
329 %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
330 ret <2 x double> %res
332 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.max.pd on two <2 x double> operands is expected to emit `vmaxpd`.
336 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
337 ; CHECK-LABEL: test_x86_sse2_max_pd:
339 ; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
341 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
342 ret <2 x double> %res
344 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.max.sd on two <2 x double> operands is expected to emit `vmaxsd`.
347 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
348 ; CHECK-LABEL: test_x86_sse2_max_sd:
350 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
352 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
353 ret <2 x double> %res
355 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.min.pd on two <2 x double> operands is expected to emit `vminpd`.
358 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
359 ; CHECK-LABEL: test_x86_sse2_min_pd:
361 ; CHECK-NEXT: vminpd %xmm1, %xmm0, %xmm0
363 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
364 ret <2 x double> %res
366 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.min.sd on two <2 x double> operands is expected to emit `vminsd`.
369 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
370 ; CHECK-LABEL: test_x86_sse2_min_sd:
372 ; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0
374 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
375 ret <2 x double> %res
377 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.movmsk.pd is expected to emit `vmovmskpd` writing the i32 result to %eax.
380 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
381 ; CHECK-LABEL: test_x86_sse2_movmsk_pd:
383 ; CHECK-NEXT: vmovmskpd %xmm0, %eax
385 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
388 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.mul.sd on two <2 x double> operands is expected to emit `vmulsd`.
393 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
394 ; CHECK-LABEL: test_x86_sse2_mul_sd:
396 ; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0
398 %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
399 ret <2 x double> %res
401 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.packssdw.128 (two <4 x i32> in, <8 x i16> out) is expected to emit `vpackssdw`.
404 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
405 ; CHECK-LABEL: test_x86_sse2_packssdw_128:
407 ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
409 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
412 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
; Lowering test: @llvm.x86.sse2.packsswb.128 (two <8 x i16> in, <16 x i8> out) is expected to emit `vpacksswb`.
415 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
416 ; CHECK-LABEL: test_x86_sse2_packsswb_128:
418 ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
420 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
423 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.packuswb.128 (two <8 x i16> in, <16 x i8> out) is expected to emit `vpackuswb`.
426 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
427 ; CHECK-LABEL: test_x86_sse2_packuswb_128:
429 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
431 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
434 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.padds.b on two <16 x i8> operands is expected to emit `vpaddsb`.
437 define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
438 ; CHECK-LABEL: test_x86_sse2_padds_b:
440 ; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
442 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
445 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.padds.w on two <8 x i16> operands is expected to emit `vpaddsw`.
448 define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
449 ; CHECK-LABEL: test_x86_sse2_padds_w:
451 ; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
453 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
456 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.paddus.b on two <16 x i8> operands is expected to emit `vpaddusb`.
459 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
460 ; CHECK-LABEL: test_x86_sse2_paddus_b:
462 ; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
464 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
467 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.paddus.w on two <8 x i16> operands is expected to emit `vpaddusw`.
470 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
471 ; CHECK-LABEL: test_x86_sse2_paddus_w:
473 ; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
475 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
478 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pavg.b on two <16 x i8> operands is expected to emit `vpavgb`.
481 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
482 ; CHECK-LABEL: test_x86_sse2_pavg_b:
484 ; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0
486 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
489 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pavg.w on two <8 x i16> operands is expected to emit `vpavgw`.
492 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
493 ; CHECK-LABEL: test_x86_sse2_pavg_w:
495 ; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0
497 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
500 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pmadd.wd (two <8 x i16> in, <4 x i32> out) is expected to emit `vpmaddwd`.
503 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
504 ; CHECK-LABEL: test_x86_sse2_pmadd_wd:
506 ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
508 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
511 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pmaxs.w on two <8 x i16> operands is expected to emit `vpmaxsw`.
514 define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
515 ; CHECK-LABEL: test_x86_sse2_pmaxs_w:
517 ; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
519 %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
522 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pmaxu.b on two <16 x i8> operands is expected to emit `vpmaxub`.
525 define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
526 ; CHECK-LABEL: test_x86_sse2_pmaxu_b:
528 ; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
530 %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
533 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pmins.w on two <8 x i16> operands is expected to emit `vpminsw`.
536 define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
537 ; CHECK-LABEL: test_x86_sse2_pmins_w:
539 ; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0
541 %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
544 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pminu.b on two <16 x i8> operands is expected to emit `vpminub`.
547 define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
548 ; CHECK-LABEL: test_x86_sse2_pminu_b:
550 ; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0
552 %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
555 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pmovmskb.128 is expected to emit `vpmovmskb` writing the i32 result to %eax.
558 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
559 ; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
561 ; CHECK-NEXT: vpmovmskb %xmm0, %eax
563 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
566 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pmulh.w on two <8 x i16> operands is expected to emit `vpmulhw`.
569 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
570 ; CHECK-LABEL: test_x86_sse2_pmulh_w:
572 ; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
574 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
577 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pmulhu.w on two <8 x i16> operands is expected to emit `vpmulhuw`.
580 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
581 ; CHECK-LABEL: test_x86_sse2_pmulhu_w:
583 ; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
585 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
588 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pmulu.dq (two <4 x i32> in, <2 x i64> out) is expected to emit `vpmuludq`.
591 define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
592 ; CHECK-LABEL: test_x86_sse2_pmulu_dq:
594 ; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
596 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
599 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psad.bw (two <16 x i8> in, <2 x i64> out) is expected to emit `vpsadbw`.
602 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
603 ; CHECK-LABEL: test_x86_sse2_psad_bw:
605 ; CHECK-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
607 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
610 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psll.d with a vector shift-count operand is expected to emit
; the register form `vpslld %xmm1, %xmm0, %xmm0`.
613 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
614 ; CHECK-LABEL: test_x86_sse2_psll_d:
616 ; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
618 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
621 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psll.q with a vector shift-count operand is expected to emit
; the register form `vpsllq %xmm1, %xmm0, %xmm0`.
624 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
625 ; CHECK-LABEL: test_x86_sse2_psll_q:
627 ; CHECK-NEXT: vpsllq %xmm1, %xmm0, %xmm0
629 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
632 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psll.w with a vector shift-count operand is expected to emit
; the register form `vpsllw %xmm1, %xmm0, %xmm0`.
635 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
636 ; CHECK-LABEL: test_x86_sse2_psll_w:
638 ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0
640 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
643 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.pslli.d with the constant count i32 7 is expected to emit the
; immediate form `vpslld $7, %xmm0, %xmm0`.
646 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
647 ; CHECK-LABEL: test_x86_sse2_pslli_d:
649 ; CHECK-NEXT: vpslld $7, %xmm0, %xmm0
651 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
654 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.pslli.q with the constant count i32 7 is expected to emit the
; immediate form `vpsllq $7, %xmm0, %xmm0`.
657 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
658 ; CHECK-LABEL: test_x86_sse2_pslli_q:
660 ; CHECK-NEXT: vpsllq $7, %xmm0, %xmm0
662 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
665 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.pslli.w with the constant count i32 7 is expected to emit the
; immediate form `vpsllw $7, %xmm0, %xmm0`.
668 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
669 ; CHECK-LABEL: test_x86_sse2_pslli_w:
671 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
673 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
676 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.psra.d with a vector shift-count operand is expected to emit
; the register form `vpsrad %xmm1, %xmm0, %xmm0`.
679 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
680 ; CHECK-LABEL: test_x86_sse2_psra_d:
682 ; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
684 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
687 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psra.w with a vector shift-count operand is expected to emit
; the register form `vpsraw %xmm1, %xmm0, %xmm0`.
690 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
691 ; CHECK-LABEL: test_x86_sse2_psra_w:
693 ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0
695 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
698 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psrai.d with the constant count i32 7 is expected to emit the
; immediate form `vpsrad $7, %xmm0, %xmm0`.
701 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
702 ; CHECK-LABEL: test_x86_sse2_psrai_d:
704 ; CHECK-NEXT: vpsrad $7, %xmm0, %xmm0
706 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
709 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.psrai.w with the constant count i32 7 is expected to emit the
; immediate form `vpsraw $7, %xmm0, %xmm0`.
712 define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
713 ; CHECK-LABEL: test_x86_sse2_psrai_w:
715 ; CHECK-NEXT: vpsraw $7, %xmm0, %xmm0
717 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
720 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.psrl.d with a vector shift-count operand is expected to emit
; the register form `vpsrld %xmm1, %xmm0, %xmm0`.
723 define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
724 ; CHECK-LABEL: test_x86_sse2_psrl_d:
726 ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
728 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
731 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psrl.q with a vector shift-count operand is expected to emit
; the register form `vpsrlq %xmm1, %xmm0, %xmm0`.
734 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
735 ; CHECK-LABEL: test_x86_sse2_psrl_q:
737 ; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
739 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
742 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psrl.w with a vector shift-count operand is expected to emit
; the register form `vpsrlw %xmm1, %xmm0, %xmm0`.
745 define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
746 ; CHECK-LABEL: test_x86_sse2_psrl_w:
748 ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
750 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
753 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psrli.d with the constant count i32 7 is expected to emit the
; immediate form `vpsrld $7, %xmm0, %xmm0`.
756 define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
757 ; CHECK-LABEL: test_x86_sse2_psrli_d:
759 ; CHECK-NEXT: vpsrld $7, %xmm0, %xmm0
761 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
764 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.psrli.q with the constant count i32 7 is expected to emit the
; immediate form `vpsrlq $7, %xmm0, %xmm0`.
767 define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
768 ; CHECK-LABEL: test_x86_sse2_psrli_q:
770 ; CHECK-NEXT: vpsrlq $7, %xmm0, %xmm0
772 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
775 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.psrli.w with the constant count i32 7 is expected to emit the
; immediate form `vpsrlw $7, %xmm0, %xmm0`.
778 define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
779 ; CHECK-LABEL: test_x86_sse2_psrli_w:
781 ; CHECK-NEXT: vpsrlw $7, %xmm0, %xmm0
783 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
786 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
; Lowering test: @llvm.x86.sse2.psubs.b on two <16 x i8> operands is expected to emit `vpsubsb`.
789 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
790 ; CHECK-LABEL: test_x86_sse2_psubs_b:
792 ; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
794 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
797 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psubs.w on two <8 x i16> operands is expected to emit `vpsubsw`.
800 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
801 ; CHECK-LABEL: test_x86_sse2_psubs_w:
803 ; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
805 %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
808 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psubus.b on two <16 x i8> operands is expected to emit `vpsubusb`.
811 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
812 ; CHECK-LABEL: test_x86_sse2_psubus_b:
814 ; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
816 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
819 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
; Lowering test: @llvm.x86.sse2.psubus.w on two <8 x i16> operands is expected to emit `vpsubusw`.
822 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
823 ; CHECK-LABEL: test_x86_sse2_psubus_w:
825 ; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
827 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
830 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
; Lowering test: @llvm.x86.sse2.sqrt.pd is expected to emit the two-operand `vsqrtpd %xmm0, %xmm0`.
833 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
834 ; CHECK-LABEL: test_x86_sse2_sqrt_pd:
836 ; CHECK-NEXT: vsqrtpd %xmm0, %xmm0
838 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
839 ret <2 x double> %res
841 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
; Lowering test: the single-operand @llvm.x86.sse2.sqrt.sd is expected to emit the
; three-operand `vsqrtsd %xmm0, %xmm0, %xmm0`, with %xmm0 used for both sources.
844 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
845 ; CHECK-LABEL: test_x86_sse2_sqrt_sd:
847 ; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
849 %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
850 ret <2 x double> %res
852 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.storel.dq(i8*, <4 x i32>). The expected output loads the
; pointer argument from the stack into %eax (any numeric offset from %esp is accepted by the
; regex) and then stores through it with `vmovlps %xmm0, (%eax)`.
855 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
856 ; CHECK-LABEL: test_x86_sse2_storel_dq:
858 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
859 ; CHECK-NEXT: vmovlps %xmm0, (%eax)
861 call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
864 declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
; Lowering test: @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>). The stored value is first
; incremented by a splat of i8 1, so the expected output is a `vpaddb` from a constant-pool
; entry followed by the unaligned integer store `vmovdqu`.
; NOTE(review): the `77` in the expected constant-pool label appears to be this function's
; zero-based position in the file, so inserting or removing earlier tests would likely
; require updating that line -- confirm before reordering.
867 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
868 ; add operation forces the execution domain.
869 ; CHECK-LABEL: test_x86_sse2_storeu_dq:
871 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
872 ; CHECK-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0
873 ; CHECK-NEXT: vmovdqu %xmm0, (%eax)
875 %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
876 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
879 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
; Lowering test: @llvm.x86.sse2.storeu.pd(i8*, <2 x double>). The stored value is first
; fadd-ed with the constant <0x0, 0x4200000000000000>. The expected output builds that
; constant with a `vmovsd` load plus a `vpslldq` byte shift that moves the loaded element
; into the upper half, then emits `vaddpd` and the unaligned double store `vmovupd`.
882 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
883 ; fadd operation forces the execution domain.
884 ; CHECK-LABEL: test_x86_sse2_storeu_pd:
886 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
887 ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
888 ; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
889 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
890 ; CHECK-NEXT: vmovupd %xmm0, (%eax)
892 %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
893 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
896 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
; Lowering test: @llvm.x86.sse2.sub.sd on two <2 x double> operands is expected to emit `vsubsd`.
899 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
900 ; CHECK-LABEL: test_x86_sse2_sub_sd:
902 ; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0
904 %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
905 ret <2 x double> %res
907 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.ucomieq.sd is expected to emit `vucomisd`, then build the i32
; result from the flags with `sete` and a zero-extending `movzbl`.
910 define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
911 ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
913 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
914 ; CHECK-NEXT: sete %al
915 ; CHECK-NEXT: movzbl %al, %eax
917 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
920 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.ucomige.sd is expected to emit `vucomisd`, then build the i32
; result from the flags with `setae` and a zero-extending `movzbl`.
923 define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
924 ; CHECK-LABEL: test_x86_sse2_ucomige_sd:
926 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
927 ; CHECK-NEXT: setae %al
928 ; CHECK-NEXT: movzbl %al, %eax
930 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
933 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.ucomigt.sd is expected to emit `vucomisd`, then build the i32
; result from the flags with `seta` and a zero-extending `movzbl`.
936 define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
937 ; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
939 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
940 ; CHECK-NEXT: seta %al
941 ; CHECK-NEXT: movzbl %al, %eax
943 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
946 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.ucomile.sd is expected to emit `vucomisd`, then build the i32
; result from the flags with `setbe` and a zero-extending `movzbl`.
949 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
950 ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
952 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
953 ; CHECK-NEXT: setbe %al
954 ; CHECK-NEXT: movzbl %al, %eax
956 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
959 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.ucomilt.sd is expected to emit `vucomisd`. Unlike the sibling
; ucomi* tests, the expected result sequence here is `sbbl %eax, %eax` followed by
; `andl $1, %eax` rather than a setcc/movzbl pair.
962 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
963 ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
965 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
966 ; CHECK-NEXT: sbbl %eax, %eax
967 ; CHECK-NEXT: andl $1, %eax
969 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
972 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse2.ucomineq.sd is expected to emit `vucomisd`, then build the i32
; result from the flags with `setne` and a zero-extending `movzbl`.
975 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
976 ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
978 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
979 ; CHECK-NEXT: setne %al
980 ; CHECK-NEXT: movzbl %al, %eax
982 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
985 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse3.addsub.pd on two <2 x double> operands is expected to emit `vaddsubpd`.
988 define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
989 ; CHECK-LABEL: test_x86_sse3_addsub_pd:
991 ; CHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
993 %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
994 ret <2 x double> %res
996 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
; Lowering test: @llvm.x86.sse3.addsub.ps on two <4 x float> operands is expected to emit `vaddsubps`.
999 define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
1000 ; CHECK-LABEL: test_x86_sse3_addsub_ps:
1002 ; CHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
1004 %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1005 ret <4 x float> %res
1007 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse3.hadd.pd is expected to lower to a single three-operand vhaddpd.
1010 define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
1011 ; CHECK-LABEL: test_x86_sse3_hadd_pd:
1013 ; CHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
1015 %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1016 ret <2 x double> %res
1018 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse3.hadd.ps is expected to lower to a single three-operand vhaddps.
1021 define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
1022 ; CHECK-LABEL: test_x86_sse3_hadd_ps:
1024 ; CHECK-NEXT: vhaddps %xmm1, %xmm0, %xmm0
1026 %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1027 ret <4 x float> %res
1029 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse3.hsub.pd is expected to lower to a single three-operand vhsubpd.
1032 define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
1033 ; CHECK-LABEL: test_x86_sse3_hsub_pd:
1035 ; CHECK-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
1037 %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1038 ret <2 x double> %res
1040 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse3.hsub.ps is expected to lower to a single three-operand vhsubps.
1043 define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
1044 ; CHECK-LABEL: test_x86_sse3_hsub_ps:
1046 ; CHECK-NEXT: vhsubps %xmm1, %xmm0, %xmm0
1048 %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1049 ret <4 x float> %res
1051 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse3.ldu.dq is expected to load an address from the stack into %eax and then emit vlddqu from that address.
1054 define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
1055 ; CHECK-LABEL: test_x86_sse3_ldu_dq:
1057 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1058 ; CHECK-NEXT: vlddqu (%eax), %xmm0
1060 %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
1063 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
; Codegen test: a call to @llvm.x86.sse41.blendvpd with three vector operands is expected to lower to a four-operand vblendvpd.
1066 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
1067 ; CHECK-LABEL: test_x86_sse41_blendvpd:
1069 ; CHECK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
1071 %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
1072 ret <2 x double> %res
1074 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.blendvps with three vector operands is expected to lower to a four-operand vblendvps.
1077 define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
1078 ; CHECK-LABEL: test_x86_sse41_blendvps:
1080 ; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
1082 %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
1083 ret <4 x float> %res
1085 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.dppd with the constant i8 7 is expected to lower to vdppd carrying $7 as its immediate.
1088 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
1089 ; CHECK-LABEL: test_x86_sse41_dppd:
1091 ; CHECK-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0
1093 %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
1094 ret <2 x double> %res
1096 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.dpps with the constant i8 7 is expected to lower to vdpps carrying $7 as its immediate.
1099 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
1100 ; CHECK-LABEL: test_x86_sse41_dpps:
1102 ; CHECK-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0
1104 %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1105 ret <4 x float> %res
1107 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.insertps with the constant i8 7 is expected to lower to vinsertps whose
; shuffle comment shows the low three lanes zeroed and lane 3 taken from xmm0.
1110 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
1111 ; CHECK-LABEL: test_x86_sse41_insertps:
1113 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
1115 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1116 ret <4 x float> %res
1118 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.mpsadbw with the constant i8 7 is expected to lower to vmpsadbw carrying $7 as its immediate.
1122 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
1123 ; CHECK-LABEL: test_x86_sse41_mpsadbw:
1125 ; CHECK-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0
1127 %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
1130 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.packusdw is expected to lower to a single three-operand vpackusdw.
1133 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
1134 ; CHECK-LABEL: test_x86_sse41_packusdw:
1136 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
1138 %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
1141 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pblendvb with three vector operands is expected to lower to a four-operand vpblendvb.
1144 define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
1145 ; CHECK-LABEL: test_x86_sse41_pblendvb:
1147 ; CHECK-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
1149 %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
1152 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.phminposuw is expected to lower to a single vphminposuw on %xmm0.
1155 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
1156 ; CHECK-LABEL: test_x86_sse41_phminposuw:
1158 ; CHECK-NEXT: vphminposuw %xmm0, %xmm0
1160 %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
1163 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmaxsb is expected to lower to a single three-operand vpmaxsb.
1166 define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
1167 ; CHECK-LABEL: test_x86_sse41_pmaxsb:
1169 ; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
1171 %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1174 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmaxsd is expected to lower to a single three-operand vpmaxsd.
1177 define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
1178 ; CHECK-LABEL: test_x86_sse41_pmaxsd:
1180 ; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
1182 %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1185 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmaxud is expected to lower to a single three-operand vpmaxud.
1188 define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
1189 ; CHECK-LABEL: test_x86_sse41_pmaxud:
1191 ; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
1193 %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1196 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmaxuw is expected to lower to a single three-operand vpmaxuw.
1199 define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
1200 ; CHECK-LABEL: test_x86_sse41_pmaxuw:
1202 ; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
1204 %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1207 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pminsb is expected to lower to a single three-operand vpminsb.
1210 define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
1211 ; CHECK-LABEL: test_x86_sse41_pminsb:
1213 ; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1215 %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1218 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pminsd is expected to lower to a single three-operand vpminsd.
1221 define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
1222 ; CHECK-LABEL: test_x86_sse41_pminsd:
1224 ; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1226 %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1229 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pminud is expected to lower to a single three-operand vpminud.
1232 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
1233 ; CHECK-LABEL: test_x86_sse41_pminud:
1235 ; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0
1237 %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1240 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pminuw is expected to lower to a single three-operand vpminuw.
1243 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
1244 ; CHECK-LABEL: test_x86_sse41_pminuw:
1246 ; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1248 %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1251 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmovzxbd is expected to lower to vpmovzxbd; the expected shuffle comment
; shows source bytes 0-3 each followed by three zero bytes.
1254 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
1255 ; CHECK-LABEL: test_x86_sse41_pmovzxbd:
1257 ; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1259 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
1262 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmovzxbq is expected to lower to vpmovzxbq; the expected shuffle comment
; shows source bytes 0-1 each followed by seven zero bytes.
1265 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
1266 ; CHECK-LABEL: test_x86_sse41_pmovzxbq:
1268 ; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1270 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
1273 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmovzxbw is expected to lower to vpmovzxbw; the expected shuffle comment
; shows source bytes 0-7 each followed by one zero byte.
1276 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
1277 ; CHECK-LABEL: test_x86_sse41_pmovzxbw:
1279 ; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1281 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
1284 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmovzxdq is expected to lower to vpmovzxdq; the expected shuffle comment
; shows source elements 0-1 each followed by one zero element.
1287 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
1288 ; CHECK-LABEL: test_x86_sse41_pmovzxdq:
1290 ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
1292 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
1295 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmovzxwd is expected to lower to vpmovzxwd; the expected shuffle comment
; shows source elements 0-3 each followed by one zero element.
1298 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
1299 ; CHECK-LABEL: test_x86_sse41_pmovzxwd:
1301 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1303 %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
1306 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmovzxwq is expected to lower to vpmovzxwq; the expected shuffle comment
; shows source elements 0-1 each followed by three zero elements.
1309 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
1310 ; CHECK-LABEL: test_x86_sse41_pmovzxwq:
1312 ; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1314 %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
1317 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.pmuldq is expected to lower to a single three-operand vpmuldq.
1320 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
1321 ; CHECK-LABEL: test_x86_sse41_pmuldq:
1323 ; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
1325 %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
1328 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.ptestc is expected to lower to vptest followed by the sbbl/andl $1 sequence on %eax.
1331 define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
1332 ; CHECK-LABEL: test_x86_sse41_ptestc:
1334 ; CHECK-NEXT: vptest %xmm1, %xmm0
1335 ; CHECK-NEXT: sbbl %eax, %eax
1336 ; CHECK-NEXT: andl $1, %eax
1338 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1341 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.ptestnzc is expected to lower to vptest followed by seta and movzbl.
1344 define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
1345 ; CHECK-LABEL: test_x86_sse41_ptestnzc:
1347 ; CHECK-NEXT: vptest %xmm1, %xmm0
1348 ; CHECK-NEXT: seta %al
1349 ; CHECK-NEXT: movzbl %al, %eax
1351 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1354 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.ptestz is expected to lower to vptest followed by sete and movzbl.
1357 define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
1358 ; CHECK-LABEL: test_x86_sse41_ptestz:
1360 ; CHECK-NEXT: vptest %xmm1, %xmm0
1361 ; CHECK-NEXT: sete %al
1362 ; CHECK-NEXT: movzbl %al, %eax
1364 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
1367 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.round.pd with the constant i32 7 is expected to lower to vroundpd carrying $7 as its immediate.
1370 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
1371 ; CHECK-LABEL: test_x86_sse41_round_pd:
1373 ; CHECK-NEXT: vroundpd $7, %xmm0, %xmm0
1375 %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
1376 ret <2 x double> %res
1378 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.round.ps with the constant i32 7 is expected to lower to vroundps carrying $7 as its immediate.
1381 define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
1382 ; CHECK-LABEL: test_x86_sse41_round_ps:
1384 ; CHECK-NEXT: vroundps $7, %xmm0, %xmm0
1386 %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1387 ret <4 x float> %res
1389 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.round.sd with the constant i32 7 is expected to lower to a three-operand vroundsd carrying $7 as its immediate.
1392 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
1393 ; CHECK-LABEL: test_x86_sse41_round_sd:
1395 ; CHECK-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0
1397 %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
1398 ret <2 x double> %res
1400 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
; Codegen test: a call to @llvm.x86.sse41.round.ss with the constant i32 7 is expected to lower to a three-operand vroundss carrying $7 as its immediate.
1403 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
1404 ; CHECK-LABEL: test_x86_sse41_round_ss:
1406 ; CHECK-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0
1408 %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
1409 ret <4 x float> %res
1411 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpestri128 with both i32 operands set to 7 is expected to load $7 into
; %eax and %edx, emit vpcmpestri $7, and then copy %ecx into %eax.
1414 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
1415 ; CHECK-LABEL: test_x86_sse42_pcmpestri128:
1417 ; CHECK-NEXT: movl $7, %eax
1418 ; CHECK-NEXT: movl $7, %edx
1419 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1420 ; CHECK-NEXT: movl %ecx, %eax
1422 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1425 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test: @llvm.x86.sse42.pcmpestri128 with both vector operands loaded through pointer arguments. The expected
; output loads one vector with vmovdqa and uses the other address directly as the memory operand of vpcmpestri.
1428 define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
1429 ; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
1431 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1432 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1433 ; CHECK-NEXT: vmovdqa (%eax), %xmm0
1434 ; CHECK-NEXT: movl $7, %eax
1435 ; CHECK-NEXT: movl $7, %edx
1436 ; CHECK-NEXT: vpcmpestri $7, (%ecx), %xmm0
1437 ; CHECK-NEXT: movl %ecx, %eax
1439 %1 = load <16 x i8>, <16 x i8>* %a0
1440 %2 = load <16 x i8>, <16 x i8>* %a2
1441 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
; Codegen test: a call to @llvm.x86.sse42.pcmpestria128 is expected to load $7 into %eax and %edx, emit vpcmpestri $7,
; and then produce the result with seta and movzbl.
1446 define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
1447 ; CHECK-LABEL: test_x86_sse42_pcmpestria128:
1449 ; CHECK-NEXT: movl $7, %eax
1450 ; CHECK-NEXT: movl $7, %edx
1451 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1452 ; CHECK-NEXT: seta %al
1453 ; CHECK-NEXT: movzbl %al, %eax
1455 %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1458 declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpestric128 is expected to load $7 into %eax and %edx, emit vpcmpestri $7,
; and then produce the result with the sbbl/andl $1 sequence on %eax.
1461 define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
1462 ; CHECK-LABEL: test_x86_sse42_pcmpestric128:
1464 ; CHECK-NEXT: movl $7, %eax
1465 ; CHECK-NEXT: movl $7, %edx
1466 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1467 ; CHECK-NEXT: sbbl %eax, %eax
1468 ; CHECK-NEXT: andl $1, %eax
1470 %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1473 declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpestrio128 is expected to load $7 into %eax and %edx, emit vpcmpestri $7,
; and then produce the result with seto and movzbl.
1476 define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
1477 ; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
1479 ; CHECK-NEXT: movl $7, %eax
1480 ; CHECK-NEXT: movl $7, %edx
1481 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1482 ; CHECK-NEXT: seto %al
1483 ; CHECK-NEXT: movzbl %al, %eax
1485 %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1488 declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpestris128 is expected to load $7 into %eax and %edx, emit vpcmpestri $7,
; and then produce the result with sets and movzbl.
1491 define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
1492 ; CHECK-LABEL: test_x86_sse42_pcmpestris128:
1494 ; CHECK-NEXT: movl $7, %eax
1495 ; CHECK-NEXT: movl $7, %edx
1496 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1497 ; CHECK-NEXT: sets %al
1498 ; CHECK-NEXT: movzbl %al, %eax
1500 %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1503 declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpestriz128 is expected to load $7 into %eax and %edx, emit vpcmpestri $7,
; and then produce the result with sete and movzbl.
1506 define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
1507 ; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
1509 ; CHECK-NEXT: movl $7, %eax
1510 ; CHECK-NEXT: movl $7, %edx
1511 ; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
1512 ; CHECK-NEXT: sete %al
1513 ; CHECK-NEXT: movzbl %al, %eax
1515 %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
1518 declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpestrm128 with both i32 operands set to 7 is expected to load $7 into
; %eax and %edx and then emit vpcmpestrm $7.
1521 define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
1522 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
1524 ; CHECK-NEXT: movl $7, %eax
1525 ; CHECK-NEXT: movl $7, %edx
1526 ; CHECK-NEXT: vpcmpestrm $7, %xmm1, %xmm0
1528 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
1531 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
; Codegen test: @llvm.x86.sse42.pcmpestrm128 with its second vector operand loaded through %a2. The expected output
; uses the address in %ecx directly as the memory operand of vpcmpestrm.
1534 define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
1535 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
1537 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1538 ; CHECK-NEXT: movl $7, %eax
1539 ; CHECK-NEXT: movl $7, %edx
1540 ; CHECK-NEXT: vpcmpestrm $7, (%ecx), %xmm0
1542 %1 = load <16 x i8>, <16 x i8>* %a2
1543 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
; Codegen test: a call to @llvm.x86.sse42.pcmpistri128 with the constant i8 7 is expected to emit vpcmpistri $7 and then copy %ecx into %eax.
1548 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
1549 ; CHECK-LABEL: test_x86_sse42_pcmpistri128:
1551 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1552 ; CHECK-NEXT: movl %ecx, %eax
1554 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1557 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test: @llvm.x86.sse42.pcmpistri128 with both vector operands loaded through pointer arguments. The expected
; output loads one vector with vmovdqa and uses the other address directly as the memory operand of vpcmpistri.
1560 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
1561 ; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
1563 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1564 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1565 ; CHECK-NEXT: vmovdqa (%ecx), %xmm0
1566 ; CHECK-NEXT: vpcmpistri $7, (%eax), %xmm0
1567 ; CHECK-NEXT: movl %ecx, %eax
1569 %1 = load <16 x i8>, <16 x i8>* %a0
1570 %2 = load <16 x i8>, <16 x i8>* %a1
1571 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
; Codegen test: a call to @llvm.x86.sse42.pcmpistria128 is expected to emit vpcmpistri $7 followed by seta and movzbl.
1576 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
1577 ; CHECK-LABEL: test_x86_sse42_pcmpistria128:
1579 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1580 ; CHECK-NEXT: seta %al
1581 ; CHECK-NEXT: movzbl %al, %eax
1583 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1586 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpistric128 is expected to emit vpcmpistri $7 followed by the sbbl/andl $1 sequence on %eax.
1589 define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
1590 ; CHECK-LABEL: test_x86_sse42_pcmpistric128:
1592 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1593 ; CHECK-NEXT: sbbl %eax, %eax
1594 ; CHECK-NEXT: andl $1, %eax
1596 %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1599 declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpistrio128 is expected to emit vpcmpistri $7 followed by seto and movzbl.
1602 define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
1603 ; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
1605 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1606 ; CHECK-NEXT: seto %al
1607 ; CHECK-NEXT: movzbl %al, %eax
1609 %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1612 declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpistris128 is expected to emit vpcmpistri $7 followed by sets and movzbl.
1615 define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
1616 ; CHECK-LABEL: test_x86_sse42_pcmpistris128:
1618 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1619 ; CHECK-NEXT: sets %al
1620 ; CHECK-NEXT: movzbl %al, %eax
1622 %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1625 declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpistriz128 is expected to emit vpcmpistri $7 followed by sete and movzbl.
1628 define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
1629 ; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
1631 ; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
1632 ; CHECK-NEXT: sete %al
1633 ; CHECK-NEXT: movzbl %al, %eax
1635 %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1638 declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse42.pcmpistrm128 with the constant i8 7 is expected to lower to vpcmpistrm $7 on xmm registers.
1641 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
1642 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
1644 ; CHECK-NEXT: vpcmpistrm $7, %xmm1, %xmm0
1646 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
1649 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
; Codegen test: @llvm.x86.sse42.pcmpistrm128 with its second vector operand loaded through %a1. The expected output
; uses the address in %eax directly as the memory operand of vpcmpistrm.
1652 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
1653 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
1655 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1656 ; CHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0
1658 %1 = load <16 x i8>, <16 x i8>* %a1
1659 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
; Codegen test: a call to @llvm.x86.sse.add.ss is expected to lower to a single three-operand vaddss.
1664 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
1665 ; CHECK-LABEL: test_x86_sse_add_ss:
1667 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
1669 %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1670 ret <4 x float> %res
1672 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.cmp.ps with the constant i8 7 is expected to be printed as the vcmpordps mnemonic.
1675 define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
1676 ; CHECK-LABEL: test_x86_sse_cmp_ps:
1678 ; CHECK-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
1680 %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1681 ret <4 x float> %res
1683 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.cmp.ss with the constant i8 7 is expected to be printed as the vcmpordss mnemonic.
1686 define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
1687 ; CHECK-LABEL: test_x86_sse_cmp_ss:
1689 ; CHECK-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
1691 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1692 ret <4 x float> %res
1694 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.comieq.ss is expected to lower to vcomiss followed by sete and movzbl.
1697 define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
1698 ; CHECK-LABEL: test_x86_sse_comieq_ss:
1700 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1701 ; CHECK-NEXT: sete %al
1702 ; CHECK-NEXT: movzbl %al, %eax
1704 %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1707 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.comige.ss is expected to lower to vcomiss followed by setae and movzbl.
1710 define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
1711 ; CHECK-LABEL: test_x86_sse_comige_ss:
1713 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1714 ; CHECK-NEXT: setae %al
1715 ; CHECK-NEXT: movzbl %al, %eax
1717 %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1720 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.comigt.ss is expected to lower to vcomiss followed by seta and movzbl.
1723 define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
1724 ; CHECK-LABEL: test_x86_sse_comigt_ss:
1726 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1727 ; CHECK-NEXT: seta %al
1728 ; CHECK-NEXT: movzbl %al, %eax
1730 %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1733 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.comile.ss is expected to lower to vcomiss followed by setbe and movzbl.
1736 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
1737 ; CHECK-LABEL: test_x86_sse_comile_ss:
1739 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1740 ; CHECK-NEXT: setbe %al
1741 ; CHECK-NEXT: movzbl %al, %eax
1743 %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1746 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.comilt.ss is expected to lower to vcomiss followed by the sbbl/andl $1 sequence on %eax.
1749 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
1750 ; CHECK-LABEL: test_x86_sse_comilt_ss:
1752 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1753 ; CHECK-NEXT: sbbl %eax, %eax
1754 ; CHECK-NEXT: andl $1, %eax
1756 %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1759 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.comineq.ss is expected to lower to vcomiss followed by setne and movzbl.
1762 define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
1763 ; CHECK-LABEL: test_x86_sse_comineq_ss:
1765 ; CHECK-NEXT: vcomiss %xmm1, %xmm0
1766 ; CHECK-NEXT: setne %al
1767 ; CHECK-NEXT: movzbl %al, %eax
1769 %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1772 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.cvtsi2ss with the constant i32 7 is expected to load $7 into %eax and then emit vcvtsi2ssl from %eax.
1775 define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
1776 ; CHECK-LABEL: test_x86_sse_cvtsi2ss:
1778 ; CHECK-NEXT: movl $7, %eax
1779 ; CHECK-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
1781 %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1782 ret <4 x float> %res
1784 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.cvtss2si is expected to lower to a single vcvtss2si from %xmm0 into %eax.
1787 define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
1788 ; CHECK-LABEL: test_x86_sse_cvtss2si:
1790 ; CHECK-NEXT: vcvtss2si %xmm0, %eax
1792 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
1795 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.cvttss2si is expected to lower to a single vcvttss2si from %xmm0 into %eax.
1798 define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
1799 ; CHECK-LABEL: test_x86_sse_cvttss2si:
1801 ; CHECK-NEXT: vcvttss2si %xmm0, %eax
1803 %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
1806 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.div.ss is expected to lower to a single three-operand vdivss.
1809 define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
1810 ; CHECK-LABEL: test_x86_sse_div_ss:
1812 ; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
1814 %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1815 ret <4 x float> %res
1817 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.ldmxcsr is expected to load an address from the stack into %eax and then emit vldmxcsr on that address.
1820 define void @test_x86_sse_ldmxcsr(i8* %a0) {
1821 ; CHECK-LABEL: test_x86_sse_ldmxcsr:
1823 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1824 ; CHECK-NEXT: vldmxcsr (%eax)
1826 call void @llvm.x86.sse.ldmxcsr(i8* %a0)
1829 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
; Codegen test: a call to @llvm.x86.sse.max.ps is expected to lower to a single three-operand vmaxps.
1833 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
1834 ; CHECK-LABEL: test_x86_sse_max_ps:
1836 ; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0
1838 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1839 ret <4 x float> %res
1841 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.max.ss is expected to lower to a single three-operand vmaxss.
1844 define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
1845 ; CHECK-LABEL: test_x86_sse_max_ss:
1847 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
1849 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1850 ret <4 x float> %res
1852 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.min.ps is expected to lower to a single three-operand vminps.
1855 define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
1856 ; CHECK-LABEL: test_x86_sse_min_ps:
1858 ; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0
1860 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1861 ret <4 x float> %res
1863 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.min.ss is expected to lower to a single three-operand vminss.
1866 define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
1867 ; CHECK-LABEL: test_x86_sse_min_ss:
1869 ; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0
1871 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1872 ret <4 x float> %res
1874 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.movmsk.ps is expected to lower to a single vmovmskps from %xmm0 into %eax.
1877 define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
1878 ; CHECK-LABEL: test_x86_sse_movmsk_ps:
1880 ; CHECK-NEXT: vmovmskps %xmm0, %eax
1882 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
1885 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.mul.ss is expected to lower to a single three-operand vmulss.
1889 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
1890 ; CHECK-LABEL: test_x86_sse_mul_ss:
1892 ; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
1894 %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1895 ret <4 x float> %res
1897 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.rcp.ps is expected to lower to a single vrcpps on %xmm0.
1900 define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
1901 ; CHECK-LABEL: test_x86_sse_rcp_ps:
1903 ; CHECK-NEXT: vrcpps %xmm0, %xmm0
1905 %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1906 ret <4 x float> %res
1908 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.rcp.ss is expected to lower to a three-operand vrcpss with %xmm0 used for every operand.
1911 define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
1912 ; CHECK-LABEL: test_x86_sse_rcp_ss:
1914 ; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0
1916 %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1917 ret <4 x float> %res
1919 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.rsqrt.ps is expected to lower to a single vrsqrtps on %xmm0.
1922 define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
1923 ; CHECK-LABEL: test_x86_sse_rsqrt_ps:
1925 ; CHECK-NEXT: vrsqrtps %xmm0, %xmm0
1927 %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1928 ret <4 x float> %res
1930 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.rsqrt.ss is expected to lower to a three-operand vrsqrtss with %xmm0 used for every operand.
1933 define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
1934 ; CHECK-LABEL: test_x86_sse_rsqrt_ss:
1936 ; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
1938 %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1939 ret <4 x float> %res
1941 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.sqrt.ps is expected to lower to a single vsqrtps on %xmm0.
1944 define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
1945 ; CHECK-LABEL: test_x86_sse_sqrt_ps:
1947 ; CHECK-NEXT: vsqrtps %xmm0, %xmm0
1949 %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1950 ret <4 x float> %res
1952 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.sqrt.ss is expected to be emitted as
; vsqrtss. The intrinsic takes one vector, while the expected instruction is
; the three-operand form with %xmm0 used for every operand.
1955 define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
1956 ; CHECK-LABEL: test_x86_sse_sqrt_ss:
1958 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
1960 %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1961 ret <4 x float> %res
1963 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.stmxcsr is expected to load the pointer
; argument from the stack into %eax and then emit vstmxcsr through that address.
1966 define void @test_x86_sse_stmxcsr(i8* %a0) {
1967 ; CHECK-LABEL: test_x86_sse_stmxcsr:
1969 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1970 ; CHECK-NEXT: vstmxcsr (%eax)
1972 call void @llvm.x86.sse.stmxcsr(i8* %a0)
1975 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
; Codegen test: a call to @llvm.x86.sse.storeu.ps is expected to load the
; pointer argument from the stack into %eax and then store %xmm0 through it
; with vmovups.
1978 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
1979 ; CHECK-LABEL: test_x86_sse_storeu_ps:
1981 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1982 ; CHECK-NEXT: vmovups %xmm0, (%eax)
1984 call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
1987 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
; Codegen test: a call to @llvm.x86.sse.sub.ss on two <4 x float> arguments is
; expected to be emitted as vsubss.
1990 define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
1991 ; CHECK-LABEL: test_x86_sse_sub_ss:
1993 ; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0
1995 %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1996 ret <4 x float> %res
1998 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.ucomieq.ss is expected to be emitted as
; vucomiss, with the i32 result produced by sete and a movzbl zero-extension.
2001 define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
2002 ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
2004 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2005 ; CHECK-NEXT: sete %al
2006 ; CHECK-NEXT: movzbl %al, %eax
2008 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2011 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.ucomige.ss is expected to be emitted as
; vucomiss, with the i32 result produced by setae and a movzbl zero-extension.
2014 define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
2015 ; CHECK-LABEL: test_x86_sse_ucomige_ss:
2017 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2018 ; CHECK-NEXT: setae %al
2019 ; CHECK-NEXT: movzbl %al, %eax
2021 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2024 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.ucomigt.ss is expected to be emitted as
; vucomiss, with the i32 result produced by seta and a movzbl zero-extension.
2027 define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
2028 ; CHECK-LABEL: test_x86_sse_ucomigt_ss:
2030 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2031 ; CHECK-NEXT: seta %al
2032 ; CHECK-NEXT: movzbl %al, %eax
2034 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2037 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.ucomile.ss is expected to be emitted as
; vucomiss, with the i32 result produced by setbe and a movzbl zero-extension.
2040 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
2041 ; CHECK-LABEL: test_x86_sse_ucomile_ss:
2043 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2044 ; CHECK-NEXT: setbe %al
2045 ; CHECK-NEXT: movzbl %al, %eax
2047 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2050 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.ucomilt.ss is expected to be emitted as
; vucomiss. Unlike the sibling ucomi* tests, the i32 result is expected to be
; formed with sbbl %eax, %eax followed by andl $1 rather than setcc + movzbl.
2053 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
2054 ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
2056 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2057 ; CHECK-NEXT: sbbl %eax, %eax
2058 ; CHECK-NEXT: andl $1, %eax
2060 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2063 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.sse.ucomineq.ss is expected to be emitted
; as vucomiss, with the i32 result produced by setne and a movzbl
; zero-extension.
2066 define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
2067 ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
2069 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
2070 ; CHECK-NEXT: setne %al
2071 ; CHECK-NEXT: movzbl %al, %eax
2073 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2076 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.pabs.b.128 on a <16 x i8> argument is
; expected to be emitted as vpabsb.
2079 define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
2080 ; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
2082 ; CHECK-NEXT: vpabsb %xmm0, %xmm0
2084 %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
2087 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.pabs.d.128 on a <4 x i32> argument is
; expected to be emitted as vpabsd.
2090 define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
2091 ; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
2093 ; CHECK-NEXT: vpabsd %xmm0, %xmm0
2095 %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
2098 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.pabs.w.128 on an <8 x i16> argument
; is expected to be emitted as vpabsw.
2101 define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
2102 ; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
2104 ; CHECK-NEXT: vpabsw %xmm0, %xmm0
2106 %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
2109 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.phadd.d.128 on two <4 x i32>
; arguments is expected to be emitted as vphaddd.
2112 define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2113 ; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
2115 ; CHECK-NEXT: vphaddd %xmm1, %xmm0, %xmm0
2117 %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2120 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.phadd.sw.128 on two <8 x i16>
; arguments is expected to be emitted as vphaddsw.
2123 define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2124 ; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
2126 ; CHECK-NEXT: vphaddsw %xmm1, %xmm0, %xmm0
2128 %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2131 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.phadd.w.128 on two <8 x i16>
; arguments is expected to be emitted as vphaddw.
2134 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2135 ; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
2137 ; CHECK-NEXT: vphaddw %xmm1, %xmm0, %xmm0
2139 %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2142 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.phsub.d.128 on two <4 x i32>
; arguments is expected to be emitted as vphsubd.
2145 define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2146 ; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
2148 ; CHECK-NEXT: vphsubd %xmm1, %xmm0, %xmm0
2150 %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2153 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.phsub.sw.128 on two <8 x i16>
; arguments is expected to be emitted as vphsubsw.
2156 define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2157 ; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
2159 ; CHECK-NEXT: vphsubsw %xmm1, %xmm0, %xmm0
2161 %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2164 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.phsub.w.128 on two <8 x i16>
; arguments is expected to be emitted as vphsubw.
2167 define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2168 ; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
2170 ; CHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0
2172 %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2175 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.pmadd.ub.sw.128, which takes two
; <16 x i8> arguments and returns <8 x i16>, is expected to be emitted as
; vpmaddubsw.
2178 define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
2179 ; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
2181 ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
2183 %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
2186 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.pmul.hr.sw.128 on two <8 x i16>
; arguments is expected to be emitted as vpmulhrsw.
2189 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2190 ; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
2192 ; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0
2194 %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2197 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.pshuf.b.128 on two <16 x i8>
; arguments is expected to be emitted as vpshufb.
2200 define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
2201 ; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
2203 ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
2205 %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
2208 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.psign.b.128 on two <16 x i8>
; arguments is expected to be emitted as vpsignb.
2211 define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
2212 ; CHECK-LABEL: test_x86_ssse3_psign_b_128:
2214 ; CHECK-NEXT: vpsignb %xmm1, %xmm0, %xmm0
2216 %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
2219 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.psign.d.128 on two <4 x i32>
; arguments is expected to be emitted as vpsignd.
2222 define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2223 ; CHECK-LABEL: test_x86_ssse3_psign_d_128:
2225 ; CHECK-NEXT: vpsignd %xmm1, %xmm0, %xmm0
2227 %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2230 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.ssse3.psign.w.128 on two <8 x i16>
; arguments is expected to be emitted as vpsignw.
2233 define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2234 ; CHECK-LABEL: test_x86_ssse3_psign_w_128:
2236 ; CHECK-NEXT: vpsignw %xmm1, %xmm0, %xmm0
2238 %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2241 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.addsub.pd.256 on two <4 x double>
; arguments is expected to be emitted as vaddsubpd on ymm registers.
2244 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
2245 ; CHECK-LABEL: test_x86_avx_addsub_pd_256:
2247 ; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
2249 %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2250 ret <4 x double> %res
2252 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.addsub.ps.256 on two <8 x float>
; arguments is expected to be emitted as vaddsubps on ymm registers.
2255 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2256 ; CHECK-LABEL: test_x86_avx_addsub_ps_256:
2258 ; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
2260 %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2261 ret <8 x float> %res
2263 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.blendv.pd.256 with three <4 x double>
; arguments is expected to be emitted as the four-operand vblendvpd on ymm
; registers.
2266 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
2267 ; CHECK-LABEL: test_x86_avx_blendv_pd_256:
2269 ; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
2271 %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
2272 ret <4 x double> %res
2274 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.blendv.ps.256 with three <8 x float>
; arguments is expected to be emitted as the four-operand vblendvps on ymm
; registers.
2277 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
2278 ; CHECK-LABEL: test_x86_avx_blendv_ps_256:
2280 ; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
2282 %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
2283 ret <8 x float> %res
2285 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cmp.pd.256 with comparison immediate 7
; is expected to be printed with the named-predicate mnemonic vcmpordpd rather
; than a generic compare with an explicit immediate.
2288 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
2289 ; CHECK-LABEL: test_x86_avx_cmp_pd_256:
2291 ; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0
2293 %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
2294 ret <4 x double> %res
2296 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cmp.ps.256 with comparison immediate 7
; is expected to be printed with the named-predicate mnemonic vcmpordps.
; The intrinsic's declaration appears after the pseudo-op test that follows.
2299 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
2300 ; CHECK-LABEL: test_x86_avx_cmp_ps_256:
2302 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0
2304 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2305 ret <8 x float> %res
; Codegen test covering every comparison immediate from 0 through 31 of
; @llvm.x86.avx.cmp.ps.256. The 32 calls are chained: each call compares %a0
; against the result of the previous call, so none of them is dead and they are
; expected to appear in order. Every immediate is expected to be printed as its
; own named-predicate mnemonic (vcmpeqps for 0 ... vcmptrue_usps for 31). All
; intermediate results are expected in %ymm1; only the last one targets %ymm0.
2308 define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
2309 ; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
2311 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1
2312 ; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1
2313 ; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1
2314 ; CHECK-NEXT: vcmpunordps %ymm1, %ymm0, %ymm1
2315 ; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %ymm1
2316 ; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %ymm1
2317 ; CHECK-NEXT: vcmpnleps %ymm1, %ymm0, %ymm1
2318 ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm1
2319 ; CHECK-NEXT: vcmpeq_uqps %ymm1, %ymm0, %ymm1
2320 ; CHECK-NEXT: vcmpngeps %ymm1, %ymm0, %ymm1
2321 ; CHECK-NEXT: vcmpngtps %ymm1, %ymm0, %ymm1
2322 ; CHECK-NEXT: vcmpfalseps %ymm1, %ymm0, %ymm1
2323 ; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %ymm1
2324 ; CHECK-NEXT: vcmpgeps %ymm1, %ymm0, %ymm1
2325 ; CHECK-NEXT: vcmpgtps %ymm1, %ymm0, %ymm1
2326 ; CHECK-NEXT: vcmptrueps %ymm1, %ymm0, %ymm1
2327 ; CHECK-NEXT: vcmpeq_osps %ymm1, %ymm0, %ymm1
2328 ; CHECK-NEXT: vcmplt_oqps %ymm1, %ymm0, %ymm1
2329 ; CHECK-NEXT: vcmple_oqps %ymm1, %ymm0, %ymm1
2330 ; CHECK-NEXT: vcmpunord_sps %ymm1, %ymm0, %ymm1
2331 ; CHECK-NEXT: vcmpneq_usps %ymm1, %ymm0, %ymm1
2332 ; CHECK-NEXT: vcmpnlt_uqps %ymm1, %ymm0, %ymm1
2333 ; CHECK-NEXT: vcmpnle_uqps %ymm1, %ymm0, %ymm1
2334 ; CHECK-NEXT: vcmpord_sps %ymm1, %ymm0, %ymm1
2335 ; CHECK-NEXT: vcmpeq_usps %ymm1, %ymm0, %ymm1
2336 ; CHECK-NEXT: vcmpnge_uqps %ymm1, %ymm0, %ymm1
2337 ; CHECK-NEXT: vcmpngt_uqps %ymm1, %ymm0, %ymm1
2338 ; CHECK-NEXT: vcmpfalse_osps %ymm1, %ymm0, %ymm1
2339 ; CHECK-NEXT: vcmpneq_osps %ymm1, %ymm0, %ymm1
2340 ; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1
2341 ; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1
2342 ; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0
2344 %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
2345 %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
2346 %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
2347 %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
2348 %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
2349 %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
2350 %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
2351 %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
2352 %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
2353 %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
2354 %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
2355 %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
2356 %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
2357 %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
2358 %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
2359 %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
2360 %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
2361 %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
2362 %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
2363 %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
2364 %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
2365 %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
2366 %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
2367 %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
2368 %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
2369 %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
2370 %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
2371 %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
2372 %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
2373 %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
2374 %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
2375 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
2376 ret <8 x float> %res
2378 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cvt.pd2.ps.256 (<4 x double> in,
; <4 x float> out) is expected to be emitted as vcvtpd2psy from a ymm source to
; an xmm destination, followed by vzeroupper.
2381 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
2382 ; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
2384 ; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
2385 ; CHECK-NEXT: vzeroupper
2387 %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
2388 ret <4 x float> %res
2390 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cvt.pd2dq.256 (<4 x double> in,
; <4 x i32> out) is expected to be emitted as vcvtpd2dqy from a ymm source to
; an xmm destination, followed by vzeroupper.
2393 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
2394 ; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
2396 ; CHECK-NEXT: vcvtpd2dqy %ymm0, %xmm0
2397 ; CHECK-NEXT: vzeroupper
2399 %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
2402 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cvt.ps2.pd.256 (<4 x float> in,
; <4 x double> out) is expected to be emitted as vcvtps2pd from an xmm source
; to a ymm destination.
2405 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
2406 ; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
2408 ; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
2410 %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
2411 ret <4 x double> %res
2413 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cvt.ps2dq.256 (<8 x float> in,
; <8 x i32> out) is expected to be emitted as vcvtps2dq on ymm registers.
2416 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
2417 ; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
2419 ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
2421 %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
2424 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cvtdq2.pd.256 (<4 x i32> in,
; <4 x double> out) is expected to be emitted as vcvtdq2pd from an xmm source
; to a ymm destination.
2427 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
2428 ; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
2430 ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
2432 %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
2433 ret <4 x double> %res
2435 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cvtdq2.ps.256 (<8 x i32> in,
; <8 x float> out) is expected to be emitted as vcvtdq2ps on ymm registers.
2438 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
2439 ; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
2441 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
2443 %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
2444 ret <8 x float> %res
2446 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cvtt.pd2dq.256 (<4 x double> in,
; <4 x i32> out) is expected to be emitted as vcvttpd2dqy from a ymm source to
; an xmm destination, followed by vzeroupper.
2449 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
2450 ; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
2452 ; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
2453 ; CHECK-NEXT: vzeroupper
2455 %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
2458 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.cvtt.ps2dq.256 (<8 x float> in,
; <8 x i32> out) is expected to be emitted as vcvttps2dq on ymm registers.
2461 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
2462 ; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
2464 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
2466 %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
2469 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.dp.ps.256 with i8 immediate 7 is
; expected to be emitted as vdpps on ymm registers with the immediate passed
; through unchanged as $7.
2472 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
2473 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
2475 ; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
2477 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2478 ret <8 x float> %res
2480 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.hadd.pd.256 on two <4 x double>
; arguments is expected to be emitted as vhaddpd on ymm registers.
2483 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
2484 ; CHECK-LABEL: test_x86_avx_hadd_pd_256:
2486 ; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
2488 %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2489 ret <4 x double> %res
2491 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.hadd.ps.256 on two <8 x float>
; arguments is expected to be emitted as vhaddps on ymm registers.
2494 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
2495 ; CHECK-LABEL: test_x86_avx_hadd_ps_256:
2497 ; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
2499 %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2500 ret <8 x float> %res
2502 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.hsub.pd.256 on two <4 x double>
; arguments is expected to be emitted as vhsubpd on ymm registers.
2505 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
2506 ; CHECK-LABEL: test_x86_avx_hsub_pd_256:
2508 ; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
2510 %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2511 ret <4 x double> %res
2513 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.hsub.ps.256 on two <8 x float>
; arguments is expected to be emitted as vhsubps on ymm registers.
2516 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2517 ; CHECK-LABEL: test_x86_avx_hsub_ps_256:
2519 ; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0
2521 %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2522 ret <8 x float> %res
2524 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.ldu.dq.256 is expected to load the
; pointer argument from the stack into %eax and then emit vlddqu from that
; address into %ymm0.
2527 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
2528 ; CHECK-LABEL: test_x86_avx_ldu_dq_256:
2530 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2531 ; CHECK-NEXT: vlddqu (%eax), %ymm0
2533 %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
2536 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
; Codegen test: a call to @llvm.x86.avx.maskload.pd (pointer plus <2 x i64>
; mask) is expected to load the pointer from the stack into %eax and then emit
; the load form of vmaskmovpd with the mask in %xmm0.
2539 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
2540 ; CHECK-LABEL: test_x86_avx_maskload_pd:
2542 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2543 ; CHECK-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0
2545 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
2546 ret <2 x double> %res
2548 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
; Codegen test: a call to @llvm.x86.avx.maskload.pd.256 (pointer plus
; <4 x i64> mask) is expected to load the pointer from the stack into %eax and
; then emit the load form of vmaskmovpd with the mask in %ymm0.
2551 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
2552 ; CHECK-LABEL: test_x86_avx_maskload_pd_256:
2554 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2555 ; CHECK-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0
2557 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
2558 ret <4 x double> %res
2560 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
; Codegen test: a call to @llvm.x86.avx.maskload.ps (pointer plus <4 x i32>
; mask) is expected to load the pointer from the stack into %eax and then emit
; the load form of vmaskmovps with the mask in %xmm0.
2563 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
2564 ; CHECK-LABEL: test_x86_avx_maskload_ps:
2566 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2567 ; CHECK-NEXT: vmaskmovps (%eax), %xmm0, %xmm0
2569 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
2570 ret <4 x float> %res
2572 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
; Codegen test: a call to @llvm.x86.avx.maskload.ps.256 (pointer plus
; <8 x i32> mask) is expected to load the pointer from the stack into %eax and
; then emit the load form of vmaskmovps with the mask in %ymm0.
2575 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
2576 ; CHECK-LABEL: test_x86_avx_maskload_ps_256:
2578 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2579 ; CHECK-NEXT: vmaskmovps (%eax), %ymm0, %ymm0
2581 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
2582 ret <8 x float> %res
2584 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
; Codegen test: a call to @llvm.x86.avx.maskstore.pd (pointer, <2 x i64> mask,
; <2 x double> data) is expected to load the pointer from the stack into %eax
; and then emit the store form of vmaskmovpd on xmm registers.
2587 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
2588 ; CHECK-LABEL: test_x86_avx_maskstore_pd:
2590 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2591 ; CHECK-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax)
2593 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
2596 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
; Codegen test: a call to @llvm.x86.avx.maskstore.pd.256 (pointer, <4 x i64>
; mask, <4 x double> data) is expected to load the pointer from the stack into
; %eax, emit the store form of vmaskmovpd on ymm registers, and then emit
; vzeroupper.
2599 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
2600 ; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
2602 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2603 ; CHECK-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax)
2604 ; CHECK-NEXT: vzeroupper
2606 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
2609 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
; Codegen test: a call to @llvm.x86.avx.maskstore.ps (pointer, <4 x i32> mask,
; <4 x float> data) is expected to load the pointer from the stack into %eax
; and then emit the store form of vmaskmovps on xmm registers.
2612 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
2613 ; CHECK-LABEL: test_x86_avx_maskstore_ps:
2615 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2616 ; CHECK-NEXT: vmaskmovps %xmm1, %xmm0, (%eax)
2618 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
2621 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
; Codegen test: a call to @llvm.x86.avx.maskstore.ps.256 (pointer, <8 x i32>
; mask, <8 x float> data) is expected to load the pointer from the stack into
; %eax, emit the store form of vmaskmovps on ymm registers, and then emit
; vzeroupper.
2624 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
2625 ; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
2627 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2628 ; CHECK-NEXT: vmaskmovps %ymm1, %ymm0, (%eax)
2629 ; CHECK-NEXT: vzeroupper
2631 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
2634 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
; Codegen test: a call to @llvm.x86.avx.max.pd.256 on two <4 x double>
; arguments is expected to be emitted as vmaxpd on ymm registers.
2637 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
2638 ; CHECK-LABEL: test_x86_avx_max_pd_256:
2640 ; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
2642 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2643 ret <4 x double> %res
2645 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.max.ps.256 on two <8 x float>
; arguments is expected to be emitted as vmaxps on ymm registers.
2648 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
2649 ; CHECK-LABEL: test_x86_avx_max_ps_256:
2651 ; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0
2653 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2654 ret <8 x float> %res
2656 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.min.pd.256 on two <4 x double>
; arguments is expected to be emitted as vminpd on ymm registers.
2659 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
2660 ; CHECK-LABEL: test_x86_avx_min_pd_256:
2662 ; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0
2664 %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2665 ret <4 x double> %res
2667 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.min.ps.256 on two <8 x float>
; arguments is expected to be emitted as vminps on ymm registers.
2670 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
2671 ; CHECK-LABEL: test_x86_avx_min_ps_256:
2673 ; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0
2675 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2676 ret <8 x float> %res
2678 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.movmsk.pd.256 (<4 x double> in, i32
; out) is expected to be emitted as vmovmskpd from %ymm0 into %eax, followed by
; vzeroupper.
2681 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
2682 ; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
2684 ; CHECK-NEXT: vmovmskpd %ymm0, %eax
2685 ; CHECK-NEXT: vzeroupper
2687 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
2690 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.movmsk.ps.256 (<8 x float> in, i32
; out) is expected to be emitted as vmovmskps from %ymm0 into %eax, followed by
; vzeroupper.
2693 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
2694 ; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
2696 ; CHECK-NEXT: vmovmskps %ymm0, %eax
2697 ; CHECK-NEXT: vzeroupper
2699 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
2702 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.ptestc.256 on two <4 x i64> arguments
; is expected to be emitted as vptest on ymm registers. The i32 result is
; expected to be formed with sbbl %eax, %eax followed by andl $1, and a
; vzeroupper is expected afterwards.
2710 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
2711 ; CHECK-LABEL: test_x86_avx_ptestc_256:
2713 ; CHECK-NEXT: vptest %ymm1, %ymm0
2714 ; CHECK-NEXT: sbbl %eax, %eax
2715 ; CHECK-NEXT: andl $1, %eax
2716 ; CHECK-NEXT: vzeroupper
2718 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2721 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.ptestnzc.256 on two <4 x i64>
; arguments is expected to be emitted as vptest on ymm registers, with the i32
; result produced by seta and a movzbl zero-extension, followed by vzeroupper.
2724 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
2725 ; CHECK-LABEL: test_x86_avx_ptestnzc_256:
2727 ; CHECK-NEXT: vptest %ymm1, %ymm0
2728 ; CHECK-NEXT: seta %al
2729 ; CHECK-NEXT: movzbl %al, %eax
2730 ; CHECK-NEXT: vzeroupper
2732 %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2735 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.ptestz.256 on two <4 x i64> arguments
; is expected to be emitted as vptest on ymm registers, with the i32 result
; produced by sete and a movzbl zero-extension, followed by vzeroupper.
2738 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
2739 ; CHECK-LABEL: test_x86_avx_ptestz_256:
2741 ; CHECK-NEXT: vptest %ymm1, %ymm0
2742 ; CHECK-NEXT: sete %al
2743 ; CHECK-NEXT: movzbl %al, %eax
2744 ; CHECK-NEXT: vzeroupper
2746 %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2749 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.rcp.ps.256 on an <8 x float> argument
; is expected to be emitted as vrcpps on a ymm register.
2752 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
2753 ; CHECK-LABEL: test_x86_avx_rcp_ps_256:
2755 ; CHECK-NEXT: vrcpps %ymm0, %ymm0
2757 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2758 ret <8 x float> %res
2760 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.round.pd.256 with i32 immediate 7 is
; expected to be emitted as vroundpd on a ymm register with the immediate
; passed through unchanged as $7.
2763 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
2764 ; CHECK-LABEL: test_x86_avx_round_pd_256:
2766 ; CHECK-NEXT: vroundpd $7, %ymm0, %ymm0
2768 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
2769 ret <4 x double> %res
2771 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.round.ps.256 with i32 immediate 7 is
; expected to be emitted as vroundps on a ymm register with the immediate
; passed through unchanged as $7.
2774 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
2775 ; CHECK-LABEL: test_x86_avx_round_ps_256:
2777 ; CHECK-NEXT: vroundps $7, %ymm0, %ymm0
2779 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
2780 ret <8 x float> %res
2782 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
; Codegen test: a call to @llvm.x86.avx.rsqrt.ps.256 on an <8 x float>
; argument is expected to be emitted as vrsqrtps on a ymm register.
2785 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
2786 ; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
2788 ; CHECK-NEXT: vrsqrtps %ymm0, %ymm0
2790 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2791 ret <8 x float> %res
2793 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
; Exercises the llvm.x86.avx.sqrt.pd.256 intrinsic on a <4 x double> operand.
; Expected code: a single vsqrtpd operating in place on %ymm0.
2796 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
2797 ; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
2799 ; CHECK-NEXT: vsqrtpd %ymm0, %ymm0
2801 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
2802 ret <4 x double> %res
2804 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
; Exercises the llvm.x86.avx.sqrt.ps.256 intrinsic on an <8 x float> operand.
; Expected code: a single vsqrtps operating in place on %ymm0.
2807 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
2808 ; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
2810 ; CHECK-NEXT: vsqrtps %ymm0, %ymm0
2812 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2813 ret <8 x float> %res
2815 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
; Exercises the llvm.x86.avx.storeu.dq.256 intrinsic. The <32 x i8> argument is
; first incremented by a splat of 1 and the sum is stored through %a0.
; Expected code: the 256-bit add is performed as two 128-bit vpaddb halves
; (vextractf128 / vinsertf128 around them) and the store is emitted as vmovups.
2818 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
2819 ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions
2820 ; add operation forces the execution domain.
2821 ; CHECK-LABEL: test_x86_avx_storeu_dq_256:
2823 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2824 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
2825 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2826 ; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
2827 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2828 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2829 ; CHECK-NEXT: vmovups %ymm0, (%eax)
2830 ; CHECK-NEXT: vzeroupper
2832 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2833 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
2836 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
; Exercises the llvm.x86.avx.storeu.pd.256 intrinsic. The <4 x double> argument
; goes through an fadd with an all-zero vector before being stored via %a0.
; Expected code: vxorpd to build the zero vector, vaddpd, then a vmovupd store
; to (%eax), followed by vzeroupper.
2839 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
2840 ; add operation forces the execution domain.
2841 ; CHECK-LABEL: test_x86_avx_storeu_pd_256:
2843 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2844 ; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
2845 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
2846 ; CHECK-NEXT: vmovupd %ymm0, (%eax)
2847 ; CHECK-NEXT: vzeroupper
2849 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
2850 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
2853 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
; Exercises the llvm.x86.avx.storeu.ps.256 intrinsic, storing the <8 x float>
; argument unchanged through %a0.
; Expected code: pointer loaded from the stack into %eax, vmovups of %ymm0 to
; (%eax), then vzeroupper.
2856 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
2857 ; CHECK-LABEL: test_x86_avx_storeu_ps_256:
2859 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2860 ; CHECK-NEXT: vmovups %ymm0, (%eax)
2861 ; CHECK-NEXT: vzeroupper
2863 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
2866 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
; Exercises the llvm.x86.avx.vbroadcastf128.pd.256 intrinsic with a pointer
; argument, producing a <4 x double>.
; Expected code: pointer loaded from the stack into %eax, then vbroadcastf128
; from (%eax) into %ymm0.
2869 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
2870 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
2872 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2873 ; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
2875 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
2876 ret <4 x double> %res
2878 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
; Exercises the llvm.x86.avx.vbroadcastf128.ps.256 intrinsic with a pointer
; argument, producing an <8 x float>.
; Expected code: pointer loaded from the stack into %eax, then vbroadcastf128
; from (%eax) into %ymm0.
2881 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
2882 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
2884 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
2885 ; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
2887 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
2888 ret <8 x float> %res
2890 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
; Exercises the llvm.x86.avx.vperm2f128.pd.256 intrinsic with immediate i8 7.
; Expected code: one vperm2f128 whose decoded shuffle comment reads
; ymm0 = ymm1[2,3],ymm0[0,1].
2893 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
2894 ; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
2896 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2898 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
2899 ret <4 x double> %res
2901 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
; Exercises the llvm.x86.avx.vperm2f128.ps.256 intrinsic with immediate i8 7.
; Expected code: one vperm2f128 whose decoded shuffle comment reads
; ymm0 = ymm1[2,3],ymm0[0,1].
2904 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
2905 ; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
2907 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2909 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2910 ret <8 x float> %res
2912 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Exercises the llvm.x86.avx.vperm2f128.si.256 intrinsic (the <8 x i32>
; variant) with immediate i8 7.
; Expected code: one vperm2f128 whose decoded shuffle comment reads
; ymm0 = ymm1[2,3],ymm0[0,1].
2915 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
2916 ; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
2918 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2920 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
2923 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
; Exercises the 128-bit llvm.x86.avx.vpermil.pd intrinsic with immediate i8 1.
; Expected code: vpermilpd whose decoded shuffle comment reads
; xmm0 = xmm0[1,0].
2926 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
2927 ; CHECK-LABEL: test_x86_avx_vpermil_pd:
2929 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
2931 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
2932 ret <2 x double> %res
2934 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
; Exercises the 256-bit llvm.x86.avx.vpermil.pd.256 intrinsic with immediate
; i8 7.
; Expected code: vpermilpd whose decoded shuffle comment reads
; ymm0 = ymm0[1,1,3,2].
2937 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
2938 ; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
2940 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
2942 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
2943 ret <4 x double> %res
2945 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
; Exercises the 128-bit llvm.x86.avx.vpermil.ps intrinsic with immediate i8 7.
; Expected code: vpermilps whose decoded shuffle comment reads
; xmm0 = xmm0[3,1,0,0].
2948 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
2949 ; CHECK-LABEL: test_x86_avx_vpermil_ps:
2951 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
2953 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
2954 ret <4 x float> %res
2956 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
; Exercises the 256-bit llvm.x86.avx.vpermil.ps.256 intrinsic with immediate
; i8 7.
; Expected code: vpermilps whose decoded shuffle comment reads
; ymm0 = ymm0[3,1,0,0,7,5,4,4].
2959 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
2960 ; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
2962 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
2964 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
2965 ret <8 x float> %res
2967 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
; Exercises the llvm.x86.avx.vpermilvar.pd intrinsic, where the permute control
; is a <2 x i64> register operand rather than an immediate.
; Expected code: the three-operand register form vpermilpd %xmm1, %xmm0, %xmm0.
2970 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
2971 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
2973 ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
2975 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
2976 ret <2 x double> %res
2978 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
; Exercises the llvm.x86.avx.vpermilvar.pd.256 intrinsic, where the permute
; control is a <4 x i64> register operand.
; Expected code: the three-operand register form vpermilpd %ymm1, %ymm0, %ymm0.
2981 define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
2982 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
2984 ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
2986 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
2987 ret <4 x double> %res
2989 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
; Exercises the llvm.x86.avx.vpermilvar.ps intrinsic with the <4 x i32> control
; passed in a register.
; Expected code: the three-operand register form vpermilps %xmm1, %xmm0, %xmm0.
; The intrinsic declaration is shared with the _load variant that follows.
2992 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
2993 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
2995 ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
2997 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
2998 ret <4 x float> %res
; Variant of the vpermilvar.ps test in which the <4 x i32> control vector is
; loaded from memory through %a1 before the intrinsic call.
; Expected code: the pointer is loaded from the stack into %eax and the load is
; folded into the instruction as a memory operand: vpermilps (%eax), %xmm0, %xmm0.
3000 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
3001 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
3003 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3004 ; CHECK-NEXT: vpermilps (%eax), %xmm0, %xmm0
3006 %a2 = load <4 x i32>, <4 x i32>* %a1
3007 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
3008 ret <4 x float> %res
3010 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
; Exercises the llvm.x86.avx.vpermilvar.ps.256 intrinsic with the <8 x i32>
; control passed in a register.
; Expected code: the three-operand register form vpermilps %ymm1, %ymm0, %ymm0.
3013 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
3014 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
3016 ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
3018 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
3019 ret <8 x float> %res
3021 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
; Exercises the llvm.x86.avx.vtestc.pd intrinsic on two <2 x double> operands.
; Expected code: vtestpd on %xmm1/%xmm0, then sbbl %eax,%eax + andl $1 to form
; the i32 result in %eax.
3024 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
3025 ; CHECK-LABEL: test_x86_avx_vtestc_pd:
3027 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
3028 ; CHECK-NEXT: sbbl %eax, %eax
3029 ; CHECK-NEXT: andl $1, %eax
3031 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3034 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
; Exercises the llvm.x86.avx.vtestc.pd.256 intrinsic on two <4 x double>
; operands.
; Expected code: vtestpd on %ymm1/%ymm0, then sbbl %eax,%eax + andl $1,
; followed by vzeroupper.
3037 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
3038 ; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
3040 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
3041 ; CHECK-NEXT: sbbl %eax, %eax
3042 ; CHECK-NEXT: andl $1, %eax
3043 ; CHECK-NEXT: vzeroupper
3045 %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3048 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Exercises the llvm.x86.avx.vtestc.ps intrinsic on two <4 x float> operands.
; Expected code: vtestps on %xmm1/%xmm0, then sbbl %eax,%eax + andl $1 to form
; the i32 result in %eax.
3051 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
3052 ; CHECK-LABEL: test_x86_avx_vtestc_ps:
3054 ; CHECK-NEXT: vtestps %xmm1, %xmm0
3055 ; CHECK-NEXT: sbbl %eax, %eax
3056 ; CHECK-NEXT: andl $1, %eax
3058 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3061 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
; Exercises the llvm.x86.avx.vtestc.ps.256 intrinsic on two <8 x float>
; operands.
; Expected code: vtestps on %ymm1/%ymm0, then sbbl %eax,%eax + andl $1,
; followed by vzeroupper.
3064 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
3065 ; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
3067 ; CHECK-NEXT: vtestps %ymm1, %ymm0
3068 ; CHECK-NEXT: sbbl %eax, %eax
3069 ; CHECK-NEXT: andl $1, %eax
3070 ; CHECK-NEXT: vzeroupper
3072 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3075 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Exercises the llvm.x86.avx.vtestnzc.pd intrinsic on two <2 x double> operands.
; Expected code: vtestpd on %xmm1/%xmm0, then seta %al + movzbl into %eax.
3078 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
3079 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
3081 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
3082 ; CHECK-NEXT: seta %al
3083 ; CHECK-NEXT: movzbl %al, %eax
3085 %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3088 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
; Exercises the llvm.x86.avx.vtestnzc.pd.256 intrinsic on two <4 x double>
; operands.
; Expected code: vtestpd on %ymm1/%ymm0, then seta %al + movzbl into %eax,
; followed by vzeroupper.
3091 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
3092 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
3094 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
3095 ; CHECK-NEXT: seta %al
3096 ; CHECK-NEXT: movzbl %al, %eax
3097 ; CHECK-NEXT: vzeroupper
3099 %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3102 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Exercises the llvm.x86.avx.vtestnzc.ps intrinsic on two <4 x float> operands.
; Expected code: vtestps on %xmm1/%xmm0, then seta %al + movzbl into %eax.
3105 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
3106 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
3108 ; CHECK-NEXT: vtestps %xmm1, %xmm0
3109 ; CHECK-NEXT: seta %al
3110 ; CHECK-NEXT: movzbl %al, %eax
3112 %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3115 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
; Exercises the llvm.x86.avx.vtestnzc.ps.256 intrinsic on two <8 x float>
; operands.
; Expected code: vtestps on %ymm1/%ymm0, then seta %al + movzbl into %eax,
; followed by vzeroupper.
3118 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
3119 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
3121 ; CHECK-NEXT: vtestps %ymm1, %ymm0
3122 ; CHECK-NEXT: seta %al
3123 ; CHECK-NEXT: movzbl %al, %eax
3124 ; CHECK-NEXT: vzeroupper
3126 %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3129 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Exercises the llvm.x86.avx.vtestz.pd intrinsic on two <2 x double> operands.
; Expected code: vtestpd on %xmm1/%xmm0, then sete %al + movzbl into %eax.
3132 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
3133 ; CHECK-LABEL: test_x86_avx_vtestz_pd:
3135 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
3136 ; CHECK-NEXT: sete %al
3137 ; CHECK-NEXT: movzbl %al, %eax
3139 %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3142 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
; Exercises the llvm.x86.avx.vtestz.pd.256 intrinsic on two <4 x double>
; operands.
; Expected code: vtestpd on %ymm1/%ymm0, then sete %al + movzbl into %eax,
; followed by vzeroupper.
3145 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
3146 ; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
3148 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
3149 ; CHECK-NEXT: sete %al
3150 ; CHECK-NEXT: movzbl %al, %eax
3151 ; CHECK-NEXT: vzeroupper
3153 %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3156 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Exercises the llvm.x86.avx.vtestz.ps intrinsic on two <4 x float> operands.
; Expected code: vtestps on %xmm1/%xmm0, then sete %al + movzbl into %eax.
3159 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
3160 ; CHECK-LABEL: test_x86_avx_vtestz_ps:
3162 ; CHECK-NEXT: vtestps %xmm1, %xmm0
3163 ; CHECK-NEXT: sete %al
3164 ; CHECK-NEXT: movzbl %al, %eax
3166 %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3169 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
; Exercises the llvm.x86.avx.vtestz.ps.256 intrinsic on two <8 x float>
; operands.
; Expected code: vtestps on %ymm1/%ymm0, then sete %al + movzbl into %eax,
; followed by vzeroupper.
3172 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
3173 ; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
3175 ; CHECK-NEXT: vtestps %ymm1, %ymm0
3176 ; CHECK-NEXT: sete %al
3177 ; CHECK-NEXT: movzbl %al, %eax
3178 ; CHECK-NEXT: vzeroupper
3180 %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3183 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Exercises the argument-less llvm.x86.avx.vzeroall intrinsic.
; Expected code: vzeroall immediately followed by a vzeroupper.
3186 define void @test_x86_avx_vzeroall() {
3187 ; CHECK-LABEL: test_x86_avx_vzeroall:
3189 ; CHECK-NEXT: vzeroall
3190 ; CHECK-NEXT: vzeroupper
3192 call void @llvm.x86.avx.vzeroall()
3195 declare void @llvm.x86.avx.vzeroall() nounwind
; Exercises the argument-less llvm.x86.avx.vzeroupper intrinsic.
; Expected code: two consecutive vzeroupper instructions, although the IR
; contains only one intrinsic call.
; NOTE(review): the second one presumably comes from the compiler's own
; vzeroupper insertion before the return; confirm.
3198 define void @test_x86_avx_vzeroupper() {
3199 ; CHECK-LABEL: test_x86_avx_vzeroupper:
3201 ; CHECK-NEXT: vzeroupper
3202 ; CHECK-NEXT: vzeroupper
3204 call void @llvm.x86.avx.vzeroupper()
3207 declare void @llvm.x86.avx.vzeroupper() nounwind
3209 ; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work.
; Exercises the llvm.x86.sse3.monitor intrinsic (pointer plus two i32
; arguments) while AVX is enabled.
; Expected code: three stack loads into %edx, %ecx and %eax, a
; leal (%eax), %eax, and then the monitor instruction.
3211 define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
3212 ; CHECK-LABEL: monitor:
3214 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
3215 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
3216 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3217 ; CHECK-NEXT: leal (%eax), %eax
3218 ; CHECK-NEXT: monitor
3220 tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
3223 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
; Exercises the llvm.x86.sse3.mwait intrinsic (two i32 arguments) while AVX is
; enabled.
; The expectations shown here cover loading the two stack arguments into %ecx
; and %eax ahead of the intrinsic's instruction.
3225 define void @mwait(i32 %E, i32 %H) nounwind {
3226 ; CHECK-LABEL: mwait:
3228 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
3229 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3232 tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
3235 declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
; Exercises the llvm.x86.sse.sfence intrinsic while AVX is enabled.
; Expected code: a plain (non-VEX) sfence instruction.
3237 define void @sfence() nounwind {
3238 ; CHECK-LABEL: sfence:
3240 ; CHECK-NEXT: sfence
3242 tail call void @llvm.x86.sse.sfence()
3245 declare void @llvm.x86.sse.sfence() nounwind
; Exercises the llvm.x86.sse2.lfence intrinsic while AVX is enabled.
; Expected code: a plain (non-VEX) lfence instruction.
3247 define void @lfence() nounwind {
3248 ; CHECK-LABEL: lfence:
3250 ; CHECK-NEXT: lfence
3252 tail call void @llvm.x86.sse2.lfence()
3255 declare void @llvm.x86.sse2.lfence() nounwind
; Exercises the llvm.x86.sse2.mfence intrinsic while AVX is enabled.
; Expected code: a plain (non-VEX) mfence instruction.
3257 define void @mfence() nounwind {
3258 ; CHECK-LABEL: mfence:
3260 ; CHECK-NEXT: mfence
3262 tail call void @llvm.x86.sse2.mfence()
3265 declare void @llvm.x86.sse2.mfence() nounwind
; Exercises the llvm.x86.sse2.clflush intrinsic on the pointer %p while AVX is
; enabled.
; Expected code: pointer loaded from the stack into %eax, then clflush (%eax).
3267 define void @clflush(i8* %p) nounwind {
3268 ; CHECK-LABEL: clflush:
3270 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3271 ; CHECK-NEXT: clflush (%eax)
3273 tail call void @llvm.x86.sse2.clflush(i8* %p)
3276 declare void @llvm.x86.sse2.clflush(i8*) nounwind
; Exercises the llvm.x86.sse42.crc32.32.8 intrinsic (i32 accumulator, i8 data)
; while AVX is enabled.
; Expected code: the first argument is loaded into %eax and crc32b takes its
; other operand directly from the stack.
3278 define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
3279 ; CHECK-LABEL: crc32_32_8:
3281 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3282 ; CHECK-NEXT: crc32b {{[0-9]+}}(%esp), %eax
3284 %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
3287 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
; Exercises the llvm.x86.sse42.crc32.32.16 intrinsic (i32 accumulator, i16
; data) while AVX is enabled.
; Expected code: the first argument is loaded into %eax and crc32w takes its
; other operand directly from the stack.
3289 define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
3290 ; CHECK-LABEL: crc32_32_16:
3292 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3293 ; CHECK-NEXT: crc32w {{[0-9]+}}(%esp), %eax
3295 %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
3298 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
; Exercises the llvm.x86.sse42.crc32.32.32 intrinsic (i32 accumulator, i32
; data) while AVX is enabled.
; Expected code: the first argument is loaded into %eax and crc32l takes its
; other operand directly from the stack.
3300 define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
3301 ; CHECK-LABEL: crc32_32_32:
3303 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3304 ; CHECK-NEXT: crc32l {{[0-9]+}}(%esp), %eax
3306 %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
3309 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
; Exercises the llvm.x86.avx.movnt.dq.256 non-temporal store intrinsic.
; The <2 x i64> argument is incremented by <1, 1>, widened to <4 x i64> with a
; shufflevector whose upper two elements are undef, and stored through %p.
; Expected code: vpaddq with a constant-pool memory operand, vmovntdq of %ymm0
; to (%eax), then vzeroupper.
; The constant-pool label is matched with a regex instead of a literal name:
; its numeric suffix is assigned by the compiler (it appears to track this
; function's position in the module), so a hard-coded label would break as soon
; as tests are added to or removed from this file.
3311 define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
3312 ; CHECK-LABEL: movnt_dq:
3314 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3315 ; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
3316 ; CHECK-NEXT: vmovntdq %ymm0, (%eax)
3317 ; CHECK-NEXT: vzeroupper
3319 %a2 = add <2 x i64> %a1, <i64 1, i64 1>
3320 %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
3321 tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
3324 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
; Exercises the llvm.x86.avx.movnt.ps.256 non-temporal store intrinsic, storing
; the <8 x float> argument unchanged through %p.
; Expected code: pointer loaded from the stack into %eax, vmovntps of %ymm0 to
; (%eax), then vzeroupper.
3326 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
3327 ; CHECK-LABEL: movnt_ps:
3329 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3330 ; CHECK-NEXT: vmovntps %ymm0, (%eax)
3331 ; CHECK-NEXT: vzeroupper
3333 tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
3336 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
; Exercises the llvm.x86.avx.movnt.pd.256 non-temporal store intrinsic. The
; <4 x double> argument goes through an fadd with an all-zero vector before
; being stored through %p.
; Expected code: vxorpd to build the zero vector, vaddpd, vmovntpd of %ymm0 to
; (%eax), then vzeroupper.
3338 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
3339 ; add operation forces the execution domain.
3340 ; CHECK-LABEL: movnt_pd:
3342 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3343 ; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
3344 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
3345 ; CHECK-NEXT: vmovntpd %ymm0, (%eax)
3346 ; CHECK-NEXT: vzeroupper
3348 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
3349 tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
3352 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
3355 ; Check for pclmulqdq
; Exercises the llvm.x86.pclmulqdq intrinsic on two <2 x i64> operands with
; immediate i8 0.
; Expected code: the VEX-prefixed form vpclmulqdq $0, %xmm1, %xmm0, %xmm0.
3356 define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
3357 ; CHECK-LABEL: test_x86_pclmulqdq:
3359 ; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0
3361 %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
3364 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone