1 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
2 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
3 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
4 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
5 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
6 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s -x86-experimental-vector-shuffle-lowering | FileCheck --check-prefix=AVX %s
8 target triple = "x86_64-unknown-unknown"
10 ; Ensure that the backend no longer emits unnecessary vector insert
11 ; instructions immediately after SSE scalar fp instructions
12 ; like addss or mulss.
; Scalar float add on lane 0: a[0] + b[0] is inserted back into lane 0 of %a.
; The visible checks match addss (vaddss for the AVX run) on %xmm1, %xmm0.
14 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
15 ; SSE-LABEL: test_add_ss:
17 ; SSE-NEXT: addss %xmm1, %xmm0
20 ; AVX-LABEL: test_add_ss:
22 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
24 %1 = extractelement <4 x float> %b, i32 0
25 %2 = extractelement <4 x float> %a, i32 0
26 %add = fadd float %2, %1
27 %3 = insertelement <4 x float> %a, float %add, i32 0
; Scalar float subtract on lane 0: a[0] - b[0] is inserted back into lane 0 of %a.
; The visible checks match subss (vsubss for the AVX run) on %xmm1, %xmm0.
31 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
32 ; SSE-LABEL: test_sub_ss:
34 ; SSE-NEXT: subss %xmm1, %xmm0
37 ; AVX-LABEL: test_sub_ss:
39 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
41 %1 = extractelement <4 x float> %b, i32 0
42 %2 = extractelement <4 x float> %a, i32 0
43 %sub = fsub float %2, %1
44 %3 = insertelement <4 x float> %a, float %sub, i32 0
; Scalar float multiply on lane 0: a[0] * b[0] is inserted back into lane 0 of %a.
; The visible checks match mulss (vmulss for the AVX run) on %xmm1, %xmm0.
48 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
49 ; SSE-LABEL: test_mul_ss:
51 ; SSE-NEXT: mulss %xmm1, %xmm0
54 ; AVX-LABEL: test_mul_ss:
56 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
58 %1 = extractelement <4 x float> %b, i32 0
59 %2 = extractelement <4 x float> %a, i32 0
60 %mul = fmul float %2, %1
61 %3 = insertelement <4 x float> %a, float %mul, i32 0
; Scalar float divide on lane 0: a[0] / b[0] is inserted back into lane 0 of %a.
; The visible checks match divss (vdivss for the AVX run) on %xmm1, %xmm0.
65 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
66 ; SSE-LABEL: test_div_ss:
68 ; SSE-NEXT: divss %xmm1, %xmm0
71 ; AVX-LABEL: test_div_ss:
73 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
75 %1 = extractelement <4 x float> %b, i32 0
76 %2 = extractelement <4 x float> %a, i32 0
77 %div = fdiv float %2, %1
78 %3 = insertelement <4 x float> %a, float %div, i32 0
; Scalar double add on lane 0: a[0] + b[0] is inserted back into lane 0 of %a.
; The visible checks match addsd (vaddsd for the AVX run) on %xmm1, %xmm0.
82 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
83 ; SSE-LABEL: test_add_sd:
85 ; SSE-NEXT: addsd %xmm1, %xmm0
88 ; AVX-LABEL: test_add_sd:
90 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
92 %1 = extractelement <2 x double> %b, i32 0
93 %2 = extractelement <2 x double> %a, i32 0
94 %add = fadd double %2, %1
95 %3 = insertelement <2 x double> %a, double %add, i32 0
; Scalar double subtract on lane 0: a[0] - b[0] is inserted back into lane 0 of %a.
; The visible checks match subsd (vsubsd for the AVX run) on %xmm1, %xmm0.
99 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
100 ; SSE-LABEL: test_sub_sd:
102 ; SSE-NEXT: subsd %xmm1, %xmm0
105 ; AVX-LABEL: test_sub_sd:
107 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
109 %1 = extractelement <2 x double> %b, i32 0
110 %2 = extractelement <2 x double> %a, i32 0
111 %sub = fsub double %2, %1
112 %3 = insertelement <2 x double> %a, double %sub, i32 0
; Scalar double multiply on lane 0: a[0] * b[0] is inserted back into lane 0 of %a.
; The visible checks match mulsd (vmulsd for the AVX run) on %xmm1, %xmm0.
116 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
117 ; SSE-LABEL: test_mul_sd:
119 ; SSE-NEXT: mulsd %xmm1, %xmm0
122 ; AVX-LABEL: test_mul_sd:
124 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
126 %1 = extractelement <2 x double> %b, i32 0
127 %2 = extractelement <2 x double> %a, i32 0
128 %mul = fmul double %2, %1
129 %3 = insertelement <2 x double> %a, double %mul, i32 0
; Scalar double divide on lane 0: a[0] / b[0] is inserted back into lane 0 of %a.
; The visible checks match divsd (vdivsd for the AVX run) on %xmm1, %xmm0.
133 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
134 ; SSE-LABEL: test_div_sd:
136 ; SSE-NEXT: divsd %xmm1, %xmm0
139 ; AVX-LABEL: test_div_sd:
141 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
143 %1 = extractelement <2 x double> %b, i32 0
144 %2 = extractelement <2 x double> %a, i32 0
145 %div = fdiv double %2, %1
146 %3 = insertelement <2 x double> %a, double %div, i32 0
; Scalar float add on lane 0: a[0] + b[0] is inserted into lane 0 of %b (not %a).
; The visible checks match addss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vaddss writing %xmm0.
150 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
151 ; SSE-LABEL: test2_add_ss:
153 ; SSE-NEXT: addss %xmm0, %xmm1
154 ; SSE-NEXT: movaps %xmm1, %xmm0
157 ; AVX-LABEL: test2_add_ss:
159 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
161 %1 = extractelement <4 x float> %a, i32 0
162 %2 = extractelement <4 x float> %b, i32 0
163 %add = fadd float %1, %2
164 %3 = insertelement <4 x float> %b, float %add, i32 0
; Scalar float subtract on lane 0: b[0] - a[0] is inserted into lane 0 of %b (not %a).
; The visible checks match subss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vsubss writing %xmm0.
168 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
169 ; SSE-LABEL: test2_sub_ss:
171 ; SSE-NEXT: subss %xmm0, %xmm1
172 ; SSE-NEXT: movaps %xmm1, %xmm0
175 ; AVX-LABEL: test2_sub_ss:
177 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
179 %1 = extractelement <4 x float> %a, i32 0
180 %2 = extractelement <4 x float> %b, i32 0
181 %sub = fsub float %2, %1
182 %3 = insertelement <4 x float> %b, float %sub, i32 0
; Scalar float multiply on lane 0: a[0] * b[0] is inserted into lane 0 of %b (not %a).
; The visible checks match mulss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vmulss writing %xmm0.
186 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
187 ; SSE-LABEL: test2_mul_ss:
189 ; SSE-NEXT: mulss %xmm0, %xmm1
190 ; SSE-NEXT: movaps %xmm1, %xmm0
193 ; AVX-LABEL: test2_mul_ss:
195 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
197 %1 = extractelement <4 x float> %a, i32 0
198 %2 = extractelement <4 x float> %b, i32 0
199 %mul = fmul float %1, %2
200 %3 = insertelement <4 x float> %b, float %mul, i32 0
; Scalar float divide on lane 0: b[0] / a[0] is inserted into lane 0 of %b (not %a).
; The visible checks match divss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vdivss writing %xmm0.
204 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
205 ; SSE-LABEL: test2_div_ss:
207 ; SSE-NEXT: divss %xmm0, %xmm1
208 ; SSE-NEXT: movaps %xmm1, %xmm0
211 ; AVX-LABEL: test2_div_ss:
213 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
215 %1 = extractelement <4 x float> %a, i32 0
216 %2 = extractelement <4 x float> %b, i32 0
217 %div = fdiv float %2, %1
218 %3 = insertelement <4 x float> %b, float %div, i32 0
; Scalar double add on lane 0: a[0] + b[0] is inserted into lane 0 of %b (not %a).
; The visible checks match addsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vaddsd writing %xmm0.
222 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
223 ; SSE-LABEL: test2_add_sd:
225 ; SSE-NEXT: addsd %xmm0, %xmm1
226 ; SSE-NEXT: movaps %xmm1, %xmm0
229 ; AVX-LABEL: test2_add_sd:
231 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
233 %1 = extractelement <2 x double> %a, i32 0
234 %2 = extractelement <2 x double> %b, i32 0
235 %add = fadd double %1, %2
236 %3 = insertelement <2 x double> %b, double %add, i32 0
; Scalar double subtract on lane 0: b[0] - a[0] is inserted into lane 0 of %b (not %a).
; The visible checks match subsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vsubsd writing %xmm0.
240 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
241 ; SSE-LABEL: test2_sub_sd:
243 ; SSE-NEXT: subsd %xmm0, %xmm1
244 ; SSE-NEXT: movaps %xmm1, %xmm0
247 ; AVX-LABEL: test2_sub_sd:
249 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
251 %1 = extractelement <2 x double> %a, i32 0
252 %2 = extractelement <2 x double> %b, i32 0
253 %sub = fsub double %2, %1
254 %3 = insertelement <2 x double> %b, double %sub, i32 0
; Scalar double multiply on lane 0: a[0] * b[0] is inserted into lane 0 of %b (not %a).
; The visible checks match mulsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vmulsd writing %xmm0.
258 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
259 ; SSE-LABEL: test2_mul_sd:
261 ; SSE-NEXT: mulsd %xmm0, %xmm1
262 ; SSE-NEXT: movaps %xmm1, %xmm0
265 ; AVX-LABEL: test2_mul_sd:
267 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
269 %1 = extractelement <2 x double> %a, i32 0
270 %2 = extractelement <2 x double> %b, i32 0
271 %mul = fmul double %1, %2
272 %3 = insertelement <2 x double> %b, double %mul, i32 0
; Scalar double divide on lane 0: b[0] / a[0] is inserted into lane 0 of %b (not %a).
; The visible checks match divsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vdivsd writing %xmm0.
276 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
277 ; SSE-LABEL: test2_div_sd:
279 ; SSE-NEXT: divsd %xmm0, %xmm1
280 ; SSE-NEXT: movaps %xmm1, %xmm0
283 ; AVX-LABEL: test2_div_sd:
285 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
287 %1 = extractelement <2 x double> %a, i32 0
288 %2 = extractelement <2 x double> %b, i32 0
289 %div = fdiv double %2, %1
290 %3 = insertelement <2 x double> %b, double %div, i32 0
; Two chained scalar float adds on lane 0: a[0] + (a[0] + b[0]), inserted into lane 0 of %a.
; The visible checks match two back-to-back addss (two vaddss for the AVX run).
294 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
295 ; SSE-LABEL: test_multiple_add_ss:
297 ; SSE-NEXT: addss %xmm0, %xmm1
298 ; SSE-NEXT: addss %xmm1, %xmm0
301 ; AVX-LABEL: test_multiple_add_ss:
303 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
304 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
306 %1 = extractelement <4 x float> %b, i32 0
307 %2 = extractelement <4 x float> %a, i32 0
308 %add = fadd float %2, %1
309 %add2 = fadd float %2, %add
310 %3 = insertelement <4 x float> %a, float %add2, i32 0
; Two chained scalar float subtracts on lane 0: a[0] - (a[0] - b[0]), inserted into lane 0 of %a.
; The visible checks match a movaps copy of %xmm0 into %xmm2 followed by two subss;
; the AVX run expects two vsubss and no copy.
314 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
315 ; SSE-LABEL: test_multiple_sub_ss:
317 ; SSE-NEXT: movaps %xmm0, %xmm2
318 ; SSE-NEXT: subss %xmm1, %xmm2
319 ; SSE-NEXT: subss %xmm2, %xmm0
322 ; AVX-LABEL: test_multiple_sub_ss:
324 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm1
325 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
327 %1 = extractelement <4 x float> %b, i32 0
328 %2 = extractelement <4 x float> %a, i32 0
329 %sub = fsub float %2, %1
330 %sub2 = fsub float %2, %sub
331 %3 = insertelement <4 x float> %a, float %sub2, i32 0
; Two chained scalar float multiplies on lane 0: a[0] * (a[0] * b[0]), inserted into lane 0 of %a.
; The visible checks match two back-to-back mulss (two vmulss for the AVX run).
335 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
336 ; SSE-LABEL: test_multiple_mul_ss:
338 ; SSE-NEXT: mulss %xmm0, %xmm1
339 ; SSE-NEXT: mulss %xmm1, %xmm0
342 ; AVX-LABEL: test_multiple_mul_ss:
344 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
345 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
347 %1 = extractelement <4 x float> %b, i32 0
348 %2 = extractelement <4 x float> %a, i32 0
349 %mul = fmul float %2, %1
350 %mul2 = fmul float %2, %mul
351 %3 = insertelement <4 x float> %a, float %mul2, i32 0
; Two chained scalar float divides on lane 0: a[0] / (a[0] / b[0]), inserted into lane 0 of %a.
; The visible checks match a movaps copy of %xmm0 into %xmm2 followed by two divss;
; the AVX run expects two vdivss and no copy.
355 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
356 ; SSE-LABEL: test_multiple_div_ss:
358 ; SSE-NEXT: movaps %xmm0, %xmm2
359 ; SSE-NEXT: divss %xmm1, %xmm2
360 ; SSE-NEXT: divss %xmm2, %xmm0
363 ; AVX-LABEL: test_multiple_div_ss:
365 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm1
366 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
368 %1 = extractelement <4 x float> %b, i32 0
369 %2 = extractelement <4 x float> %a, i32 0
370 %div = fdiv float %2, %1
371 %div2 = fdiv float %2, %div
372 %3 = insertelement <4 x float> %a, float %div2, i32 0
376 ; Ensure that the backend selects SSE/AVX scalar fp instructions
377 ; from a packed fp instruction plus a vector insert.
; Packed float add %a + %b; the shuffle keeps lane 0 of the sum and lanes 1-3 of %a.
; The visible checks match the scalar addss (vaddss for the AVX run) on %xmm1, %xmm0.
379 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
380 ; SSE-LABEL: insert_test_add_ss:
382 ; SSE-NEXT: addss %xmm1, %xmm0
385 ; AVX-LABEL: insert_test_add_ss:
387 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
389 %1 = fadd <4 x float> %a, %b
390 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed float subtract %a - %b; the shuffle keeps lane 0 of the result and lanes 1-3 of %a.
; The visible checks match the scalar subss (vsubss for the AVX run) on %xmm1, %xmm0.
394 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
395 ; SSE-LABEL: insert_test_sub_ss:
397 ; SSE-NEXT: subss %xmm1, %xmm0
400 ; AVX-LABEL: insert_test_sub_ss:
402 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
404 %1 = fsub <4 x float> %a, %b
405 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed float multiply %a * %b; the shuffle keeps lane 0 of the product and lanes 1-3 of %a.
; The visible checks match the scalar mulss (vmulss for the AVX run) on %xmm1, %xmm0.
409 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
410 ; SSE-LABEL: insert_test_mul_ss:
412 ; SSE-NEXT: mulss %xmm1, %xmm0
415 ; AVX-LABEL: insert_test_mul_ss:
417 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
419 %1 = fmul <4 x float> %a, %b
420 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed float divide %a / %b; the shuffle keeps lane 0 of the quotient and lanes 1-3 of %a.
; The visible checks match the scalar divss (vdivss for the AVX run) on %xmm1, %xmm0.
424 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
425 ; SSE-LABEL: insert_test_div_ss:
427 ; SSE-NEXT: divss %xmm1, %xmm0
430 ; AVX-LABEL: insert_test_div_ss:
432 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
434 %1 = fdiv <4 x float> %a, %b
435 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed double add %a + %b; the shuffle keeps lane 0 of the sum and lane 1 of %a.
; The visible checks match the scalar addsd (vaddsd for the AVX run) on %xmm1, %xmm0.
439 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
440 ; SSE-LABEL: insert_test_add_sd:
442 ; SSE-NEXT: addsd %xmm1, %xmm0
445 ; AVX-LABEL: insert_test_add_sd:
447 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
449 %1 = fadd <2 x double> %a, %b
450 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
; Packed double subtract %a - %b; the shuffle keeps lane 0 of the result and lane 1 of %a.
; The visible checks match the scalar subsd (vsubsd for the AVX run) on %xmm1, %xmm0.
454 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
455 ; SSE-LABEL: insert_test_sub_sd:
457 ; SSE-NEXT: subsd %xmm1, %xmm0
460 ; AVX-LABEL: insert_test_sub_sd:
462 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
464 %1 = fsub <2 x double> %a, %b
465 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
; Packed double multiply %a * %b; the shuffle keeps lane 0 of the product and lane 1 of %a.
; The visible checks match the scalar mulsd (vmulsd for the AVX run) on %xmm1, %xmm0.
469 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
470 ; SSE-LABEL: insert_test_mul_sd:
472 ; SSE-NEXT: mulsd %xmm1, %xmm0
475 ; AVX-LABEL: insert_test_mul_sd:
477 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
479 %1 = fmul <2 x double> %a, %b
480 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
; Packed double divide %a / %b; the shuffle keeps lane 0 of the quotient and lane 1 of %a.
; The visible checks match the scalar divsd (vdivsd for the AVX run) on %xmm1, %xmm0.
484 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
485 ; SSE-LABEL: insert_test_div_sd:
487 ; SSE-NEXT: divsd %xmm1, %xmm0
490 ; AVX-LABEL: insert_test_div_sd:
492 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
494 %1 = fdiv <2 x double> %a, %b
495 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
; Packed float add %b + %a; the shuffle keeps lane 0 of the sum and lanes 1-3 of %b.
; The visible checks match addss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vaddss writing %xmm0.
499 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
500 ; SSE-LABEL: insert_test2_add_ss:
502 ; SSE-NEXT: addss %xmm0, %xmm1
503 ; SSE-NEXT: movaps %xmm1, %xmm0
506 ; AVX-LABEL: insert_test2_add_ss:
508 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
510 %1 = fadd <4 x float> %b, %a
511 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed float subtract %b - %a; the shuffle keeps lane 0 of the result and lanes 1-3 of %b.
; The visible checks match subss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vsubss writing %xmm0.
515 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
516 ; SSE-LABEL: insert_test2_sub_ss:
518 ; SSE-NEXT: subss %xmm0, %xmm1
519 ; SSE-NEXT: movaps %xmm1, %xmm0
522 ; AVX-LABEL: insert_test2_sub_ss:
524 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
526 %1 = fsub <4 x float> %b, %a
527 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed float multiply %b * %a; the shuffle keeps lane 0 of the product and lanes 1-3 of %b.
; The visible checks match mulss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vmulss writing %xmm0.
531 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
532 ; SSE-LABEL: insert_test2_mul_ss:
534 ; SSE-NEXT: mulss %xmm0, %xmm1
535 ; SSE-NEXT: movaps %xmm1, %xmm0
538 ; AVX-LABEL: insert_test2_mul_ss:
540 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
542 %1 = fmul <4 x float> %b, %a
543 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed float divide %b / %a; the shuffle keeps lane 0 of the quotient and lanes 1-3 of %b.
; The visible checks match divss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vdivss writing %xmm0.
547 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
548 ; SSE-LABEL: insert_test2_div_ss:
550 ; SSE-NEXT: divss %xmm0, %xmm1
551 ; SSE-NEXT: movaps %xmm1, %xmm0
554 ; AVX-LABEL: insert_test2_div_ss:
556 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
558 %1 = fdiv <4 x float> %b, %a
559 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed double add %b + %a; the shuffle keeps lane 0 of the sum and lane 1 of %b.
; The visible checks match addsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vaddsd writing %xmm0.
563 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
564 ; SSE-LABEL: insert_test2_add_sd:
566 ; SSE-NEXT: addsd %xmm0, %xmm1
567 ; SSE-NEXT: movaps %xmm1, %xmm0
570 ; AVX-LABEL: insert_test2_add_sd:
572 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
574 %1 = fadd <2 x double> %b, %a
575 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
; Packed double subtract %b - %a; the shuffle keeps lane 0 of the result and lane 1 of %b.
; The visible checks match subsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vsubsd writing %xmm0.
579 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
580 ; SSE-LABEL: insert_test2_sub_sd:
582 ; SSE-NEXT: subsd %xmm0, %xmm1
583 ; SSE-NEXT: movaps %xmm1, %xmm0
586 ; AVX-LABEL: insert_test2_sub_sd:
588 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
590 %1 = fsub <2 x double> %b, %a
591 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
; Packed double multiply %b * %a; the shuffle keeps lane 0 of the product and lane 1 of %b.
; The visible checks match mulsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vmulsd writing %xmm0.
595 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
596 ; SSE-LABEL: insert_test2_mul_sd:
598 ; SSE-NEXT: mulsd %xmm0, %xmm1
599 ; SSE-NEXT: movaps %xmm1, %xmm0
602 ; AVX-LABEL: insert_test2_mul_sd:
604 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
606 %1 = fmul <2 x double> %b, %a
607 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
; Packed double divide %b / %a; the shuffle keeps lane 0 of the quotient and lane 1 of %b.
; The visible checks match divsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vdivsd writing %xmm0.
611 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
612 ; SSE-LABEL: insert_test2_div_sd:
614 ; SSE-NEXT: divsd %xmm0, %xmm1
615 ; SSE-NEXT: movaps %xmm1, %xmm0
618 ; AVX-LABEL: insert_test2_div_sd:
620 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
622 %1 = fdiv <2 x double> %b, %a
623 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
; Packed float add %a + %b; the constant-mask select takes lane 0 from the sum
; (mask bit false picks the second value operand) and lanes 1-3 from %a.
; The visible checks match the scalar addss (vaddss for the AVX run) on %xmm1, %xmm0.
627 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
628 ; SSE-LABEL: insert_test3_add_ss:
630 ; SSE-NEXT: addss %xmm1, %xmm0
633 ; AVX-LABEL: insert_test3_add_ss:
635 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
637 %1 = fadd <4 x float> %a, %b
638 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
; Packed float subtract %a - %b; the constant-mask select takes lane 0 from the result
; (mask bit false picks the second value operand) and lanes 1-3 from %a.
; The visible checks match the scalar subss (vsubss for the AVX run) on %xmm1, %xmm0.
642 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
643 ; SSE-LABEL: insert_test3_sub_ss:
645 ; SSE-NEXT: subss %xmm1, %xmm0
648 ; AVX-LABEL: insert_test3_sub_ss:
650 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
652 %1 = fsub <4 x float> %a, %b
653 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
; Packed float multiply %a * %b; the constant-mask select takes lane 0 from the product
; (mask bit false picks the second value operand) and lanes 1-3 from %a.
; The visible checks match the scalar mulss (vmulss for the AVX run) on %xmm1, %xmm0.
657 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
658 ; SSE-LABEL: insert_test3_mul_ss:
660 ; SSE-NEXT: mulss %xmm1, %xmm0
663 ; AVX-LABEL: insert_test3_mul_ss:
665 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
667 %1 = fmul <4 x float> %a, %b
668 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
; Packed float divide %a / %b; the constant-mask select takes lane 0 from the quotient
; (mask bit false picks the second value operand) and lanes 1-3 from %a.
; The visible checks match the scalar divss (vdivss for the AVX run) on %xmm1, %xmm0.
672 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
673 ; SSE-LABEL: insert_test3_div_ss:
675 ; SSE-NEXT: divss %xmm1, %xmm0
678 ; AVX-LABEL: insert_test3_div_ss:
680 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
682 %1 = fdiv <4 x float> %a, %b
683 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
; Packed double add %a + %b; the constant-mask select takes lane 0 from the sum
; (mask bit false picks the second value operand) and lane 1 from %a.
; The visible checks match the scalar addsd (vaddsd for the AVX run) on %xmm1, %xmm0.
687 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
688 ; SSE-LABEL: insert_test3_add_sd:
690 ; SSE-NEXT: addsd %xmm1, %xmm0
693 ; AVX-LABEL: insert_test3_add_sd:
695 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
697 %1 = fadd <2 x double> %a, %b
698 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
; Packed double subtract %a - %b; the constant-mask select takes lane 0 from the result
; (mask bit false picks the second value operand) and lane 1 from %a.
; The visible checks match the scalar subsd (vsubsd for the AVX run) on %xmm1, %xmm0.
702 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
703 ; SSE-LABEL: insert_test3_sub_sd:
705 ; SSE-NEXT: subsd %xmm1, %xmm0
708 ; AVX-LABEL: insert_test3_sub_sd:
710 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
712 %1 = fsub <2 x double> %a, %b
713 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
; Packed double multiply %a * %b; the constant-mask select takes lane 0 from the product
; (mask bit false picks the second value operand) and lane 1 from %a.
; The visible checks match the scalar mulsd (vmulsd for the AVX run) on %xmm1, %xmm0.
717 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
718 ; SSE-LABEL: insert_test3_mul_sd:
720 ; SSE-NEXT: mulsd %xmm1, %xmm0
723 ; AVX-LABEL: insert_test3_mul_sd:
725 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
727 %1 = fmul <2 x double> %a, %b
728 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
; Packed double divide %a / %b; the constant-mask select takes lane 0 from the quotient
; (mask bit false picks the second value operand) and lane 1 from %a.
; The visible checks match the scalar divsd (vdivsd for the AVX run) on %xmm1, %xmm0.
732 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
733 ; SSE-LABEL: insert_test3_div_sd:
735 ; SSE-NEXT: divsd %xmm1, %xmm0
738 ; AVX-LABEL: insert_test3_div_sd:
740 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
742 %1 = fdiv <2 x double> %a, %b
743 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
; Packed float add %b + %a; the constant-mask select takes lane 0 from the sum
; (mask bit false picks the second value operand) and lanes 1-3 from %b.
; The visible checks match addss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vaddss writing %xmm0.
747 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
748 ; SSE-LABEL: insert_test4_add_ss:
750 ; SSE-NEXT: addss %xmm0, %xmm1
751 ; SSE-NEXT: movaps %xmm1, %xmm0
754 ; AVX-LABEL: insert_test4_add_ss:
756 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
758 %1 = fadd <4 x float> %b, %a
759 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
; Packed float subtract %b - %a; the constant-mask select takes lane 0 from the result
; (mask bit false picks the second value operand) and lanes 1-3 from %b.
; The visible checks match subss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vsubss writing %xmm0.
763 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
764 ; SSE-LABEL: insert_test4_sub_ss:
766 ; SSE-NEXT: subss %xmm0, %xmm1
767 ; SSE-NEXT: movaps %xmm1, %xmm0
770 ; AVX-LABEL: insert_test4_sub_ss:
772 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
774 %1 = fsub <4 x float> %b, %a
775 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
; Packed float multiply %b * %a; the constant-mask select takes lane 0 from the product
; (mask bit false picks the second value operand) and lanes 1-3 from %b.
; The visible checks match mulss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vmulss writing %xmm0.
779 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
780 ; SSE-LABEL: insert_test4_mul_ss:
782 ; SSE-NEXT: mulss %xmm0, %xmm1
783 ; SSE-NEXT: movaps %xmm1, %xmm0
786 ; AVX-LABEL: insert_test4_mul_ss:
788 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
790 %1 = fmul <4 x float> %b, %a
791 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
; Packed float divide %b / %a; the constant-mask select takes lane 0 from the quotient
; (mask bit false picks the second value operand) and lanes 1-3 from %b.
; The visible checks match divss %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vdivss writing %xmm0.
795 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
796 ; SSE-LABEL: insert_test4_div_ss:
798 ; SSE-NEXT: divss %xmm0, %xmm1
799 ; SSE-NEXT: movaps %xmm1, %xmm0
802 ; AVX-LABEL: insert_test4_div_ss:
804 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
806 %1 = fdiv <4 x float> %b, %a
807 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
; Packed double add %b + %a; the constant-mask select takes lane 0 from the sum
; (mask bit false picks the second value operand) and lane 1 from %b.
; The visible checks match addsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vaddsd writing %xmm0.
811 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
812 ; SSE-LABEL: insert_test4_add_sd:
814 ; SSE-NEXT: addsd %xmm0, %xmm1
815 ; SSE-NEXT: movaps %xmm1, %xmm0
818 ; AVX-LABEL: insert_test4_add_sd:
820 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
822 %1 = fadd <2 x double> %b, %a
823 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
; Packed double subtract %b - %a; the constant-mask select takes lane 0 from the result
; (mask bit false picks the second value operand) and lane 1 from %b.
; The visible checks match subsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vsubsd writing %xmm0.
827 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
828 ; SSE-LABEL: insert_test4_sub_sd:
830 ; SSE-NEXT: subsd %xmm0, %xmm1
831 ; SSE-NEXT: movaps %xmm1, %xmm0
834 ; AVX-LABEL: insert_test4_sub_sd:
836 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
838 %1 = fsub <2 x double> %b, %a
839 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
; Packed double multiply %b * %a; the constant-mask select takes lane 0 from the product
; (mask bit false picks the second value operand) and lane 1 from %b.
; The visible checks match mulsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vmulsd writing %xmm0.
843 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
844 ; SSE-LABEL: insert_test4_mul_sd:
846 ; SSE-NEXT: mulsd %xmm0, %xmm1
847 ; SSE-NEXT: movaps %xmm1, %xmm0
850 ; AVX-LABEL: insert_test4_mul_sd:
852 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
854 %1 = fmul <2 x double> %b, %a
855 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
; Packed double divide %b / %a; the constant-mask select takes lane 0 from the quotient
; (mask bit false picks the second value operand) and lane 1 from %b.
; The visible checks match divsd %xmm0, %xmm1 followed by a movaps into %xmm0;
; the AVX run expects one vdivsd writing %xmm0.
859 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
860 ; SSE-LABEL: insert_test4_div_sd:
862 ; SSE-NEXT: divsd %xmm0, %xmm1
863 ; SSE-NEXT: movaps %xmm1, %xmm0
866 ; AVX-LABEL: insert_test4_div_sd:
868 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
870 %1 = fdiv <2 x double> %b, %a
871 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1