; Lit configurations. The first run turns SSE4.1 off so that it really is the
; pre-SSE4.1 configuration its SSE2 label claims; the second run keeps the
; SSE4.1 support that corei7 provides; the third run targets AVX.
1 ; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
2 ; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
3 ; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck --check-prefix=AVX %s
5 ; Ensure that the backend no longer emits unnecessary vector insert
6 ; instructions immediately after SSE scalar fp instructions
; Scalar fadd on element 0: the low floats of %a and %b are added and the sum
; is inserted back into element 0 of %a. The checks expect addss, or vaddss
; in the AVX run.
9 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
10 ; SSE-LABEL: test_add_ss:
12 ; SSE-NEXT: addss %xmm1, %xmm0
15 ; AVX-LABEL: test_add_ss:
17 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
19 %1 = extractelement <4 x float> %b, i32 0
20 %2 = extractelement <4 x float> %a, i32 0
21 %add = fadd float %2, %1
22 %3 = insertelement <4 x float> %a, float %add, i32 0
; Scalar fsub on element 0: computes a[0] - b[0] and inserts the difference
; back into element 0 of %a. The checks expect subss, or vsubss in the AVX
; run.
26 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
27 ; SSE-LABEL: test_sub_ss:
29 ; SSE-NEXT: subss %xmm1, %xmm0
32 ; AVX-LABEL: test_sub_ss:
34 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
36 %1 = extractelement <4 x float> %b, i32 0
37 %2 = extractelement <4 x float> %a, i32 0
38 %sub = fsub float %2, %1
39 %3 = insertelement <4 x float> %a, float %sub, i32 0
; Scalar fmul on element 0: computes a[0] * b[0] and inserts the product back
; into element 0 of %a. The checks expect mulss, or vmulss in the AVX run.
43 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
44 ; SSE-LABEL: test_mul_ss:
46 ; SSE-NEXT: mulss %xmm1, %xmm0
49 ; AVX-LABEL: test_mul_ss:
51 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
53 %1 = extractelement <4 x float> %b, i32 0
54 %2 = extractelement <4 x float> %a, i32 0
55 %mul = fmul float %2, %1
56 %3 = insertelement <4 x float> %a, float %mul, i32 0
; Scalar fdiv on element 0: computes a[0] / b[0] and inserts the quotient back
; into element 0 of %a. The checks expect divss, or vdivss in the AVX run.
60 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
61 ; SSE-LABEL: test_div_ss:
63 ; SSE-NEXT: divss %xmm1, %xmm0
66 ; AVX-LABEL: test_div_ss:
68 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
70 %1 = extractelement <4 x float> %b, i32 0
71 %2 = extractelement <4 x float> %a, i32 0
72 %div = fdiv float %2, %1
73 %3 = insertelement <4 x float> %a, float %div, i32 0
; Double-precision variant: a[0] + b[0] is inserted back into element 0 of %a.
; The checks expect addsd, or vaddsd in the AVX run.
77 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
78 ; SSE-LABEL: test_add_sd:
80 ; SSE-NEXT: addsd %xmm1, %xmm0
83 ; AVX-LABEL: test_add_sd:
85 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
87 %1 = extractelement <2 x double> %b, i32 0
88 %2 = extractelement <2 x double> %a, i32 0
89 %add = fadd double %2, %1
90 %3 = insertelement <2 x double> %a, double %add, i32 0
; Double-precision variant: a[0] - b[0] is inserted back into element 0 of %a.
; The checks expect subsd, or vsubsd in the AVX run.
94 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
95 ; SSE-LABEL: test_sub_sd:
97 ; SSE-NEXT: subsd %xmm1, %xmm0
100 ; AVX-LABEL: test_sub_sd:
102 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
104 %1 = extractelement <2 x double> %b, i32 0
105 %2 = extractelement <2 x double> %a, i32 0
106 %sub = fsub double %2, %1
107 %3 = insertelement <2 x double> %a, double %sub, i32 0
; Double-precision variant: a[0] * b[0] is inserted back into element 0 of %a.
; The checks expect mulsd, or vmulsd in the AVX run.
111 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
112 ; SSE-LABEL: test_mul_sd:
114 ; SSE-NEXT: mulsd %xmm1, %xmm0
117 ; AVX-LABEL: test_mul_sd:
119 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
121 %1 = extractelement <2 x double> %b, i32 0
122 %2 = extractelement <2 x double> %a, i32 0
123 %mul = fmul double %2, %1
124 %3 = insertelement <2 x double> %a, double %mul, i32 0
; Double-precision variant: a[0] / b[0] is inserted back into element 0 of %a.
; The checks expect divsd, or vdivsd in the AVX run.
128 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
129 ; SSE-LABEL: test_div_sd:
131 ; SSE-NEXT: divsd %xmm1, %xmm0
134 ; AVX-LABEL: test_div_sd:
136 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
138 %1 = extractelement <2 x double> %b, i32 0
139 %2 = extractelement <2 x double> %a, i32 0
140 %div = fdiv double %2, %1
141 %3 = insertelement <2 x double> %a, double %div, i32 0
; Scalar fadd whose result goes into the second argument: a[0] + b[0] is
; inserted into element 0 of %b. The SSE checks expect addss accumulating into
; xmm1 followed by a movaps into xmm0; the AVX checks expect vaddss writing
; xmm0 directly.
145 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
146 ; SSE-LABEL: test2_add_ss:
148 ; SSE-NEXT: addss %xmm0, %xmm1
149 ; SSE-NEXT: movaps %xmm1, %xmm0
152 ; AVX-LABEL: test2_add_ss:
154 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
156 %1 = extractelement <4 x float> %a, i32 0
157 %2 = extractelement <4 x float> %b, i32 0
158 %add = fadd float %1, %2
159 %3 = insertelement <4 x float> %b, float %add, i32 0
; Scalar fsub whose result goes into the second argument: b[0] - a[0] is
; inserted into element 0 of %b. The SSE checks expect subss accumulating into
; xmm1 followed by a movaps into xmm0; the AVX checks expect vsubss writing
; xmm0 directly.
163 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
164 ; SSE-LABEL: test2_sub_ss:
166 ; SSE-NEXT: subss %xmm0, %xmm1
167 ; SSE-NEXT: movaps %xmm1, %xmm0
170 ; AVX-LABEL: test2_sub_ss:
172 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
174 %1 = extractelement <4 x float> %a, i32 0
175 %2 = extractelement <4 x float> %b, i32 0
176 %sub = fsub float %2, %1
177 %3 = insertelement <4 x float> %b, float %sub, i32 0
; Scalar fmul whose result goes into the second argument: a[0] * b[0] is
; inserted into element 0 of %b. The SSE checks expect mulss accumulating into
; xmm1 followed by a movaps into xmm0; the AVX checks expect vmulss writing
; xmm0 directly.
181 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
182 ; SSE-LABEL: test2_mul_ss:
184 ; SSE-NEXT: mulss %xmm0, %xmm1
185 ; SSE-NEXT: movaps %xmm1, %xmm0
188 ; AVX-LABEL: test2_mul_ss:
190 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
192 %1 = extractelement <4 x float> %a, i32 0
193 %2 = extractelement <4 x float> %b, i32 0
194 %mul = fmul float %1, %2
195 %3 = insertelement <4 x float> %b, float %mul, i32 0
; Scalar fdiv whose result goes into the second argument: b[0] / a[0] is
; inserted into element 0 of %b. The SSE checks expect divss accumulating into
; xmm1 followed by a movaps into xmm0; the AVX checks expect vdivss writing
; xmm0 directly.
199 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
200 ; SSE-LABEL: test2_div_ss:
202 ; SSE-NEXT: divss %xmm0, %xmm1
203 ; SSE-NEXT: movaps %xmm1, %xmm0
206 ; AVX-LABEL: test2_div_ss:
208 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
210 %1 = extractelement <4 x float> %a, i32 0
211 %2 = extractelement <4 x float> %b, i32 0
212 %div = fdiv float %2, %1
213 %3 = insertelement <4 x float> %b, float %div, i32 0
; Double-precision variant: a[0] + b[0] is inserted into element 0 of %b. The
; SSE checks expect addsd accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vaddsd writing xmm0 directly.
217 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
218 ; SSE-LABEL: test2_add_sd:
220 ; SSE-NEXT: addsd %xmm0, %xmm1
221 ; SSE-NEXT: movaps %xmm1, %xmm0
224 ; AVX-LABEL: test2_add_sd:
226 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
228 %1 = extractelement <2 x double> %a, i32 0
229 %2 = extractelement <2 x double> %b, i32 0
230 %add = fadd double %1, %2
231 %3 = insertelement <2 x double> %b, double %add, i32 0
; Double-precision variant: b[0] - a[0] is inserted into element 0 of %b. The
; SSE checks expect subsd accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vsubsd writing xmm0 directly.
235 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
236 ; SSE-LABEL: test2_sub_sd:
238 ; SSE-NEXT: subsd %xmm0, %xmm1
239 ; SSE-NEXT: movaps %xmm1, %xmm0
242 ; AVX-LABEL: test2_sub_sd:
244 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
246 %1 = extractelement <2 x double> %a, i32 0
247 %2 = extractelement <2 x double> %b, i32 0
248 %sub = fsub double %2, %1
249 %3 = insertelement <2 x double> %b, double %sub, i32 0
; Double-precision variant: a[0] * b[0] is inserted into element 0 of %b. The
; SSE checks expect mulsd accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vmulsd writing xmm0 directly.
253 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
254 ; SSE-LABEL: test2_mul_sd:
256 ; SSE-NEXT: mulsd %xmm0, %xmm1
257 ; SSE-NEXT: movaps %xmm1, %xmm0
260 ; AVX-LABEL: test2_mul_sd:
262 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
264 %1 = extractelement <2 x double> %a, i32 0
265 %2 = extractelement <2 x double> %b, i32 0
266 %mul = fmul double %1, %2
267 %3 = insertelement <2 x double> %b, double %mul, i32 0
; Double-precision variant: b[0] / a[0] is inserted into element 0 of %b. The
; SSE checks expect divsd accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vdivsd writing xmm0 directly.
271 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
272 ; SSE-LABEL: test2_div_sd:
274 ; SSE-NEXT: divsd %xmm0, %xmm1
275 ; SSE-NEXT: movaps %xmm1, %xmm0
278 ; AVX-LABEL: test2_div_sd:
280 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
282 %1 = extractelement <2 x double> %a, i32 0
283 %2 = extractelement <2 x double> %b, i32 0
284 %div = fdiv double %2, %1
285 %3 = insertelement <2 x double> %b, double %div, i32 0
; Two chained scalar fadds: %add = a[0] + b[0], then %add2 = a[0] + %add, with
; %add2 inserted into element 0 of %a. The SSE checks expect the first addss
; to accumulate into xmm1 and the second into xmm0; the AVX checks expect two
; vaddss instructions.
289 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
290 ; SSE-LABEL: test_multiple_add_ss:
292 ; SSE-NEXT: addss %xmm0, %xmm1
293 ; SSE-NEXT: addss %xmm1, %xmm0
296 ; AVX-LABEL: test_multiple_add_ss:
298 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
299 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
301 %1 = extractelement <4 x float> %b, i32 0
302 %2 = extractelement <4 x float> %a, i32 0
303 %add = fadd float %2, %1
304 %add2 = fadd float %2, %add
305 %3 = insertelement <4 x float> %a, float %add2, i32 0
; Two chained scalar fsubs: %sub = a[0] - b[0], then %sub2 = a[0] - %sub, with
; %sub2 inserted into element 0 of %a. The SSE checks expect xmm0 to be copied
; into xmm2 before the first subss so that xmm0 is still available for the
; second; the AVX checks expect two vsubss instructions.
309 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
310 ; SSE-LABEL: test_multiple_sub_ss:
312 ; SSE-NEXT: movaps %xmm0, %xmm2
313 ; SSE-NEXT: subss %xmm1, %xmm2
314 ; SSE-NEXT: subss %xmm2, %xmm0
317 ; AVX-LABEL: test_multiple_sub_ss:
319 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm1
320 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
322 %1 = extractelement <4 x float> %b, i32 0
323 %2 = extractelement <4 x float> %a, i32 0
324 %sub = fsub float %2, %1
325 %sub2 = fsub float %2, %sub
326 %3 = insertelement <4 x float> %a, float %sub2, i32 0
; Two chained scalar fmuls: %mul = a[0] * b[0], then %mul2 = a[0] * %mul, with
; %mul2 inserted into element 0 of %a. The SSE checks expect the first mulss
; to accumulate into xmm1 and the second into xmm0; the AVX checks expect two
; vmulss instructions.
330 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
331 ; SSE-LABEL: test_multiple_mul_ss:
333 ; SSE-NEXT: mulss %xmm0, %xmm1
334 ; SSE-NEXT: mulss %xmm1, %xmm0
337 ; AVX-LABEL: test_multiple_mul_ss:
339 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
340 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
342 %1 = extractelement <4 x float> %b, i32 0
343 %2 = extractelement <4 x float> %a, i32 0
344 %mul = fmul float %2, %1
345 %mul2 = fmul float %2, %mul
346 %3 = insertelement <4 x float> %a, float %mul2, i32 0
; Two chained scalar fdivs: %div = a[0] / b[0], then %div2 = a[0] / %div, with
; %div2 inserted into element 0 of %a. The SSE checks expect xmm0 to be copied
; into xmm2 before the first divss so that xmm0 is still available for the
; second; the AVX checks expect two vdivss instructions.
350 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
351 ; SSE-LABEL: test_multiple_div_ss:
353 ; SSE-NEXT: movaps %xmm0, %xmm2
354 ; SSE-NEXT: divss %xmm1, %xmm2
355 ; SSE-NEXT: divss %xmm2, %xmm0
358 ; AVX-LABEL: test_multiple_div_ss:
360 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm1
361 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
363 %1 = extractelement <4 x float> %b, i32 0
364 %2 = extractelement <4 x float> %a, i32 0
365 %div = fdiv float %2, %1
366 %div2 = fdiv float %2, %div
367 %3 = insertelement <4 x float> %a, float %div2, i32 0
371 ; Ensure that the backend selects SSE/AVX scalar fp instructions
372 ; from a packed fp instruction plus a vector insert.
; Packed fadd of %a and %b followed by a shufflevector that keeps element 0 of
; the sum and takes elements 1-3 from %a (mask indices 5, 6, 7). The checks
; expect this to be selected as scalar addss, or vaddss in the AVX run.
374 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
375 ; SSE-LABEL: insert_test_add_ss:
377 ; SSE-NEXT: addss %xmm1, %xmm0
380 ; AVX-LABEL: insert_test_add_ss:
382 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
384 %1 = fadd <4 x float> %a, %b
385 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed fsub (%a - %b) followed by a shufflevector that keeps element 0 of
; the difference and takes elements 1-3 from %a (mask indices 5, 6, 7). The
; checks expect this to be selected as scalar subss, or vsubss in the AVX run.
389 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
390 ; SSE-LABEL: insert_test_sub_ss:
392 ; SSE-NEXT: subss %xmm1, %xmm0
395 ; AVX-LABEL: insert_test_sub_ss:
397 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
399 %1 = fsub <4 x float> %a, %b
400 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed fmul of %a and %b followed by a shufflevector that keeps element 0 of
; the product and takes elements 1-3 from %a (mask indices 5, 6, 7). The
; checks expect this to be selected as scalar mulss, or vmulss in the AVX run.
404 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
405 ; SSE-LABEL: insert_test_mul_ss:
407 ; SSE-NEXT: mulss %xmm1, %xmm0
410 ; AVX-LABEL: insert_test_mul_ss:
412 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
414 %1 = fmul <4 x float> %a, %b
415 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed fdiv (%a / %b) followed by a shufflevector that keeps element 0 of
; the quotient and takes elements 1-3 from %a (mask indices 5, 6, 7). The
; checks expect this to be selected as scalar divss, or vdivss in the AVX run.
419 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
420 ; SSE-LABEL: insert_test_div_ss:
422 ; SSE-NEXT: divss %xmm1, %xmm0
425 ; AVX-LABEL: insert_test_div_ss:
427 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
429 %1 = fdiv <4 x float> %a, %b
430 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed double fadd of %a and %b followed by a shufflevector that keeps
; element 0 of the sum and takes element 1 from %a (mask index 3). The checks
; expect this to be selected as scalar addsd, or vaddsd in the AVX run.
434 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
435 ; SSE-LABEL: insert_test_add_sd:
437 ; SSE-NEXT: addsd %xmm1, %xmm0
440 ; AVX-LABEL: insert_test_add_sd:
442 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
444 %1 = fadd <2 x double> %a, %b
445 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
; Packed double fsub (%a - %b) followed by a shufflevector that keeps element
; 0 of the difference and takes element 1 from %a (mask index 3). The checks
; expect this to be selected as scalar subsd, or vsubsd in the AVX run.
449 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
450 ; SSE-LABEL: insert_test_sub_sd:
452 ; SSE-NEXT: subsd %xmm1, %xmm0
455 ; AVX-LABEL: insert_test_sub_sd:
457 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
459 %1 = fsub <2 x double> %a, %b
460 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
; Packed double fmul of %a and %b followed by a shufflevector that keeps
; element 0 of the product and takes element 1 from %a (mask index 3). The
; checks expect this to be selected as scalar mulsd, or vmulsd in the AVX run.
464 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
465 ; SSE-LABEL: insert_test_mul_sd:
467 ; SSE-NEXT: mulsd %xmm1, %xmm0
470 ; AVX-LABEL: insert_test_mul_sd:
472 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
474 %1 = fmul <2 x double> %a, %b
475 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
; Packed double fdiv (%a / %b) followed by a shufflevector that keeps element
; 0 of the quotient and takes element 1 from %a (mask index 3). The checks
; expect this to be selected as scalar divsd, or vdivsd in the AVX run.
479 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
480 ; SSE-LABEL: insert_test_div_sd:
482 ; SSE-NEXT: divsd %xmm1, %xmm0
485 ; AVX-LABEL: insert_test_div_sd:
487 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
489 %1 = fdiv <2 x double> %a, %b
490 %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
; Packed fadd (%b + %a) followed by a shufflevector that keeps element 0 of
; the sum and takes elements 1-3 from %b (mask indices 5, 6, 7). The SSE
; checks expect addss accumulating into xmm1 followed by a movaps into xmm0;
; the AVX checks expect vaddss writing xmm0 directly.
494 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
495 ; SSE-LABEL: insert_test2_add_ss:
497 ; SSE-NEXT: addss %xmm0, %xmm1
498 ; SSE-NEXT: movaps %xmm1, %xmm0
501 ; AVX-LABEL: insert_test2_add_ss:
503 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
505 %1 = fadd <4 x float> %b, %a
506 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed fsub (%b - %a) followed by a shufflevector that keeps element 0 of
; the difference and takes elements 1-3 from %b (mask indices 5, 6, 7). The
; SSE checks expect subss accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vsubss writing xmm0 directly.
510 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
511 ; SSE-LABEL: insert_test2_sub_ss:
513 ; SSE-NEXT: subss %xmm0, %xmm1
514 ; SSE-NEXT: movaps %xmm1, %xmm0
517 ; AVX-LABEL: insert_test2_sub_ss:
519 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
521 %1 = fsub <4 x float> %b, %a
522 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed fmul (%b * %a) followed by a shufflevector that keeps element 0 of
; the product and takes elements 1-3 from %b (mask indices 5, 6, 7). The SSE
; checks expect mulss accumulating into xmm1 followed by a movaps into xmm0;
; the AVX checks expect vmulss writing xmm0 directly.
526 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
527 ; SSE-LABEL: insert_test2_mul_ss:
529 ; SSE-NEXT: mulss %xmm0, %xmm1
530 ; SSE-NEXT: movaps %xmm1, %xmm0
533 ; AVX-LABEL: insert_test2_mul_ss:
535 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
537 %1 = fmul <4 x float> %b, %a
538 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed fdiv (%b / %a) followed by a shufflevector that keeps element 0 of
; the quotient and takes elements 1-3 from %b (mask indices 5, 6, 7). The SSE
; checks expect divss accumulating into xmm1 followed by a movaps into xmm0;
; the AVX checks expect vdivss writing xmm0 directly.
542 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
543 ; SSE-LABEL: insert_test2_div_ss:
545 ; SSE-NEXT: divss %xmm0, %xmm1
546 ; SSE-NEXT: movaps %xmm1, %xmm0
549 ; AVX-LABEL: insert_test2_div_ss:
551 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
553 %1 = fdiv <4 x float> %b, %a
554 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; Packed double fadd (%b + %a) followed by a shufflevector that keeps element
; 0 of the sum and takes element 1 from %b (mask index 3). The SSE checks
; expect addsd accumulating into xmm1 followed by a movaps into xmm0; the AVX
; checks expect vaddsd writing xmm0 directly.
558 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
559 ; SSE-LABEL: insert_test2_add_sd:
561 ; SSE-NEXT: addsd %xmm0, %xmm1
562 ; SSE-NEXT: movaps %xmm1, %xmm0
565 ; AVX-LABEL: insert_test2_add_sd:
567 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
569 %1 = fadd <2 x double> %b, %a
570 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
; Packed double fsub (%b - %a) followed by a shufflevector that keeps element
; 0 of the difference and takes element 1 from %b (mask index 3). The SSE
; checks expect subsd accumulating into xmm1 followed by a movaps into xmm0;
; the AVX checks expect vsubsd writing xmm0 directly.
574 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
575 ; SSE-LABEL: insert_test2_sub_sd:
577 ; SSE-NEXT: subsd %xmm0, %xmm1
578 ; SSE-NEXT: movaps %xmm1, %xmm0
581 ; AVX-LABEL: insert_test2_sub_sd:
583 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
585 %1 = fsub <2 x double> %b, %a
586 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
; Packed double fmul (%b * %a) followed by a shufflevector that keeps element
; 0 of the product and takes element 1 from %b (mask index 3). The SSE checks
; expect mulsd accumulating into xmm1 followed by a movaps into xmm0; the AVX
; checks expect vmulsd writing xmm0 directly.
590 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
591 ; SSE-LABEL: insert_test2_mul_sd:
593 ; SSE-NEXT: mulsd %xmm0, %xmm1
594 ; SSE-NEXT: movaps %xmm1, %xmm0
597 ; AVX-LABEL: insert_test2_mul_sd:
599 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
601 %1 = fmul <2 x double> %b, %a
602 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
; Packed double fdiv (%b / %a) followed by a shufflevector that keeps element
; 0 of the quotient and takes element 1 from %b (mask index 3). The SSE checks
; expect divsd accumulating into xmm1 followed by a movaps into xmm0; the AVX
; checks expect vdivsd writing xmm0 directly.
606 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
607 ; SSE-LABEL: insert_test2_div_sd:
609 ; SSE-NEXT: divsd %xmm0, %xmm1
610 ; SSE-NEXT: movaps %xmm1, %xmm0
613 ; AVX-LABEL: insert_test2_div_sd:
615 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
617 %1 = fdiv <2 x double> %b, %a
618 %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
; Packed fadd of %a and %b followed by a select with a constant mask: the
; false bit in lane 0 picks the sum, the true bits in lanes 1-3 pick %a. The
; checks expect this to be selected as scalar addss, or vaddss in the AVX run.
622 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
623 ; SSE-LABEL: insert_test3_add_ss:
625 ; SSE-NEXT: addss %xmm1, %xmm0
628 ; AVX-LABEL: insert_test3_add_ss:
630 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
632 %1 = fadd <4 x float> %a, %b
633 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
; Packed fsub (%a - %b) followed by a select with a constant mask: the false
; bit in lane 0 picks the difference, the true bits in lanes 1-3 pick %a. The
; checks expect this to be selected as scalar subss, or vsubss in the AVX run.
637 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
638 ; SSE-LABEL: insert_test3_sub_ss:
640 ; SSE-NEXT: subss %xmm1, %xmm0
643 ; AVX-LABEL: insert_test3_sub_ss:
645 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
647 %1 = fsub <4 x float> %a, %b
648 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
; Packed fmul of %a and %b followed by a select with a constant mask: the
; false bit in lane 0 picks the product, the true bits in lanes 1-3 pick %a.
; The checks expect this to be selected as scalar mulss, or vmulss in the AVX
; run.
652 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
653 ; SSE-LABEL: insert_test3_mul_ss:
655 ; SSE-NEXT: mulss %xmm1, %xmm0
658 ; AVX-LABEL: insert_test3_mul_ss:
660 ; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
662 %1 = fmul <4 x float> %a, %b
663 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
; Packed fdiv (%a / %b) followed by a select with a constant mask: the false
; bit in lane 0 picks the quotient, the true bits in lanes 1-3 pick %a. The
; checks expect this to be selected as scalar divss, or vdivss in the AVX run.
667 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
668 ; SSE-LABEL: insert_test3_div_ss:
670 ; SSE-NEXT: divss %xmm1, %xmm0
673 ; AVX-LABEL: insert_test3_div_ss:
675 ; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
677 %1 = fdiv <4 x float> %a, %b
678 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
; Packed double fadd of %a and %b followed by a select with a constant mask:
; the false bit in lane 0 picks the sum, the true bit in lane 1 picks %a. The
; checks expect this to be selected as scalar addsd, or vaddsd in the AVX run.
682 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
683 ; SSE-LABEL: insert_test3_add_sd:
685 ; SSE-NEXT: addsd %xmm1, %xmm0
688 ; AVX-LABEL: insert_test3_add_sd:
690 ; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
692 %1 = fadd <2 x double> %a, %b
693 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
; Packed double fsub (%a - %b) followed by a select with a constant mask: the
; false bit in lane 0 picks the difference, the true bit in lane 1 picks %a.
; The checks expect this to be selected as scalar subsd, or vsubsd in the AVX
; run.
697 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
698 ; SSE-LABEL: insert_test3_sub_sd:
700 ; SSE-NEXT: subsd %xmm1, %xmm0
703 ; AVX-LABEL: insert_test3_sub_sd:
705 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
707 %1 = fsub <2 x double> %a, %b
708 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
; Packed double fmul of %a and %b followed by a select with a constant mask:
; the false bit in lane 0 picks the product, the true bit in lane 1 picks %a.
; The checks expect this to be selected as scalar mulsd, or vmulsd in the AVX
; run.
712 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
713 ; SSE-LABEL: insert_test3_mul_sd:
715 ; SSE-NEXT: mulsd %xmm1, %xmm0
718 ; AVX-LABEL: insert_test3_mul_sd:
720 ; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
722 %1 = fmul <2 x double> %a, %b
723 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
; Packed double fdiv (%a / %b) followed by a select with a constant mask: the
; false bit in lane 0 picks the quotient, the true bit in lane 1 picks %a. The
; checks expect this to be selected as scalar divsd, or vdivsd in the AVX run.
727 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
728 ; SSE-LABEL: insert_test3_div_sd:
730 ; SSE-NEXT: divsd %xmm1, %xmm0
733 ; AVX-LABEL: insert_test3_div_sd:
735 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
737 %1 = fdiv <2 x double> %a, %b
738 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
; Packed fadd (%b + %a) followed by a select with a constant mask: the false
; bit in lane 0 picks the sum, the true bits in lanes 1-3 pick %b. The SSE
; checks expect addss accumulating into xmm1 followed by a movaps into xmm0;
; the AVX checks expect vaddss writing xmm0 directly.
742 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
743 ; SSE-LABEL: insert_test4_add_ss:
745 ; SSE-NEXT: addss %xmm0, %xmm1
746 ; SSE-NEXT: movaps %xmm1, %xmm0
749 ; AVX-LABEL: insert_test4_add_ss:
751 ; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
753 %1 = fadd <4 x float> %b, %a
754 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
; Packed fsub (%b - %a) followed by a select with a constant mask: the false
; bit in lane 0 picks the difference, the true bits in lanes 1-3 pick %b. The
; SSE checks expect subss accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vsubss writing xmm0 directly.
758 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
759 ; SSE-LABEL: insert_test4_sub_ss:
761 ; SSE-NEXT: subss %xmm0, %xmm1
762 ; SSE-NEXT: movaps %xmm1, %xmm0
765 ; AVX-LABEL: insert_test4_sub_ss:
767 ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
769 %1 = fsub <4 x float> %b, %a
770 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
; Packed fmul (%b * %a) followed by a select with a constant mask: the false
; bit in lane 0 picks the product, the true bits in lanes 1-3 pick %b. The SSE
; checks expect mulss accumulating into xmm1 followed by a movaps into xmm0;
; the AVX checks expect vmulss writing xmm0 directly.
774 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
775 ; SSE-LABEL: insert_test4_mul_ss:
777 ; SSE-NEXT: mulss %xmm0, %xmm1
778 ; SSE-NEXT: movaps %xmm1, %xmm0
781 ; AVX-LABEL: insert_test4_mul_ss:
783 ; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
785 %1 = fmul <4 x float> %b, %a
786 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
; Packed fdiv (%b / %a) followed by a select with a constant mask: the false
; bit in lane 0 picks the quotient, the true bits in lanes 1-3 pick %b. The
; SSE checks expect divss accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vdivss writing xmm0 directly.
790 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
791 ; SSE-LABEL: insert_test4_div_ss:
793 ; SSE-NEXT: divss %xmm0, %xmm1
794 ; SSE-NEXT: movaps %xmm1, %xmm0
797 ; AVX-LABEL: insert_test4_div_ss:
799 ; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
801 %1 = fdiv <4 x float> %b, %a
802 %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
; Packed double fadd (%b + %a) followed by a select with a constant mask: the
; false bit in lane 0 picks the sum, the true bit in lane 1 picks %b. The SSE
; checks expect addsd accumulating into xmm1 followed by a movaps into xmm0;
; the AVX checks expect vaddsd writing xmm0 directly.
806 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
807 ; SSE-LABEL: insert_test4_add_sd:
809 ; SSE-NEXT: addsd %xmm0, %xmm1
810 ; SSE-NEXT: movaps %xmm1, %xmm0
813 ; AVX-LABEL: insert_test4_add_sd:
815 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
817 %1 = fadd <2 x double> %b, %a
818 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
; Packed double fsub (%b - %a) followed by a select with a constant mask: the
; false bit in lane 0 picks the difference, the true bit in lane 1 picks %b.
; The SSE checks expect subsd accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vsubsd writing xmm0 directly.
822 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
823 ; SSE-LABEL: insert_test4_sub_sd:
825 ; SSE-NEXT: subsd %xmm0, %xmm1
826 ; SSE-NEXT: movaps %xmm1, %xmm0
829 ; AVX-LABEL: insert_test4_sub_sd:
831 ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
833 %1 = fsub <2 x double> %b, %a
834 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
; Packed double fmul (%b * %a) followed by a select with a constant mask: the
; false bit in lane 0 picks the product, the true bit in lane 1 picks %b. The
; SSE checks expect mulsd accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vmulsd writing xmm0 directly.
838 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
839 ; SSE-LABEL: insert_test4_mul_sd:
841 ; SSE-NEXT: mulsd %xmm0, %xmm1
842 ; SSE-NEXT: movaps %xmm1, %xmm0
845 ; AVX-LABEL: insert_test4_mul_sd:
847 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
849 %1 = fmul <2 x double> %b, %a
850 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
; Packed double fdiv (%b / %a) followed by a select with a constant mask: the
; false bit in lane 0 picks the quotient, the true bit in lane 1 picks %b. The
; SSE checks expect divsd accumulating into xmm1 followed by a movaps into
; xmm0; the AVX checks expect vdivsd writing xmm0 directly.
854 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
855 ; SSE-LABEL: insert_test4_div_sd:
857 ; SSE-NEXT: divsd %xmm0, %xmm1
858 ; SSE-NEXT: movaps %xmm1, %xmm0
861 ; AVX-LABEL: insert_test4_div_sd:
863 ; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
865 %1 = fdiv <2 x double> %b, %a
866 %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1