; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify that we're folding the load into the math instruction.
; This pattern is generated from the simplest intrinsics usage:
; _mm_add_ss(a, _mm_load_ss(b));
; Scalar float add: the load of %pb must fold into the addss memory operand.
define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fadd float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}
; Scalar double add: the load of %pb must fold into the addsd memory operand.
define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fadd double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}
; Scalar float subtract: the load of %pb must fold into the subss memory operand.
define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fsub float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}
; Scalar double subtract: the load of %pb must fold into the subsd memory operand.
define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fsub double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}
; Scalar float multiply: the load of %pb must fold into the mulss memory operand.
define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fmul float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}
; Scalar double multiply: the load of %pb must fold into the mulsd memory operand.
define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fmul double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}
; Scalar float divide: the load of %pb must fold into the divss memory operand.
define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fdiv float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}
; Scalar double divide: the load of %pb must fold into the divsd memory operand.
define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fdiv double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}