; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; libm fmax libcalls (scalar).
declare float @fmaxf(float, float)
declare double @fmax(double, double)
declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)

; llvm.maxnum intrinsics (scalar).
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)

; llvm.maxnum intrinsics (vector).
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
; Scalar float max via the libm fmaxf libcall.
; NOTE(review): the body-check lines for this test are not present in this
; chunk; regenerate with update_llc_test_checks.py if they are needed.
; CHECK-LABEL: @test_fmaxf
define float @test_fmaxf(float %x, float %y) {
  %z = call float @fmaxf(float %x, float %y) readnone
  ret float %z
}
; Scalar double max via the libm fmax libcall.
; CHECK-LABEL: @test_fmax
define double @test_fmax(double %x, double %y) {
  %z = call double @fmax(double %x, double %y) readnone
  ret double %z
}
; x86 80-bit long double max via the libm fmaxl libcall.
; CHECK-LABEL: @test_fmaxl
define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}
; Scalar float max via the llvm.maxnum.f32 intrinsic.
; CHECK-LABEL: @test_intrinsic_fmaxf
define float @test_intrinsic_fmaxf(float %x, float %y) {
  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
  ret float %z
}
; Scalar double max via the llvm.maxnum.f64 intrinsic.
; CHECK-LABEL: @test_intrinsic_fmax
define double @test_intrinsic_fmax(double %x, double %y) {
  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
  ret double %z
}
; x86 80-bit long double max via the llvm.maxnum.f80 intrinsic.
; CHECK-LABEL: @test_intrinsic_fmaxl
define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}
; <2 x float> max: expanded to per-element fmaxf libcalls.
; NOTE(review): these CHECK lines are byte-identical to the v4f32 test below,
; consistent with the <2 x float> operation being widened to <4 x float>.
; CHECK-LABEL: @test_intrinsic_fmax_v2f32
; SSE: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: callq fmaxf
; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT: callq fmaxf
; SSE-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; SSE: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: callq fmaxf
; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: callq fmaxf
; SSE-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; SSE: movaps %xmm1, %xmm0
; SSE-NEXT: addq $72, %rsp
;
; AVX: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX-NEXT: callq fmaxf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovshdup {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; AVX: vmovshdup {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; AVX: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vpermilps $231, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; AVX: vpermilps $231, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX-NEXT: addq $56, %rsp
define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
  %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}
; <4 x float> max: expanded to four scalar fmaxf libcalls with shuffles to
; extract/reassemble the lanes (spills around each call since xmm regs are
; caller-saved).
; CHECK-LABEL: @test_intrinsic_fmax_v4f32
; SSE: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: callq fmaxf
; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT: callq fmaxf
; SSE-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; SSE: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: callq fmaxf
; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: callq fmaxf
; SSE-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; SSE: movaps %xmm1, %xmm0
; SSE-NEXT: addq $72, %rsp
;
; AVX: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX-NEXT: callq fmaxf
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovshdup {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; AVX: vmovshdup {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; AVX: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vpermilps $231, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; AVX: vpermilps $231, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX-NEXT: addq $56, %rsp
define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
  %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}
; <2 x double> max via the llvm.maxnum.v2f64 intrinsic.
; NOTE(review): body-check lines for this test are not present in this chunk.
; CHECK-LABEL: @test_intrinsic_fmax_v2f64
define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
  %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}
; <4 x double> max via the llvm.maxnum.v4f64 intrinsic.
; CHECK-LABEL: @test_intrinsic_fmax_v4f64
define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
  %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}
; <8 x double> max via the llvm.maxnum.v8f64 intrinsic.
; NOTE(review): this definition appears to continue past the end of this
; chunk (ret/closing brace not visible here), so it is left open.
; CHECK-LABEL: @test_intrinsic_fmax_v8f64
define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
  %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone