; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; External libm fmin family and the llvm.minnum intrinsics exercised below.
declare float @fminf(float, float)
declare double @fmin(double, double)
declare x86_fp80 @fminl(x86_fp80, x86_fp80)
declare float @llvm.minnum.f32(float, float)
declare double @llvm.minnum.f64(double, double)
declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80)

declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
; fminf(x, y) lowers to min + NaN-select; scalar f32 form.
; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

; CHECK-LABEL: @test_fminf
; SSE: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: minss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0

; AVX: vminss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
define float @test_fminf(float %x, float %y) {
  %z = call float @fminf(float %x, float %y) readnone
  ret float %z
}
; fmin(x, y) lowers to min + NaN-select; scalar f64 form.
; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.

; CHECK-LABEL: @test_fmin
; SSE: movapd %xmm0, %xmm2
; SSE-NEXT: cmpunordsd %xmm2, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm3
; SSE-NEXT: andpd %xmm1, %xmm3
; SSE-NEXT: minsd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm1, %xmm2
; SSE-NEXT: orpd %xmm3, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0

; AVX: vminsd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
define double @test_fmin(double %x, double %y) {
  %z = call double @fmin(double %x, double %y) readnone
  ret double %z
}
; fminl on x86_fp80: only the label is checked (x87 sequence is not pinned).
; CHECK-LABEL: @test_fminl
define x86_fp80 @test_fminl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @fminl(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}
; llvm.minnum.f32 should lower identically to the libcall form above.
; CHECK-LABEL: @test_intrinsic_fminf
; SSE: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: minss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0

; AVX: vminss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
define float @test_intrinsic_fminf(float %x, float %y) {
  %z = call float @llvm.minnum.f32(float %x, float %y) readnone
  ret float %z
}
; llvm.minnum.f64 should lower identically to the libcall form above.
; CHECK-LABEL: @test_intrinsic_fmin
; SSE: movapd %xmm0, %xmm2
; SSE-NEXT: cmpunordsd %xmm2, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm3
; SSE-NEXT: andpd %xmm1, %xmm3
; SSE-NEXT: minsd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm1, %xmm2
; SSE-NEXT: orpd %xmm3, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0

; AVX: vminsd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
define double @test_intrinsic_fmin(double %x, double %y) {
  %z = call double @llvm.minnum.f64(double %x, double %y) readnone
  ret double %z
}
; llvm.minnum.f80: only the label is checked (x87 sequence is not pinned).
; CHECK-LABEL: @test_intrinsic_fminl
define x86_fp80 @test_intrinsic_fminl(x86_fp80 %x, x86_fp80 %y) {
  %z = call x86_fp80 @llvm.minnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
  ret x86_fp80 %z
}
; v2f32 is widened to v4f32; vector lowering avoids the scalar move churn.
; CHECK-LABEL: @test_intrinsic_fmin_v2f32
; SSE: movaps %xmm1, %xmm2
; SSE-NEXT: minps %xmm0, %xmm2
; SSE-NEXT: cmpunordps %xmm0, %xmm0
; SSE-NEXT: andps %xmm0, %xmm1
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: orps %xmm1, %xmm0

; AVX: vminps %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
define <2 x float> @test_intrinsic_fmin_v2f32(<2 x float> %x, <2 x float> %y) {
  %z = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
  ret <2 x float> %z
}
; Full-width v4f32 minnum: min + unord-compare + blend.
; CHECK-LABEL: @test_intrinsic_fmin_v4f32
; SSE: movaps %xmm1, %xmm2
; SSE-NEXT: minps %xmm0, %xmm2
; SSE-NEXT: cmpunordps %xmm0, %xmm0
; SSE-NEXT: andps %xmm0, %xmm1
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: orps %xmm1, %xmm0

; AVX: vminps %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
define <4 x float> @test_intrinsic_fmin_v4f32(<4 x float> %x, <4 x float> %y) {
  %z = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
  ret <4 x float> %z
}
; v2f64 minnum: same min/cmpunord/blend shape in the pd domain.
; CHECK-LABEL: @test_intrinsic_fmin_v2f64
; SSE: movapd %xmm1, %xmm2
; SSE-NEXT: minpd %xmm0, %xmm2
; SSE-NEXT: cmpunordpd %xmm0, %xmm0
; SSE-NEXT: andpd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm2, %xmm0
; SSE-NEXT: orpd %xmm1, %xmm0

; AVX: vminpd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
define <2 x double> @test_intrinsic_fmin_v2f64(<2 x double> %x, <2 x double> %y) {
  %z = call <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
  ret <2 x double> %z
}
; v4f64: SSE splits into two 128-bit halves; AVX handles it in one ymm op.
; CHECK-LABEL: @test_intrinsic_fmin_v4f64
; SSE: movapd %xmm2, %xmm4
; SSE-NEXT: minpd %xmm0, %xmm4
; SSE-NEXT: cmpunordpd %xmm0, %xmm0
; SSE-NEXT: andpd %xmm0, %xmm2
; SSE-NEXT: andnpd %xmm4, %xmm0
; SSE-NEXT: orpd %xmm2, %xmm0
; SSE-NEXT: movapd %xmm3, %xmm2
; SSE-NEXT: minpd %xmm1, %xmm2
; SSE-NEXT: cmpunordpd %xmm1, %xmm1
; SSE-NEXT: andpd %xmm1, %xmm3
; SSE-NEXT: andnpd %xmm2, %xmm1
; SSE-NEXT: orpd %xmm3, %xmm1

; AVX: vminpd %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
define <4 x double> @test_intrinsic_fmin_v4f64(<4 x double> %x, <4 x double> %y) {
  %z = call <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
  ret <4 x double> %z
}
; v8f64: SSE splits into four 128-bit halves; AVX into two 256-bit halves.
; CHECK-LABEL: @test_intrinsic_fmin_v8f64
; SSE: movapd %xmm4, %xmm8
; SSE-NEXT: minpd %xmm0, %xmm8
; SSE-NEXT: cmpunordpd %xmm0, %xmm0
; SSE-NEXT: andpd %xmm0, %xmm4
; SSE-NEXT: andnpd %xmm8, %xmm0
; SSE-NEXT: orpd %xmm4, %xmm0
; SSE-NEXT: movapd %xmm5, %xmm4
; SSE-NEXT: minpd %xmm1, %xmm4
; SSE-NEXT: cmpunordpd %xmm1, %xmm1
; SSE-NEXT: andpd %xmm1, %xmm5
; SSE-NEXT: andnpd %xmm4, %xmm1
; SSE-NEXT: orpd %xmm5, %xmm1
; SSE-NEXT: movapd %xmm6, %xmm4
; SSE-NEXT: minpd %xmm2, %xmm4
; SSE-NEXT: cmpunordpd %xmm2, %xmm2
; SSE-NEXT: andpd %xmm2, %xmm6
; SSE-NEXT: andnpd %xmm4, %xmm2
; SSE-NEXT: orpd %xmm6, %xmm2
; SSE-NEXT: movapd %xmm7, %xmm4
; SSE-NEXT: minpd %xmm3, %xmm4
; SSE-NEXT: cmpunordpd %xmm3, %xmm3
; SSE-NEXT: andpd %xmm3, %xmm7
; SSE-NEXT: andnpd %xmm4, %xmm3
; SSE-NEXT: orpd %xmm7, %xmm3

; AVX: vminpd %ymm0, %ymm2, %ymm4
; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT: vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
; AVX-NEXT: vminpd %ymm1, %ymm3, %ymm2
; AVX-NEXT: vcmpunordpd %ymm1, %ymm1, %ymm1
; AVX-NEXT: vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
define <8 x double> @test_intrinsic_fmin_v8f64(<8 x double> %x, <8 x double> %y) {
  %z = call <8 x double> @llvm.minnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
  ret <8 x double> %z
}