1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX
5 ; Signed Integer to Double
; sitofp <2 x i64> -> <2 x double>.
; Both check sequences move each 64-bit lane into %rax, convert it with the
; scalar (v)cvtsi2sdq instruction and rebuild the vector with (v)unpcklpd.
8 define <2 x double> @sitofp_2vf64(<2 x i64> %a) {
9 ; SSE2-LABEL: sitofp_2vf64:
11 ; SSE2-NEXT: movd %xmm0, %rax
12 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
13 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
14 ; SSE2-NEXT: movd %xmm0, %rax
15 ; SSE2-NEXT: xorps %xmm0, %xmm0
16 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
17 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
18 ; SSE2-NEXT: movapd %xmm1, %xmm0
21 ; AVX-LABEL: sitofp_2vf64:
23 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
24 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
25 ; AVX-NEXT: vmovq %xmm0, %rax
26 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
27 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
28 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
30 %cvt = sitofp <2 x i64> %a to <2 x double>
; sitofp of the low two i32 lanes of %a (shufflevector mask <0, 1>) to
; <2 x double>.
; Both check sequences spread the two lanes into 64-bit slots, move them to
; general-purpose registers, convert each with scalar (v)cvtsi2sdq and
; recombine the results with (v)unpcklpd.
34 define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) {
35 ; SSE2-LABEL: sitofp_2vf64_i32:
37 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
38 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
39 ; SSE2-NEXT: movd %xmm1, %rax
41 ; SSE2-NEXT: movd %xmm0, %rcx
42 ; SSE2-NEXT: movslq %ecx, %rcx
43 ; SSE2-NEXT: xorps %xmm0, %xmm0
44 ; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0
45 ; SSE2-NEXT: xorps %xmm1, %xmm1
46 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
47 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
50 ; AVX-LABEL: sitofp_2vf64_i32:
52 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
53 ; AVX-NEXT: vmovq %xmm0, %rax
55 ; AVX-NEXT: vpextrq $1, %xmm0, %rcx
56 ; AVX-NEXT: movslq %ecx, %rcx
57 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
58 ; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0
59 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
60 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
62 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
63 %cvt = sitofp <2 x i32> %shuf to <2 x double>
; sitofp <4 x i64> -> <4 x double>.
; The SSE2 checks convert the two 128-bit input halves (%xmm0, %xmm1) lane by
; lane with cvtsi2sdq and return the results in %xmm0/%xmm1. The AVX checks
; split %ymm0 with vextractf128, convert the four lanes with vcvtsi2sdq and
; rejoin the halves with vinsertf128.
67 define <4 x double> @sitofp_4vf64(<4 x i64> %a) {
68 ; SSE2-LABEL: sitofp_4vf64:
70 ; SSE2-NEXT: movd %xmm0, %rax
71 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
72 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
73 ; SSE2-NEXT: movd %xmm0, %rax
74 ; SSE2-NEXT: xorps %xmm0, %xmm0
75 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
76 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
77 ; SSE2-NEXT: movd %xmm1, %rax
78 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm3
79 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
80 ; SSE2-NEXT: movd %xmm0, %rax
81 ; SSE2-NEXT: xorps %xmm0, %xmm0
82 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
83 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
84 ; SSE2-NEXT: movapd %xmm2, %xmm0
85 ; SSE2-NEXT: movapd %xmm3, %xmm1
88 ; AVX-LABEL: sitofp_4vf64:
90 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
91 ; AVX-NEXT: vpextrq $1, %xmm1, %rax
92 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
93 ; AVX-NEXT: vmovq %xmm1, %rax
94 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
95 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
96 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
97 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
98 ; AVX-NEXT: vmovq %xmm0, %rax
99 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
100 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
101 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
102 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
104 %cvt = sitofp <4 x i64> %a to <4 x double>
105 ret <4 x double> %cvt
; sitofp <4 x i32> -> <4 x double>.
; The SSE2 checks shuffle the lanes apart and convert each one with scalar
; cvtsi2sdq; the AVX checks expect a single packed vcvtdq2pd from %xmm0 into
; %ymm0.
108 define <4 x double> @sitofp_4vf64_i32(<4 x i32> %a) {
109 ; SSE2-LABEL: sitofp_4vf64_i32:
111 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
112 ; SSE2-NEXT: movd %xmm1, %rax
114 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
115 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
116 ; SSE2-NEXT: movd %xmm1, %rax
118 ; SSE2-NEXT: xorps %xmm1, %xmm1
119 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
120 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
121 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
122 ; SSE2-NEXT: movd %xmm0, %rax
124 ; SSE2-NEXT: xorps %xmm1, %xmm1
125 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
126 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
127 ; SSE2-NEXT: movd %xmm0, %rax
129 ; SSE2-NEXT: xorps %xmm0, %xmm0
130 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
131 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
132 ; SSE2-NEXT: movapd %xmm2, %xmm0
135 ; AVX-LABEL: sitofp_4vf64_i32:
137 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
139 %cvt = sitofp <4 x i32> %a to <4 x double>
140 ret <4 x double> %cvt
144 ; Unsigned Integer to Double
; uitofp <2 x i64> -> <2 x double>.
; No scalar conversion instruction appears in either check sequence. Each
; 64-bit lane has its 32-bit halves interleaved (punpckldq) with the integer
; constant vector [1127219200,1160773632,0,0], the double constant vector
; [4.503600e+15,1.934281e+25] is subtracted, and the two partial results are
; summed (pshufd + addpd in the SSE2 checks, vhaddpd in the AVX checks).
147 define <2 x double> @uitofp_2vf64(<2 x i64> %a) {
148 ; SSE2-LABEL: uitofp_2vf64:
150 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
151 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
152 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
153 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
154 ; SSE2-NEXT: subpd %xmm3, %xmm0
155 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
156 ; SSE2-NEXT: addpd %xmm4, %xmm0
157 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
158 ; SSE2-NEXT: subpd %xmm3, %xmm2
159 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
160 ; SSE2-NEXT: addpd %xmm2, %xmm1
161 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
164 ; AVX-LABEL: uitofp_2vf64:
166 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
167 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
168 ; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
169 ; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
170 ; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
171 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
172 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
173 ; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
174 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
175 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
177 %cvt = uitofp <2 x i64> %a to <2 x double>
178 ret <2 x double> %cvt
; uitofp of the low two i32 lanes of %a (shufflevector mask <0, 1>) to
; <2 x double>.
; Both check sequences first zero-extend the two lanes to 64 bits (pxor +
; punpckldq in the SSE2 checks, vpmovzxdq in the AVX checks) and then apply
; the same interleave / subtract / add sequence that the checks for
; @uitofp_2vf64 use for 64-bit inputs.
181 define <2 x double> @uitofp_2vf64_i32(<4 x i32> %a) {
182 ; SSE2-LABEL: uitofp_2vf64_i32:
184 ; SSE2-NEXT: pxor %xmm1, %xmm1
185 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
186 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
187 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
188 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
189 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
190 ; SSE2-NEXT: subpd %xmm3, %xmm0
191 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
192 ; SSE2-NEXT: addpd %xmm4, %xmm0
193 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
194 ; SSE2-NEXT: subpd %xmm3, %xmm2
195 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
196 ; SSE2-NEXT: addpd %xmm2, %xmm1
197 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
200 ; AVX-LABEL: uitofp_2vf64_i32:
202 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
203 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
204 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
205 ; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
206 ; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
207 ; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
208 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
209 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
210 ; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
211 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
212 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
214 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
215 %cvt = uitofp <2 x i32> %shuf to <2 x double>
216 ret <2 x double> %cvt
; uitofp <4 x i64> -> <4 x double>.
; Each 64-bit lane goes through the interleave (punpckldq) / subpd / add
; sequence with the constant vectors [1127219200,1160773632,0,0] and
; [4.503600e+15,1.934281e+25]. The SSE2 checks process %xmm0 and %xmm1 in
; turn; the AVX checks split %ymm0 with vextractf128, use vhaddpd for the
; final sum and rejoin the halves with vinsertf128.
219 define <4 x double> @uitofp_4vf64(<4 x i64> %a) {
220 ; SSE2-LABEL: uitofp_4vf64:
222 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
223 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
224 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
225 ; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
226 ; SSE2-NEXT: subpd %xmm4, %xmm0
227 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
228 ; SSE2-NEXT: addpd %xmm5, %xmm0
229 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
230 ; SSE2-NEXT: subpd %xmm4, %xmm3
231 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
232 ; SSE2-NEXT: addpd %xmm3, %xmm5
233 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
234 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
235 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
236 ; SSE2-NEXT: subpd %xmm4, %xmm1
237 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
238 ; SSE2-NEXT: addpd %xmm5, %xmm1
239 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
240 ; SSE2-NEXT: subpd %xmm4, %xmm3
241 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
242 ; SSE2-NEXT: addpd %xmm3, %xmm2
243 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
246 ; AVX-LABEL: uitofp_4vf64:
248 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
249 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
250 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
251 ; AVX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
252 ; AVX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
253 ; AVX-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
254 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
255 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
256 ; AVX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
257 ; AVX-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
258 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
259 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
260 ; AVX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
261 ; AVX-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
262 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
263 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
264 ; AVX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
265 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
266 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
267 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
269 %cvt = uitofp <4 x i64> %a to <4 x double>
270 ret <4 x double> %cvt
; uitofp <4 x i32> -> <4 x double>.
; The SSE2 checks widen the lanes to 64-bit slots (pxor + punpckldq for the low
; pair, pshufd + pand with a constant-pool mask for the high pair) and then run
; the interleave / subpd / addpd sequence on each slot.
; The AVX checks instead mask the input with a constant-pool operand (vpand)
; and shift it right by 16 (vpsrld), convert both pieces with vcvtdq2pd,
; scale the shifted piece with vmulpd by a constant-pool operand and add the
; two results.
273 define <4 x double> @uitofp_4vf64_i32(<4 x i32> %a) {
274 ; SSE2-LABEL: uitofp_4vf64_i32:
276 ; SSE2-NEXT: pxor %xmm1, %xmm1
277 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
278 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
279 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
280 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
281 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
282 ; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
283 ; SSE2-NEXT: subpd %xmm4, %xmm0
284 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
285 ; SSE2-NEXT: addpd %xmm5, %xmm0
286 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
287 ; SSE2-NEXT: subpd %xmm4, %xmm1
288 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
289 ; SSE2-NEXT: addpd %xmm1, %xmm5
290 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
291 ; SSE2-NEXT: pand .LCPI7_2(%rip), %xmm2
292 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
293 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
294 ; SSE2-NEXT: subpd %xmm4, %xmm2
295 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
296 ; SSE2-NEXT: addpd %xmm2, %xmm1
297 ; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
298 ; SSE2-NEXT: subpd %xmm4, %xmm5
299 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
300 ; SSE2-NEXT: addpd %xmm5, %xmm2
301 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
304 ; AVX-LABEL: uitofp_4vf64_i32:
306 ; AVX-NEXT: vpand .LCPI7_0(%rip), %xmm0, %xmm1
307 ; AVX-NEXT: vcvtdq2pd %xmm1, %ymm1
308 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
309 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
310 ; AVX-NEXT: vmulpd .LCPI7_1(%rip), %ymm0, %ymm0
311 ; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
313 %cvt = uitofp <4 x i32> %a to <4 x double>
314 ret <4 x double> %cvt
318 ; Signed Integer to Float
; sitofp <4 x i32> -> <4 x float>.
; Both check sequences expect one packed conversion instruction:
; cvtdq2ps for SSE2 and vcvtdq2ps for AVX, operating in place on %xmm0.
321 define <4 x float> @sitofp_4vf32(<4 x i32> %a) {
322 ; SSE2-LABEL: sitofp_4vf32:
324 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
327 ; AVX-LABEL: sitofp_4vf32:
329 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
331 %cvt = sitofp <4 x i32> %a to <4 x float>
; sitofp <2 x i64> -> <2 x float>, then widened to <4 x float> by a
; shufflevector whose upper two result lanes are undef (%ext).
; Both check sequences convert lane by lane with scalar (v)cvtsi2ssq. The SSE2
; checks merge the two results with unpcklps; the AVX checks build the vector
; with vinsertps, including inserts into lanes 2 and 3.
335 define <4 x float> @sitofp_4vf32_i64(<2 x i64> %a) {
336 ; SSE2-LABEL: sitofp_4vf32_i64:
338 ; SSE2-NEXT: movd %xmm0, %rax
339 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
340 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
341 ; SSE2-NEXT: movd %xmm0, %rax
342 ; SSE2-NEXT: xorps %xmm0, %xmm0
343 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
344 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
345 ; SSE2-NEXT: movaps %xmm1, %xmm0
348 ; AVX-LABEL: sitofp_4vf32_i64:
350 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
351 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
352 ; AVX-NEXT: vmovq %xmm0, %rax
353 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
354 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
355 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
356 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
357 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
358 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
360 %cvt = sitofp <2 x i64> %a to <2 x float>
361 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; sitofp <8 x i32> -> <8 x float>.
; The SSE2 checks expect one cvtdq2ps per 128-bit half (%xmm0 and %xmm1); the
; AVX checks expect a single 256-bit vcvtdq2ps on %ymm0.
365 define <8 x float> @sitofp_8vf32(<8 x i32> %a) {
366 ; SSE2-LABEL: sitofp_8vf32:
368 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
369 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
372 ; AVX-LABEL: sitofp_8vf32:
374 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
376 %cvt = sitofp <8 x i32> %a to <8 x float>
; sitofp <4 x i64> -> <4 x float>.
; Both check sequences convert each of the four 64-bit lanes with scalar
; (v)cvtsi2ssq. The SSE2 checks merge the results with three unpcklps; the
; AVX checks assemble them with vinsertps (reaching the upper two input
; lanes via vextractf128) and end with vzeroupper.
380 define <4 x float> @sitofp_4vf32_4i64(<4 x i64> %a) {
381 ; SSE2-LABEL: sitofp_4vf32_4i64:
383 ; SSE2-NEXT: movd %xmm1, %rax
384 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm3
385 ; SSE2-NEXT: movd %xmm0, %rax
386 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm2
387 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
388 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
389 ; SSE2-NEXT: movd %xmm1, %rax
390 ; SSE2-NEXT: xorps %xmm1, %xmm1
391 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
392 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
393 ; SSE2-NEXT: movd %xmm0, %rax
394 ; SSE2-NEXT: xorps %xmm0, %xmm0
395 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
396 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
397 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
398 ; SSE2-NEXT: movaps %xmm2, %xmm0
401 ; AVX-LABEL: sitofp_4vf32_4i64:
403 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
404 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
405 ; AVX-NEXT: vmovq %xmm0, %rax
406 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
407 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
408 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
409 ; AVX-NEXT: vmovq %xmm0, %rax
410 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
411 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
412 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
413 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
414 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
415 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
416 ; AVX-NEXT: vzeroupper
418 %cvt = sitofp <4 x i64> %a to <4 x float>
423 ; Unsigned Integer to Float
; uitofp <4 x i32> -> <4 x float>.
; Neither check sequence uses a conversion instruction. The SSE2 checks mask
; the input with [65535,65535,65535,65535] and, separately, shift it right by
; 16 (psrld); each piece is OR'ed with a constant-pool operand, a constant is
; added to the shifted piece (addps) and the two pieces are summed.
; The AVX checks follow the same shape but merge the constants with
; vpblendw from memory instead of pand/por.
426 define <4 x float> @uitofp_4vf32(<4 x i32> %a) {
427 ; SSE2-LABEL: uitofp_4vf32:
429 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
430 ; SSE2-NEXT: pand %xmm0, %xmm1
431 ; SSE2-NEXT: por .LCPI12_1(%rip), %xmm1
432 ; SSE2-NEXT: psrld $16, %xmm0
433 ; SSE2-NEXT: por .LCPI12_2(%rip), %xmm0
434 ; SSE2-NEXT: addps .LCPI12_3(%rip), %xmm0
435 ; SSE2-NEXT: addps %xmm1, %xmm0
438 ; AVX-LABEL: uitofp_4vf32:
440 ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
441 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
442 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
443 ; AVX-NEXT: vaddps .LCPI12_2(%rip), %xmm0, %xmm0
444 ; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
446 %cvt = uitofp <4 x i32> %a to <4 x float>
; uitofp <2 x i64> -> <2 x float>, then widened to <4 x float> by a
; shufflevector whose upper two result lanes are undef (%ext).
; For each input lane the checks expect a branch on the sign bit (testq + js):
;   - sign bit clear: the value is converted directly with (v)cvtsi2ssq;
;   - sign bit set: the value is shifted right by one (shrq), its original low
;     bit (andl $1) is OR'ed back in, the result is converted with
;     (v)cvtsi2ssq and then doubled with (v)addss.
; The SSE2 checks merge the two lanes with unpcklps; the AVX checks use
; vinsertps and contain a further testq/js + vcvtsi2ssq block feeding
; lanes 2 and 3.
450 define <4 x float> @uitofp_4vf32_i64(<2 x i64> %a) {
451 ; SSE2-LABEL: uitofp_4vf32_i64:
453 ; SSE2-NEXT: movdqa %xmm0, %xmm1
454 ; SSE2-NEXT: movd %xmm1, %rax
455 ; SSE2-NEXT: movl %eax, %ecx
456 ; SSE2-NEXT: andl $1, %ecx
457 ; SSE2-NEXT: testq %rax, %rax
458 ; SSE2-NEXT: js .LBB13_1
460 ; SSE2-NEXT: xorps %xmm0, %xmm0
461 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
462 ; SSE2-NEXT: jmp .LBB13_3
463 ; SSE2-NEXT: .LBB13_1:
464 ; SSE2-NEXT: shrq %rax
465 ; SSE2-NEXT: orq %rax, %rcx
466 ; SSE2-NEXT: xorps %xmm0, %xmm0
467 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0
468 ; SSE2-NEXT: addss %xmm0, %xmm0
469 ; SSE2-NEXT: .LBB13_3:
470 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
471 ; SSE2-NEXT: movd %xmm1, %rax
472 ; SSE2-NEXT: movl %eax, %ecx
473 ; SSE2-NEXT: andl $1, %ecx
474 ; SSE2-NEXT: testq %rax, %rax
475 ; SSE2-NEXT: js .LBB13_4
477 ; SSE2-NEXT: xorps %xmm1, %xmm1
478 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
479 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
481 ; SSE2-NEXT: .LBB13_4:
482 ; SSE2-NEXT: shrq %rax
483 ; SSE2-NEXT: orq %rax, %rcx
484 ; SSE2-NEXT: xorps %xmm1, %xmm1
485 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1
486 ; SSE2-NEXT: addss %xmm1, %xmm1
487 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
490 ; AVX-LABEL: uitofp_4vf32_i64:
492 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
493 ; AVX-NEXT: movl %eax, %ecx
494 ; AVX-NEXT: andl $1, %ecx
495 ; AVX-NEXT: testq %rax, %rax
496 ; AVX-NEXT: js .LBB13_1
498 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
499 ; AVX-NEXT: jmp .LBB13_3
500 ; AVX-NEXT: .LBB13_1:
501 ; AVX-NEXT: shrq %rax
502 ; AVX-NEXT: orq %rax, %rcx
503 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
504 ; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
505 ; AVX-NEXT: .LBB13_3:
506 ; AVX-NEXT: vmovq %xmm0, %rax
507 ; AVX-NEXT: movl %eax, %ecx
508 ; AVX-NEXT: andl $1, %ecx
509 ; AVX-NEXT: testq %rax, %rax
510 ; AVX-NEXT: js .LBB13_4
512 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
513 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
514 ; AVX-NEXT: jmp .LBB13_6
515 ; AVX-NEXT: .LBB13_4:
516 ; AVX-NEXT: shrq %rax
517 ; AVX-NEXT: orq %rax, %rcx
518 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
519 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
520 ; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
521 ; AVX-NEXT: .LBB13_6:
522 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
523 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
524 ; AVX-NEXT: testq %rax, %rax
525 ; AVX-NEXT: js .LBB13_8
527 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
528 ; AVX-NEXT: .LBB13_8:
529 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
530 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
532 %cvt = uitofp <2 x i64> %a to <2 x float>
533 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; uitofp <8 x i32> -> <8 x float>.
; The SSE2 checks handle %xmm0 and %xmm1 separately with the same steps: mask
; with [65535 x 4], OR in [1258291200 x 4]; shift right by 16 (psrld), OR in
; [1392508928 x 4], add [-5.497642e+11 x 4]; then sum the two pieces (addps).
; The AVX checks mask the input (vandps) and shift each 128-bit half right by
; 16 (vpsrld, with vextractf128/vinsertf128), convert both pieces with
; vcvtdq2ps, scale the shifted piece with vmulps by a constant-pool operand
; and add the two results.
537 define <8 x float> @uitofp_8vf32(<8 x i32> %a) {
538 ; SSE2-LABEL: uitofp_8vf32:
540 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
541 ; SSE2-NEXT: movdqa %xmm0, %xmm3
542 ; SSE2-NEXT: pand %xmm2, %xmm3
543 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
544 ; SSE2-NEXT: por %xmm4, %xmm3
545 ; SSE2-NEXT: psrld $16, %xmm0
546 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
547 ; SSE2-NEXT: por %xmm5, %xmm0
548 ; SSE2-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
549 ; SSE2-NEXT: addps %xmm6, %xmm0
550 ; SSE2-NEXT: addps %xmm3, %xmm0
551 ; SSE2-NEXT: pand %xmm1, %xmm2
552 ; SSE2-NEXT: por %xmm4, %xmm2
553 ; SSE2-NEXT: psrld $16, %xmm1
554 ; SSE2-NEXT: por %xmm5, %xmm1
555 ; SSE2-NEXT: addps %xmm6, %xmm1
556 ; SSE2-NEXT: addps %xmm2, %xmm1
559 ; AVX-LABEL: uitofp_8vf32:
561 ; AVX-NEXT: vandps .LCPI14_0(%rip), %ymm0, %ymm1
562 ; AVX-NEXT: vcvtdq2ps %ymm1, %ymm1
563 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm2
564 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
565 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
566 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
567 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
568 ; AVX-NEXT: vmulps .LCPI14_1(%rip), %ymm0, %ymm0
569 ; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
571 %cvt = uitofp <8 x i32> %a to <8 x float>
; uitofp <4 x i64> -> <4 x float>.
; For each of the four input lanes the checks expect a branch on the sign bit
; (testq + js):
;   - sign bit clear: the value is converted directly with (v)cvtsi2ssq;
;   - sign bit set: the value is shifted right by one (shrq), its original low
;     bit (andl $1) is OR'ed back in, the result is converted with
;     (v)cvtsi2ssq and then doubled with (v)addss.
; The SSE2 checks merge the four results with unpcklps. The AVX checks
; assemble them with vinsertps (upper input lanes via vextractf128); both
; tails of the last branch end with vzeroupper.
575 define <4 x float> @uitofp_4vf32_4i64(<4 x i64> %a) {
576 ; SSE2-LABEL: uitofp_4vf32_4i64:
578 ; SSE2-NEXT: movd %xmm1, %rax
579 ; SSE2-NEXT: movl %eax, %ecx
580 ; SSE2-NEXT: andl $1, %ecx
581 ; SSE2-NEXT: testq %rax, %rax
582 ; SSE2-NEXT: js .LBB15_1
584 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm3
585 ; SSE2-NEXT: jmp .LBB15_3
586 ; SSE2-NEXT: .LBB15_1:
587 ; SSE2-NEXT: shrq %rax
588 ; SSE2-NEXT: orq %rax, %rcx
589 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm3
590 ; SSE2-NEXT: addss %xmm3, %xmm3
591 ; SSE2-NEXT: .LBB15_3:
592 ; SSE2-NEXT: movd %xmm0, %rax
593 ; SSE2-NEXT: movl %eax, %ecx
594 ; SSE2-NEXT: andl $1, %ecx
595 ; SSE2-NEXT: testq %rax, %rax
596 ; SSE2-NEXT: js .LBB15_4
598 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm2
599 ; SSE2-NEXT: jmp .LBB15_6
600 ; SSE2-NEXT: .LBB15_4:
601 ; SSE2-NEXT: shrq %rax
602 ; SSE2-NEXT: orq %rax, %rcx
603 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm2
604 ; SSE2-NEXT: addss %xmm2, %xmm2
605 ; SSE2-NEXT: .LBB15_6:
606 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
607 ; SSE2-NEXT: movd %xmm1, %rax
608 ; SSE2-NEXT: movl %eax, %ecx
609 ; SSE2-NEXT: andl $1, %ecx
610 ; SSE2-NEXT: testq %rax, %rax
611 ; SSE2-NEXT: js .LBB15_7
613 ; SSE2-NEXT: xorps %xmm1, %xmm1
614 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
615 ; SSE2-NEXT: jmp .LBB15_9
616 ; SSE2-NEXT: .LBB15_7:
617 ; SSE2-NEXT: shrq %rax
618 ; SSE2-NEXT: orq %rax, %rcx
619 ; SSE2-NEXT: xorps %xmm1, %xmm1
620 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1
621 ; SSE2-NEXT: addss %xmm1, %xmm1
622 ; SSE2-NEXT: .LBB15_9:
623 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
624 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
625 ; SSE2-NEXT: movd %xmm0, %rax
626 ; SSE2-NEXT: movl %eax, %ecx
627 ; SSE2-NEXT: andl $1, %ecx
628 ; SSE2-NEXT: testq %rax, %rax
629 ; SSE2-NEXT: js .LBB15_10
630 ; SSE2-NEXT: # BB#11:
631 ; SSE2-NEXT: xorps %xmm0, %xmm0
632 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
633 ; SSE2-NEXT: jmp .LBB15_12
634 ; SSE2-NEXT: .LBB15_10:
635 ; SSE2-NEXT: shrq %rax
636 ; SSE2-NEXT: orq %rax, %rcx
637 ; SSE2-NEXT: xorps %xmm0, %xmm0
638 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0
639 ; SSE2-NEXT: addss %xmm0, %xmm0
640 ; SSE2-NEXT: .LBB15_12:
641 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
642 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
643 ; SSE2-NEXT: movaps %xmm2, %xmm0
646 ; AVX-LABEL: uitofp_4vf32_4i64:
648 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
649 ; AVX-NEXT: movl %eax, %ecx
650 ; AVX-NEXT: andl $1, %ecx
651 ; AVX-NEXT: testq %rax, %rax
652 ; AVX-NEXT: js .LBB15_1
654 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
655 ; AVX-NEXT: jmp .LBB15_3
656 ; AVX-NEXT: .LBB15_1:
657 ; AVX-NEXT: shrq %rax
658 ; AVX-NEXT: orq %rax, %rcx
659 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
660 ; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
661 ; AVX-NEXT: .LBB15_3:
662 ; AVX-NEXT: vmovq %xmm0, %rax
663 ; AVX-NEXT: movl %eax, %ecx
664 ; AVX-NEXT: andl $1, %ecx
665 ; AVX-NEXT: testq %rax, %rax
666 ; AVX-NEXT: js .LBB15_4
668 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
669 ; AVX-NEXT: jmp .LBB15_6
670 ; AVX-NEXT: .LBB15_4:
671 ; AVX-NEXT: shrq %rax
672 ; AVX-NEXT: orq %rax, %rcx
673 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
674 ; AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2
675 ; AVX-NEXT: .LBB15_6:
676 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
677 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
678 ; AVX-NEXT: vmovq %xmm0, %rax
679 ; AVX-NEXT: movl %eax, %ecx
680 ; AVX-NEXT: andl $1, %ecx
681 ; AVX-NEXT: testq %rax, %rax
682 ; AVX-NEXT: js .LBB15_7
684 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
685 ; AVX-NEXT: jmp .LBB15_9
686 ; AVX-NEXT: .LBB15_7:
687 ; AVX-NEXT: shrq %rax
688 ; AVX-NEXT: orq %rax, %rcx
689 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
690 ; AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2
691 ; AVX-NEXT: .LBB15_9:
692 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
693 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
694 ; AVX-NEXT: movl %eax, %ecx
695 ; AVX-NEXT: andl $1, %ecx
696 ; AVX-NEXT: testq %rax, %rax
697 ; AVX-NEXT: js .LBB15_10
699 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
700 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
701 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
702 ; AVX-NEXT: vzeroupper
704 ; AVX-NEXT: .LBB15_10:
705 ; AVX-NEXT: shrq %rax
706 ; AVX-NEXT: orq %rax, %rcx
707 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
708 ; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
709 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
710 ; AVX-NEXT: vzeroupper
712 %cvt = uitofp <4 x i64> %a to <4 x float>