1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX
5 ; Signed Integer to Double
; Checks codegen for sitofp <2 x i64> -> <2 x double>: scalarized via cvtsi2sdq
; (SSE2 extracts lanes with movd/pshufd; AVX with vpextrq/vmovq).
8 define <2 x double> @sitofp_2vf64(<2 x i64> %a) {
9 ; SSE2-LABEL: sitofp_2vf64:
11 ; SSE2-NEXT: movd %xmm0, %rax
12 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
13 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
14 ; SSE2-NEXT: movd %xmm0, %rax
15 ; SSE2-NEXT: xorps %xmm0, %xmm0
16 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
17 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
18 ; SSE2-NEXT: movapd %xmm1, %xmm0
21 ; AVX-LABEL: sitofp_2vf64:
23 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
24 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
25 ; AVX-NEXT: vmovq %xmm0, %rax
26 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
27 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
28 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
30 %cvt = sitofp <2 x i64> %a to <2 x double>
; Checks sitofp of the low 2 x i32 lanes (shuffled out of a <4 x i32>) to
; <2 x double>; both prefixes sign-extend with movslq before cvtsi2sdq.
34 define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) {
35 ; SSE2-LABEL: sitofp_2vf64_i32:
37 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
38 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
39 ; SSE2-NEXT: movd %xmm1, %rax
41 ; SSE2-NEXT: movd %xmm0, %rcx
42 ; SSE2-NEXT: movslq %ecx, %rcx
43 ; SSE2-NEXT: xorps %xmm0, %xmm0
44 ; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0
45 ; SSE2-NEXT: xorps %xmm1, %xmm1
46 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
47 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
50 ; AVX-LABEL: sitofp_2vf64_i32:
52 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
53 ; AVX-NEXT: vmovq %xmm0, %rax
55 ; AVX-NEXT: vpextrq $1, %xmm0, %rcx
56 ; AVX-NEXT: movslq %ecx, %rcx
57 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
58 ; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0
59 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
60 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
62 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
63 %cvt = sitofp <2 x i32> %shuf to <2 x double>
; Checks sitofp of the low 2 x i16 lanes to <2 x double>; lanes are
; sign-extended in GPRs via movswq before the scalar conversions.
67 define <2 x double> @sitofp_2vf64_i16(<8 x i16> %a) {
68 ; SSE2-LABEL: sitofp_2vf64_i16:
70 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
71 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
72 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
73 ; SSE2-NEXT: movd %xmm1, %rax
74 ; SSE2-NEXT: movswq %ax, %rax
75 ; SSE2-NEXT: movd %xmm0, %rcx
76 ; SSE2-NEXT: movswq %cx, %rcx
77 ; SSE2-NEXT: xorps %xmm0, %xmm0
78 ; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0
79 ; SSE2-NEXT: xorps %xmm1, %xmm1
80 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
81 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
84 ; AVX-LABEL: sitofp_2vf64_i16:
86 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
87 ; AVX-NEXT: vmovq %xmm0, %rax
88 ; AVX-NEXT: movswq %ax, %rax
89 ; AVX-NEXT: vpextrq $1, %xmm0, %rcx
90 ; AVX-NEXT: movswq %cx, %rcx
91 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
92 ; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0
93 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
94 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
96 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
97 %cvt = sitofp <2 x i16> %shuf to <2 x double>
; Checks sitofp of the low 2 x i8 lanes to <2 x double>; lanes are
; sign-extended in GPRs via movsbq before the scalar conversions.
101 define <2 x double> @sitofp_2vf64_i8(<16 x i8> %a) {
102 ; SSE2-LABEL: sitofp_2vf64_i8:
104 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
105 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
106 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]
107 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
108 ; SSE2-NEXT: movd %xmm1, %rax
109 ; SSE2-NEXT: movsbq %al, %rax
110 ; SSE2-NEXT: movd %xmm0, %rcx
111 ; SSE2-NEXT: movsbq %cl, %rcx
112 ; SSE2-NEXT: xorps %xmm0, %xmm0
113 ; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0
114 ; SSE2-NEXT: xorps %xmm1, %xmm1
115 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
116 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
119 ; AVX-LABEL: sitofp_2vf64_i8:
121 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
122 ; AVX-NEXT: vmovq %xmm0, %rax
123 ; AVX-NEXT: movsbq %al, %rax
124 ; AVX-NEXT: vpextrq $1, %xmm0, %rcx
125 ; AVX-NEXT: movsbq %cl, %rcx
126 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
127 ; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0
128 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
129 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
131 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
132 %cvt = sitofp <2 x i8> %shuf to <2 x double>
133 ret <2 x double> %cvt
; Checks sitofp <4 x i64> -> <4 x double>: four scalar cvtsi2sdq conversions;
; AVX splits the ymm input with vextractf128 and rejoins with vinsertf128.
136 define <4 x double> @sitofp_4vf64(<4 x i64> %a) {
137 ; SSE2-LABEL: sitofp_4vf64:
139 ; SSE2-NEXT: movd %xmm0, %rax
140 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
141 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
142 ; SSE2-NEXT: movd %xmm0, %rax
143 ; SSE2-NEXT: xorps %xmm0, %xmm0
144 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
145 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
146 ; SSE2-NEXT: movd %xmm1, %rax
147 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm3
148 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
149 ; SSE2-NEXT: movd %xmm0, %rax
150 ; SSE2-NEXT: xorps %xmm0, %xmm0
151 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
152 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
153 ; SSE2-NEXT: movapd %xmm2, %xmm0
154 ; SSE2-NEXT: movapd %xmm3, %xmm1
157 ; AVX-LABEL: sitofp_4vf64:
159 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
160 ; AVX-NEXT: vpextrq $1, %xmm1, %rax
161 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
162 ; AVX-NEXT: vmovq %xmm1, %rax
163 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
164 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
165 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
166 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
167 ; AVX-NEXT: vmovq %xmm0, %rax
168 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
169 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
170 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
171 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
173 %cvt = sitofp <4 x i64> %a to <4 x double>
174 ret <4 x double> %cvt
; Checks sitofp <4 x i32> -> <4 x double>: AVX folds to a single vcvtdq2pd
; on a ymm; SSE2 scalarizes all four lanes.
177 define <4 x double> @sitofp_4vf64_i32(<4 x i32> %a) {
178 ; SSE2-LABEL: sitofp_4vf64_i32:
180 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
181 ; SSE2-NEXT: movd %xmm1, %rax
183 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
184 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
185 ; SSE2-NEXT: movd %xmm1, %rax
187 ; SSE2-NEXT: xorps %xmm1, %xmm1
188 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
189 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
190 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
191 ; SSE2-NEXT: movd %xmm0, %rax
193 ; SSE2-NEXT: xorps %xmm1, %xmm1
194 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
195 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
196 ; SSE2-NEXT: movd %xmm0, %rax
198 ; SSE2-NEXT: xorps %xmm0, %xmm0
199 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
200 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
201 ; SSE2-NEXT: movapd %xmm2, %xmm0
204 ; AVX-LABEL: sitofp_4vf64_i32:
206 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
208 %cvt = sitofp <4 x i32> %a to <4 x double>
209 ret <4 x double> %cvt
; Checks sitofp of the low 4 x i16 lanes to <4 x double>; AVX sign-extends
; in-vector (pslld/psrad by 16) then converts with one vcvtdq2pd.
212 define <4 x double> @sitofp_4vf64_i16(<8 x i16> %a) {
213 ; SSE2-LABEL: sitofp_4vf64_i16:
215 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,3]
216 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
217 ; SSE2-NEXT: movd %xmm1, %rax
218 ; SSE2-NEXT: movswq %ax, %rax
219 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
220 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
221 ; SSE2-NEXT: movd %xmm1, %rax
222 ; SSE2-NEXT: movswq %ax, %rax
223 ; SSE2-NEXT: xorps %xmm1, %xmm1
224 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
225 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
226 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
227 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
228 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
229 ; SSE2-NEXT: movd %xmm0, %rax
230 ; SSE2-NEXT: movswq %ax, %rax
231 ; SSE2-NEXT: xorps %xmm1, %xmm1
232 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
233 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
234 ; SSE2-NEXT: movd %xmm0, %rax
235 ; SSE2-NEXT: movswq %ax, %rax
236 ; SSE2-NEXT: xorps %xmm0, %xmm0
237 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
238 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
239 ; SSE2-NEXT: movapd %xmm2, %xmm0
242 ; AVX-LABEL: sitofp_4vf64_i16:
244 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
245 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
246 ; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
247 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
249 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
250 %cvt = sitofp <4 x i16> %shuf to <4 x double>
251 ret <4 x double> %cvt
; Checks sitofp of the low 4 x i8 lanes to <4 x double>; AVX uses
; vpmovsxbd + vcvtdq2pd, SSE2 widens with punpck + psrad $24 then scalarizes.
254 define <4 x double> @sitofp_4vf64_i8(<16 x i8> %a) {
255 ; SSE2-LABEL: sitofp_4vf64_i8:
257 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
258 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
259 ; SSE2-NEXT: psrad $24, %xmm1
260 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
261 ; SSE2-NEXT: movd %xmm2, %rax
263 ; SSE2-NEXT: xorps %xmm0, %xmm0
264 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
265 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
266 ; SSE2-NEXT: movd %xmm2, %rax
268 ; SSE2-NEXT: xorps %xmm2, %xmm2
269 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
270 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
271 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
272 ; SSE2-NEXT: movd %xmm2, %rax
274 ; SSE2-NEXT: xorps %xmm1, %xmm1
275 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
276 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
277 ; SSE2-NEXT: movd %xmm2, %rax
279 ; SSE2-NEXT: xorps %xmm2, %xmm2
280 ; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
281 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
284 ; AVX-LABEL: sitofp_4vf64_i8:
286 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
287 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
289 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
290 %cvt = sitofp <4 x i8> %shuf to <4 x double>
291 ret <4 x double> %cvt
295 ; Unsigned Integer to Double
; Checks uitofp <2 x i64> -> <2 x double> via the magic-constant trick:
; interleave with [0x43300000,0x45300000] bias words, subpd the matching
; double constants, then add the halves (AVX uses vhaddpd).
298 define <2 x double> @uitofp_2vf64(<2 x i64> %a) {
299 ; SSE2-LABEL: uitofp_2vf64:
301 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
302 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
303 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
304 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
305 ; SSE2-NEXT: subpd %xmm3, %xmm0
306 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
307 ; SSE2-NEXT: addpd %xmm4, %xmm0
308 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
309 ; SSE2-NEXT: subpd %xmm3, %xmm2
310 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
311 ; SSE2-NEXT: addpd %xmm2, %xmm1
312 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
315 ; AVX-LABEL: uitofp_2vf64:
317 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
318 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
319 ; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
320 ; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
321 ; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
322 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
323 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
324 ; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
325 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
326 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
328 %cvt = uitofp <2 x i64> %a to <2 x double>
329 ret <2 x double> %cvt
; Checks uitofp of the low 2 x i32 lanes to <2 x double>: zero-extend to
; i64 lanes first, then the same magic-constant subpd/add sequence.
332 define <2 x double> @uitofp_2vf64_i32(<4 x i32> %a) {
333 ; SSE2-LABEL: uitofp_2vf64_i32:
335 ; SSE2-NEXT: pxor %xmm1, %xmm1
336 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
337 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
338 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
339 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
340 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
341 ; SSE2-NEXT: subpd %xmm3, %xmm0
342 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
343 ; SSE2-NEXT: addpd %xmm4, %xmm0
344 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
345 ; SSE2-NEXT: subpd %xmm3, %xmm2
346 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
347 ; SSE2-NEXT: addpd %xmm2, %xmm1
348 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
351 ; AVX-LABEL: uitofp_2vf64_i32:
353 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
354 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
355 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
356 ; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
357 ; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
358 ; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
359 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
360 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
361 ; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
362 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
363 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
365 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
366 %cvt = uitofp <2 x i32> %shuf to <2 x double>
367 ret <2 x double> %cvt
; Checks uitofp of the low 2 x i16 lanes to <2 x double>: zero-extend to
; i64 lanes, then the magic-constant subpd/add sequence.
370 define <2 x double> @uitofp_2vf64_i16(<8 x i16> %a) {
371 ; SSE2-LABEL: uitofp_2vf64_i16:
373 ; SSE2-NEXT: pxor %xmm1, %xmm1
374 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
375 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
376 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
377 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
378 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
379 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
380 ; SSE2-NEXT: subpd %xmm3, %xmm0
381 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
382 ; SSE2-NEXT: addpd %xmm4, %xmm0
383 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
384 ; SSE2-NEXT: subpd %xmm3, %xmm2
385 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
386 ; SSE2-NEXT: addpd %xmm2, %xmm1
387 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
390 ; AVX-LABEL: uitofp_2vf64_i16:
392 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
393 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
394 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
395 ; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
396 ; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
397 ; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
398 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
399 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
400 ; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
401 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
402 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
404 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
405 %cvt = uitofp <2 x i16> %shuf to <2 x double>
406 ret <2 x double> %cvt
; Checks uitofp of the low 2 x i8 lanes to <2 x double>: zero-extend to
; i64 lanes, then the magic-constant subpd/add sequence.
409 define <2 x double> @uitofp_2vf64_i8(<16 x i8> %a) {
410 ; SSE2-LABEL: uitofp_2vf64_i8:
412 ; SSE2-NEXT: pxor %xmm1, %xmm1
413 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
414 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
415 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
416 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
417 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
418 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
419 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
420 ; SSE2-NEXT: subpd %xmm3, %xmm0
421 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
422 ; SSE2-NEXT: addpd %xmm4, %xmm0
423 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
424 ; SSE2-NEXT: subpd %xmm3, %xmm2
425 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
426 ; SSE2-NEXT: addpd %xmm2, %xmm1
427 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
430 ; AVX-LABEL: uitofp_2vf64_i8:
432 ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
433 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
434 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
435 ; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
436 ; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
437 ; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
438 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
439 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
440 ; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
441 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
442 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
444 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
445 %cvt = uitofp <2 x i8> %shuf to <2 x double>
446 ret <2 x double> %cvt
; Checks uitofp <4 x i64> -> <4 x double>: magic-constant trick applied per
; 128-bit half; AVX extracts/reinserts the upper half with vextractf128/vinsertf128.
449 define <4 x double> @uitofp_4vf64(<4 x i64> %a) {
450 ; SSE2-LABEL: uitofp_4vf64:
452 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
453 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
454 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
455 ; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
456 ; SSE2-NEXT: subpd %xmm4, %xmm0
457 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
458 ; SSE2-NEXT: addpd %xmm5, %xmm0
459 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
460 ; SSE2-NEXT: subpd %xmm4, %xmm3
461 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
462 ; SSE2-NEXT: addpd %xmm3, %xmm5
463 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
464 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
465 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
466 ; SSE2-NEXT: subpd %xmm4, %xmm1
467 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
468 ; SSE2-NEXT: addpd %xmm5, %xmm1
469 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
470 ; SSE2-NEXT: subpd %xmm4, %xmm3
471 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
472 ; SSE2-NEXT: addpd %xmm3, %xmm2
473 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
476 ; AVX-LABEL: uitofp_4vf64:
478 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
479 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
480 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
481 ; AVX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
482 ; AVX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
483 ; AVX-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
484 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
485 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
486 ; AVX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
487 ; AVX-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
488 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
489 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
490 ; AVX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
491 ; AVX-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
492 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
493 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
494 ; AVX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
495 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
496 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
497 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
499 %cvt = uitofp <4 x i64> %a to <4 x double>
500 ret <4 x double> %cvt
; Checks uitofp <4 x i32> -> <4 x double>; AVX splits each i32 into low/high
; 16-bit halves (pand/psrld $16), converts both, and recombines with mul+add.
503 define <4 x double> @uitofp_4vf64_i32(<4 x i32> %a) {
504 ; SSE2-LABEL: uitofp_4vf64_i32:
506 ; SSE2-NEXT: pxor %xmm1, %xmm1
507 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
508 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
509 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
510 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
511 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
512 ; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
513 ; SSE2-NEXT: subpd %xmm4, %xmm0
514 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
515 ; SSE2-NEXT: addpd %xmm5, %xmm0
516 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
517 ; SSE2-NEXT: subpd %xmm4, %xmm1
518 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
519 ; SSE2-NEXT: addpd %xmm1, %xmm5
520 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
521 ; SSE2-NEXT: pand .LCPI13_2(%rip), %xmm2
522 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
523 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
524 ; SSE2-NEXT: subpd %xmm4, %xmm2
525 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
526 ; SSE2-NEXT: addpd %xmm2, %xmm1
527 ; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
528 ; SSE2-NEXT: subpd %xmm4, %xmm5
529 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
530 ; SSE2-NEXT: addpd %xmm5, %xmm2
531 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
534 ; AVX-LABEL: uitofp_4vf64_i32:
536 ; AVX-NEXT: vpand .LCPI13_0(%rip), %xmm0, %xmm1
537 ; AVX-NEXT: vcvtdq2pd %xmm1, %ymm1
538 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
539 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
540 ; AVX-NEXT: vmulpd .LCPI13_1(%rip), %ymm0, %ymm0
541 ; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
543 %cvt = uitofp <4 x i32> %a to <4 x double>
544 ret <4 x double> %cvt
; Checks uitofp of the low 4 x i16 lanes to <4 x double>; AVX folds to
; vpmovzxwd + vcvtdq2pd since the zero-extended values fit in signed i32.
547 define <4 x double> @uitofp_4vf64_i16(<8 x i16> %a) {
548 ; SSE2-LABEL: uitofp_4vf64_i16:
550 ; SSE2-NEXT: pxor %xmm1, %xmm1
551 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1]
552 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
553 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
554 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
555 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
556 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
557 ; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
558 ; SSE2-NEXT: subpd %xmm4, %xmm0
559 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
560 ; SSE2-NEXT: addpd %xmm5, %xmm0
561 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
562 ; SSE2-NEXT: subpd %xmm4, %xmm1
563 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
564 ; SSE2-NEXT: addpd %xmm1, %xmm5
565 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
566 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[2,1,2,3,4,5,6,7]
567 ; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,7,5,6,7]
568 ; SSE2-NEXT: pand .LCPI14_2(%rip), %xmm2
569 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
570 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
571 ; SSE2-NEXT: subpd %xmm4, %xmm2
572 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
573 ; SSE2-NEXT: addpd %xmm2, %xmm1
574 ; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
575 ; SSE2-NEXT: subpd %xmm4, %xmm5
576 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
577 ; SSE2-NEXT: addpd %xmm5, %xmm2
578 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
581 ; AVX-LABEL: uitofp_4vf64_i16:
583 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
584 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
586 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
587 %cvt = uitofp <4 x i16> %shuf to <4 x double>
588 ret <4 x double> %cvt
; Checks uitofp of the low 4 x i8 lanes to <4 x double>; AVX folds to
; vpmovzxbd + vcvtdq2pd since the zero-extended values fit in signed i32.
591 define <4 x double> @uitofp_4vf64_i8(<16 x i8> %a) {
592 ; SSE2-LABEL: uitofp_4vf64_i8:
594 ; SSE2-NEXT: movdqa %xmm0, %xmm1
595 ; SSE2-NEXT: pxor %xmm2, %xmm2
596 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
597 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
598 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
599 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
600 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
601 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
602 ; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
603 ; SSE2-NEXT: subpd %xmm3, %xmm0
604 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
605 ; SSE2-NEXT: addpd %xmm5, %xmm0
606 ; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
607 ; SSE2-NEXT: subpd %xmm3, %xmm4
608 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
609 ; SSE2-NEXT: addpd %xmm4, %xmm5
610 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
611 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
612 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,1]
613 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,1,2,3,4,5,6,7]
614 ; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,7,5,6,7]
615 ; SSE2-NEXT: pand .LCPI15_2(%rip), %xmm4
616 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
617 ; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
618 ; SSE2-NEXT: subpd %xmm3, %xmm4
619 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[2,3,0,1]
620 ; SSE2-NEXT: addpd %xmm4, %xmm1
621 ; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1]
622 ; SSE2-NEXT: subpd %xmm3, %xmm5
623 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
624 ; SSE2-NEXT: addpd %xmm5, %xmm2
625 ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
628 ; AVX-LABEL: uitofp_4vf64_i8:
630 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
631 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
633 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
634 %cvt = uitofp <4 x i8> %shuf to <4 x double>
635 ret <4 x double> %cvt
639 ; Signed Integer to Float
; Checks sitofp <4 x i32> -> <4 x float>: single cvtdq2ps on both targets.
642 define <4 x float> @sitofp_4vf32(<4 x i32> %a) {
643 ; SSE2-LABEL: sitofp_4vf32:
645 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
648 ; AVX-LABEL: sitofp_4vf32:
650 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
652 %cvt = sitofp <4 x i32> %a to <4 x float>
; Checks sitofp <2 x i64> -> <2 x float> (widened to <4 x float> by shuffle):
; scalarized via cvtsi2ssq on both targets.
656 define <4 x float> @sitofp_4vf32_i64(<2 x i64> %a) {
657 ; SSE2-LABEL: sitofp_4vf32_i64:
659 ; SSE2-NEXT: movd %xmm0, %rax
660 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
661 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
662 ; SSE2-NEXT: movd %xmm0, %rax
663 ; SSE2-NEXT: xorps %xmm0, %xmm0
664 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
665 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
666 ; SSE2-NEXT: movaps %xmm1, %xmm0
669 ; AVX-LABEL: sitofp_4vf32_i64:
671 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
672 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
673 ; AVX-NEXT: vmovq %xmm0, %rax
674 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
675 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
676 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
677 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
678 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
679 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
681 %cvt = sitofp <2 x i64> %a to <2 x float>
682 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Checks sitofp of the low 4 x i16 lanes to <4 x float>: in-vector
; sign-extension (pslld/psrad by 16) then cvtdq2ps.
686 define <4 x float> @sitofp_4vf32_i16(<8 x i16> %a) {
687 ; SSE2-LABEL: sitofp_4vf32_i16:
689 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
690 ; SSE2-NEXT: pslld $16, %xmm0
691 ; SSE2-NEXT: psrad $16, %xmm0
692 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
695 ; AVX-LABEL: sitofp_4vf32_i16:
697 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
698 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
699 ; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
700 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
702 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
703 %cvt = sitofp <4 x i16> %shuf to <4 x float>
; Checks sitofp of the low 4 x i8 lanes to <4 x float>: AVX uses
; vpmovsxbd; SSE2 widens with punpck then psrad $24 before cvtdq2ps.
707 define <4 x float> @sitofp_4vf32_i8(<16 x i8> %a) {
708 ; SSE2-LABEL: sitofp_4vf32_i8:
710 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
711 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
712 ; SSE2-NEXT: psrad $24, %xmm0
713 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
716 ; AVX-LABEL: sitofp_4vf32_i8:
718 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
719 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
721 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
722 %cvt = sitofp <4 x i8> %shuf to <4 x float>
; Checks sitofp <8 x i32> -> <8 x float>: AVX uses one ymm vcvtdq2ps;
; SSE2 converts the two xmm halves separately.
726 define <8 x float> @sitofp_8vf32(<8 x i32> %a) {
727 ; SSE2-LABEL: sitofp_8vf32:
729 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
730 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
733 ; AVX-LABEL: sitofp_8vf32:
735 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
737 %cvt = sitofp <8 x i32> %a to <8 x float>
; Checks sitofp <4 x i64> -> <4 x float>: four cvtsi2ssq conversions; AVX
; result assembled with vinsertps and ends with vzeroupper (ymm was touched).
741 define <4 x float> @sitofp_4vf32_4i64(<4 x i64> %a) {
742 ; SSE2-LABEL: sitofp_4vf32_4i64:
744 ; SSE2-NEXT: movd %xmm1, %rax
745 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm3
746 ; SSE2-NEXT: movd %xmm0, %rax
747 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm2
748 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
749 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
750 ; SSE2-NEXT: movd %xmm1, %rax
751 ; SSE2-NEXT: xorps %xmm1, %xmm1
752 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
753 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
754 ; SSE2-NEXT: movd %xmm0, %rax
755 ; SSE2-NEXT: xorps %xmm0, %xmm0
756 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
757 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
758 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
759 ; SSE2-NEXT: movaps %xmm2, %xmm0
762 ; AVX-LABEL: sitofp_4vf32_4i64:
764 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
765 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
766 ; AVX-NEXT: vmovq %xmm0, %rax
767 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
768 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
769 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
770 ; AVX-NEXT: vmovq %xmm0, %rax
771 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
772 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
773 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
774 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
775 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
776 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
777 ; AVX-NEXT: vzeroupper
779 %cvt = sitofp <4 x i64> %a to <4 x float>
; Checks sitofp <8 x i16> -> <8 x float>: AVX sign-extends each half with
; vpmovsxwd and converts a full ymm; SSE2 extends/converts per half.
783 define <8 x float> @sitofp_8vf32_i16(<8 x i16> %a) {
784 ; SSE2-LABEL: sitofp_8vf32_i16:
786 ; SSE2-NEXT: movdqa %xmm0, %xmm1
787 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
788 ; SSE2-NEXT: pslld $16, %xmm1
789 ; SSE2-NEXT: psrad $16, %xmm1
790 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2
791 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
792 ; SSE2-NEXT: pslld $16, %xmm0
793 ; SSE2-NEXT: psrad $16, %xmm0
794 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
795 ; SSE2-NEXT: movaps %xmm2, %xmm0
798 ; AVX-LABEL: sitofp_8vf32_i16:
800 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm1
801 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
802 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
803 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
804 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
806 %cvt = sitofp <8 x i16> %a to <8 x float>
; Checks sitofp of the low 8 x i8 lanes to <8 x float>: AVX sign-extends
; each 4-byte group with vpmovsxbd, joins to a ymm, then vcvtdq2ps.
810 define <8 x float> @sitofp_8vf32_i8(<16 x i8> %a) {
811 ; SSE2-LABEL: sitofp_8vf32_i8:
813 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
814 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
815 ; SSE2-NEXT: psrad $24, %xmm1
816 ; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2
817 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
818 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
819 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
820 ; SSE2-NEXT: psrad $24, %xmm0
821 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
822 ; SSE2-NEXT: movaps %xmm2, %xmm0
825 ; AVX-LABEL: sitofp_8vf32_i8:
827 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm1
828 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
829 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
830 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
831 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
833 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
834 %cvt = sitofp <8 x i8> %shuf to <8 x float>
839 ; Unsigned Integer to Float
842 define <4 x float> @uitofp_4vf32(<4 x i32> %a) {
843 ; SSE2-LABEL: uitofp_4vf32:
845 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
846 ; SSE2-NEXT: pand %xmm0, %xmm1
847 ; SSE2-NEXT: por .LCPI24_1(%rip), %xmm1
848 ; SSE2-NEXT: psrld $16, %xmm0
849 ; SSE2-NEXT: por .LCPI24_2(%rip), %xmm0
850 ; SSE2-NEXT: addps .LCPI24_3(%rip), %xmm0
851 ; SSE2-NEXT: addps %xmm1, %xmm0
854 ; AVX-LABEL: uitofp_4vf32:
856 ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
857 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
858 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
859 ; AVX-NEXT: vaddps .LCPI24_2(%rip), %xmm0, %xmm0
860 ; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
862 %cvt = uitofp <4 x i32> %a to <4 x float>
866 define <4 x float> @uitofp_4vf32_i64(<2 x i64> %a) {
867 ; SSE2-LABEL: uitofp_4vf32_i64:
869 ; SSE2-NEXT: movdqa %xmm0, %xmm1
870 ; SSE2-NEXT: movd %xmm1, %rax
871 ; SSE2-NEXT: movl %eax, %ecx
872 ; SSE2-NEXT: andl $1, %ecx
873 ; SSE2-NEXT: testq %rax, %rax
874 ; SSE2-NEXT: js .LBB25_1
876 ; SSE2-NEXT: xorps %xmm0, %xmm0
877 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
878 ; SSE2-NEXT: jmp .LBB25_3
879 ; SSE2-NEXT: .LBB25_1:
880 ; SSE2-NEXT: shrq %rax
881 ; SSE2-NEXT: orq %rax, %rcx
882 ; SSE2-NEXT: xorps %xmm0, %xmm0
883 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0
884 ; SSE2-NEXT: addss %xmm0, %xmm0
885 ; SSE2-NEXT: .LBB25_3:
886 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
887 ; SSE2-NEXT: movd %xmm1, %rax
888 ; SSE2-NEXT: movl %eax, %ecx
889 ; SSE2-NEXT: andl $1, %ecx
890 ; SSE2-NEXT: testq %rax, %rax
891 ; SSE2-NEXT: js .LBB25_4
893 ; SSE2-NEXT: xorps %xmm1, %xmm1
894 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
895 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
897 ; SSE2-NEXT: .LBB25_4:
898 ; SSE2-NEXT: shrq %rax
899 ; SSE2-NEXT: orq %rax, %rcx
900 ; SSE2-NEXT: xorps %xmm1, %xmm1
901 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1
902 ; SSE2-NEXT: addss %xmm1, %xmm1
903 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
906 ; AVX-LABEL: uitofp_4vf32_i64:
908 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
909 ; AVX-NEXT: movl %eax, %ecx
910 ; AVX-NEXT: andl $1, %ecx
911 ; AVX-NEXT: testq %rax, %rax
912 ; AVX-NEXT: js .LBB25_1
914 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
915 ; AVX-NEXT: jmp .LBB25_3
916 ; AVX-NEXT: .LBB25_1:
917 ; AVX-NEXT: shrq %rax
918 ; AVX-NEXT: orq %rax, %rcx
919 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
920 ; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
921 ; AVX-NEXT: .LBB25_3:
922 ; AVX-NEXT: vmovq %xmm0, %rax
923 ; AVX-NEXT: movl %eax, %ecx
924 ; AVX-NEXT: andl $1, %ecx
925 ; AVX-NEXT: testq %rax, %rax
926 ; AVX-NEXT: js .LBB25_4
928 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
929 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
930 ; AVX-NEXT: jmp .LBB25_6
931 ; AVX-NEXT: .LBB25_4:
932 ; AVX-NEXT: shrq %rax
933 ; AVX-NEXT: orq %rax, %rcx
934 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
935 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
936 ; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
937 ; AVX-NEXT: .LBB25_6:
938 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
939 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
940 ; AVX-NEXT: testq %rax, %rax
941 ; AVX-NEXT: js .LBB25_8
943 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
944 ; AVX-NEXT: .LBB25_8:
945 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
946 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
948 %cvt = uitofp <2 x i64> %a to <2 x float>
949 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
953 define <4 x float> @uitofp_4vf32_i16(<8 x i16> %a) {
954 ; SSE2-LABEL: uitofp_4vf32_i16:
956 ; SSE2-NEXT: pxor %xmm1, %xmm1
957 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
958 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
961 ; AVX-LABEL: uitofp_4vf32_i16:
963 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
964 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
966 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
967 %cvt = uitofp <4 x i16> %shuf to <4 x float>
971 define <4 x float> @uitofp_4vf32_i8(<16 x i8> %a) {
972 ; SSE2-LABEL: uitofp_4vf32_i8:
974 ; SSE2-NEXT: pxor %xmm1, %xmm1
975 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
976 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
977 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
980 ; AVX-LABEL: uitofp_4vf32_i8:
982 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
983 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
985 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
986 %cvt = uitofp <4 x i8> %shuf to <4 x float>
990 define <8 x float> @uitofp_8vf32(<8 x i32> %a) {
991 ; SSE2-LABEL: uitofp_8vf32:
993 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
994 ; SSE2-NEXT: movdqa %xmm0, %xmm3
995 ; SSE2-NEXT: pand %xmm2, %xmm3
996 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
997 ; SSE2-NEXT: por %xmm4, %xmm3
998 ; SSE2-NEXT: psrld $16, %xmm0
999 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
1000 ; SSE2-NEXT: por %xmm5, %xmm0
1001 ; SSE2-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
1002 ; SSE2-NEXT: addps %xmm6, %xmm0
1003 ; SSE2-NEXT: addps %xmm3, %xmm0
1004 ; SSE2-NEXT: pand %xmm1, %xmm2
1005 ; SSE2-NEXT: por %xmm4, %xmm2
1006 ; SSE2-NEXT: psrld $16, %xmm1
1007 ; SSE2-NEXT: por %xmm5, %xmm1
1008 ; SSE2-NEXT: addps %xmm6, %xmm1
1009 ; SSE2-NEXT: addps %xmm2, %xmm1
1012 ; AVX-LABEL: uitofp_8vf32:
1014 ; AVX-NEXT: vandps .LCPI28_0(%rip), %ymm0, %ymm1
1015 ; AVX-NEXT: vcvtdq2ps %ymm1, %ymm1
1016 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm2
1017 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
1018 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
1019 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1020 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
1021 ; AVX-NEXT: vmulps .LCPI28_1(%rip), %ymm0, %ymm0
1022 ; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
1024 %cvt = uitofp <8 x i32> %a to <8 x float>
1025 ret <8 x float> %cvt
1028 define <4 x float> @uitofp_4vf32_4i64(<4 x i64> %a) {
1029 ; SSE2-LABEL: uitofp_4vf32_4i64:
1031 ; SSE2-NEXT: movd %xmm1, %rax
1032 ; SSE2-NEXT: movl %eax, %ecx
1033 ; SSE2-NEXT: andl $1, %ecx
1034 ; SSE2-NEXT: testq %rax, %rax
1035 ; SSE2-NEXT: js .LBB29_1
1036 ; SSE2-NEXT: # BB#2:
1037 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm3
1038 ; SSE2-NEXT: jmp .LBB29_3
1039 ; SSE2-NEXT: .LBB29_1:
1040 ; SSE2-NEXT: shrq %rax
1041 ; SSE2-NEXT: orq %rax, %rcx
1042 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm3
1043 ; SSE2-NEXT: addss %xmm3, %xmm3
1044 ; SSE2-NEXT: .LBB29_3:
1045 ; SSE2-NEXT: movd %xmm0, %rax
1046 ; SSE2-NEXT: movl %eax, %ecx
1047 ; SSE2-NEXT: andl $1, %ecx
1048 ; SSE2-NEXT: testq %rax, %rax
1049 ; SSE2-NEXT: js .LBB29_4
1050 ; SSE2-NEXT: # BB#5:
1051 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm2
1052 ; SSE2-NEXT: jmp .LBB29_6
1053 ; SSE2-NEXT: .LBB29_4:
1054 ; SSE2-NEXT: shrq %rax
1055 ; SSE2-NEXT: orq %rax, %rcx
1056 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm2
1057 ; SSE2-NEXT: addss %xmm2, %xmm2
1058 ; SSE2-NEXT: .LBB29_6:
1059 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1060 ; SSE2-NEXT: movd %xmm1, %rax
1061 ; SSE2-NEXT: movl %eax, %ecx
1062 ; SSE2-NEXT: andl $1, %ecx
1063 ; SSE2-NEXT: testq %rax, %rax
1064 ; SSE2-NEXT: js .LBB29_7
1065 ; SSE2-NEXT: # BB#8:
1066 ; SSE2-NEXT: xorps %xmm1, %xmm1
1067 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
1068 ; SSE2-NEXT: jmp .LBB29_9
1069 ; SSE2-NEXT: .LBB29_7:
1070 ; SSE2-NEXT: shrq %rax
1071 ; SSE2-NEXT: orq %rax, %rcx
1072 ; SSE2-NEXT: xorps %xmm1, %xmm1
1073 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1
1074 ; SSE2-NEXT: addss %xmm1, %xmm1
1075 ; SSE2-NEXT: .LBB29_9:
1076 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1077 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1078 ; SSE2-NEXT: movd %xmm0, %rax
1079 ; SSE2-NEXT: movl %eax, %ecx
1080 ; SSE2-NEXT: andl $1, %ecx
1081 ; SSE2-NEXT: testq %rax, %rax
1082 ; SSE2-NEXT: js .LBB29_10
1083 ; SSE2-NEXT: # BB#11:
1084 ; SSE2-NEXT: xorps %xmm0, %xmm0
1085 ; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
1086 ; SSE2-NEXT: jmp .LBB29_12
1087 ; SSE2-NEXT: .LBB29_10:
1088 ; SSE2-NEXT: shrq %rax
1089 ; SSE2-NEXT: orq %rax, %rcx
1090 ; SSE2-NEXT: xorps %xmm0, %xmm0
1091 ; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0
1092 ; SSE2-NEXT: addss %xmm0, %xmm0
1093 ; SSE2-NEXT: .LBB29_12:
1094 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1095 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1096 ; SSE2-NEXT: movaps %xmm2, %xmm0
1099 ; AVX-LABEL: uitofp_4vf32_4i64:
1101 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
1102 ; AVX-NEXT: movl %eax, %ecx
1103 ; AVX-NEXT: andl $1, %ecx
1104 ; AVX-NEXT: testq %rax, %rax
1105 ; AVX-NEXT: js .LBB29_1
1107 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
1108 ; AVX-NEXT: jmp .LBB29_3
1109 ; AVX-NEXT: .LBB29_1:
1110 ; AVX-NEXT: shrq %rax
1111 ; AVX-NEXT: orq %rax, %rcx
1112 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
1113 ; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
1114 ; AVX-NEXT: .LBB29_3:
1115 ; AVX-NEXT: vmovq %xmm0, %rax
1116 ; AVX-NEXT: movl %eax, %ecx
1117 ; AVX-NEXT: andl $1, %ecx
1118 ; AVX-NEXT: testq %rax, %rax
1119 ; AVX-NEXT: js .LBB29_4
1121 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
1122 ; AVX-NEXT: jmp .LBB29_6
1123 ; AVX-NEXT: .LBB29_4:
1124 ; AVX-NEXT: shrq %rax
1125 ; AVX-NEXT: orq %rax, %rcx
1126 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
1127 ; AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2
1128 ; AVX-NEXT: .LBB29_6:
1129 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1130 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
1131 ; AVX-NEXT: vmovq %xmm0, %rax
1132 ; AVX-NEXT: movl %eax, %ecx
1133 ; AVX-NEXT: andl $1, %ecx
1134 ; AVX-NEXT: testq %rax, %rax
1135 ; AVX-NEXT: js .LBB29_7
1137 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
1138 ; AVX-NEXT: jmp .LBB29_9
1139 ; AVX-NEXT: .LBB29_7:
1140 ; AVX-NEXT: shrq %rax
1141 ; AVX-NEXT: orq %rax, %rcx
1142 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
1143 ; AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2
1144 ; AVX-NEXT: .LBB29_9:
1145 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1146 ; AVX-NEXT: vpextrq $1, %xmm0, %rax
1147 ; AVX-NEXT: movl %eax, %ecx
1148 ; AVX-NEXT: andl $1, %ecx
1149 ; AVX-NEXT: testq %rax, %rax
1150 ; AVX-NEXT: js .LBB29_10
1151 ; AVX-NEXT: # BB#11:
1152 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1153 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
1154 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1155 ; AVX-NEXT: vzeroupper
1157 ; AVX-NEXT: .LBB29_10:
1158 ; AVX-NEXT: shrq %rax
1159 ; AVX-NEXT: orq %rax, %rcx
1160 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
1161 ; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
1162 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1163 ; AVX-NEXT: vzeroupper
1165 %cvt = uitofp <4 x i64> %a to <4 x float>
1166 ret <4 x float> %cvt
1169 define <8 x float> @uitofp_8vf32_i16(<8 x i16> %a) {
1170 ; SSE2-LABEL: uitofp_8vf32_i16:
1172 ; SSE2-NEXT: pxor %xmm1, %xmm1
1173 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1174 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
1175 ; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
1176 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
1177 ; SSE2-NEXT: pand .LCPI30_0(%rip), %xmm0
1178 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
1179 ; SSE2-NEXT: movaps %xmm2, %xmm0
1182 ; AVX-LABEL: uitofp_8vf32_i16:
1184 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1185 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1186 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1187 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1188 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
1190 %cvt = uitofp <8 x i16> %a to <8 x float>
1191 ret <8 x float> %cvt
1194 define <8 x float> @uitofp_8vf32_i8(<16 x i8> %a) {
1195 ; SSE2-LABEL: uitofp_8vf32_i8:
1197 ; SSE2-NEXT: pxor %xmm1, %xmm1
1198 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1199 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1200 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
1201 ; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
1202 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1203 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
1204 ; SSE2-NEXT: pand .LCPI31_0(%rip), %xmm0
1205 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
1206 ; SSE2-NEXT: movaps %xmm2, %xmm0
1209 ; AVX-LABEL: uitofp_8vf32_i8:
1211 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1212 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1213 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
1214 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1215 ; AVX-NEXT: vandps .LCPI31_0(%rip), %ymm0, %ymm0
1216 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
1218 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1219 %cvt = uitofp <8 x i8> %shuf to <8 x float>
1220 ret <8 x float> %cvt