1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX
5 ; Double to Signed Integer
; fptosi <2 x double> -> <2 x i64>: no packed conversion is used here; the
; CHECK lines expect per-lane scalarization (cvttsd2si on each element, then
; the results repacked with punpcklqdq).
8 define <2 x i64> @fptosi_2vf64(<2 x double> %a) {
9 ; SSE2-LABEL: fptosi_2vf64:
11 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
12 ; SSE2-NEXT: movd %rax, %xmm1
13 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
14 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
15 ; SSE2-NEXT: movd %rax, %xmm0
16 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
17 ; SSE2-NEXT: movdqa %xmm1, %xmm0
20 ; AVX-LABEL: fptosi_2vf64:
22 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
23 ; AVX-NEXT: vmovq %rax, %xmm1
24 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
25 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
26 ; AVX-NEXT: vmovq %rax, %xmm0
27 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
29 %cvt = fptosi <2 x double> %a to <2 x i64>
; fptosi <2 x double> -> <2 x i32>, widened to <4 x i32> by the shufflevector.
; Expects the same scalarized i64 converts as above, followed by a pshufd
; ([0,2,2,3]) to pack the two 32-bit results into the low lanes.
33 define <4 x i32> @fptosi_2vf64_i32(<2 x double> %a) {
34 ; SSE2-LABEL: fptosi_2vf64_i32:
36 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
37 ; SSE2-NEXT: movd %rax, %xmm1
38 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
39 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
40 ; SSE2-NEXT: movd %rax, %xmm0
41 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
42 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
45 ; AVX-LABEL: fptosi_2vf64_i32:
47 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
48 ; AVX-NEXT: vmovq %rax, %xmm1
49 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
50 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
51 ; AVX-NEXT: vmovq %rax, %xmm0
52 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
53 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
55 %cvt = fptosi <2 x double> %a to <2 x i32>
56 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; fptosi <4 x double> -> <4 x i64>: scalarized per lane on both 128-bit
; halves. AVX extracts the high half with vextractf128, converts each lane,
; and reassembles the ymm result with vinsertf128.
60 define <4 x i64> @fptosi_4vf64(<4 x double> %a) {
61 ; SSE2-LABEL: fptosi_4vf64:
63 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
64 ; SSE2-NEXT: movd %rax, %xmm2
65 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
66 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
67 ; SSE2-NEXT: movd %rax, %xmm0
68 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
69 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
70 ; SSE2-NEXT: movd %rax, %xmm3
71 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
72 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
73 ; SSE2-NEXT: movd %rax, %xmm0
74 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
75 ; SSE2-NEXT: movdqa %xmm2, %xmm0
76 ; SSE2-NEXT: movdqa %xmm3, %xmm1
79 ; AVX-LABEL: fptosi_4vf64:
81 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
82 ; AVX-NEXT: vcvttsd2si %xmm1, %rax
83 ; AVX-NEXT: vmovq %rax, %xmm2
84 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
85 ; AVX-NEXT: vcvttsd2si %xmm1, %rax
86 ; AVX-NEXT: vmovq %rax, %xmm1
87 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
88 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
89 ; AVX-NEXT: vmovq %rax, %xmm2
90 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
91 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
92 ; AVX-NEXT: vmovq %rax, %xmm0
93 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
94 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
96 %cvt = fptosi <4 x double> %a to <4 x i64>
; fptosi <4 x double> -> <4 x i32>: SSE2 is expected to scalarize and repack;
; AVX has a direct packed instruction, so a single vcvttpd2dqy (plus
; vzeroupper before returning to SSE-state code) is expected.
100 define <4 x i32> @fptosi_4vf64_i32(<4 x double> %a) {
101 ; SSE2-LABEL: fptosi_4vf64_i32:
103 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
104 ; SSE2-NEXT: movd %rax, %xmm2
105 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
106 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
107 ; SSE2-NEXT: movd %rax, %xmm1
108 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
109 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
110 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
111 ; SSE2-NEXT: movd %rax, %xmm2
112 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
113 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
114 ; SSE2-NEXT: movd %rax, %xmm0
115 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
116 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
117 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
120 ; AVX-LABEL: fptosi_4vf64_i32:
122 ; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
123 ; AVX-NEXT: vzeroupper
125 %cvt = fptosi <4 x double> %a to <4 x i32>
130 ; Double to Unsigned Integer
; fptoui <2 x double> -> <2 x i64>: x86 has no scalar f64->u64 convert here,
; so each lane uses the standard compensation sequence: convert both the
; value and (value - 2^63), XOR the latter with 0x8000000000000000, then
; ucomisd + cmovae selects the adjusted result when value >= 2^63.
134 define <2 x i64> @fptoui_2vf64(<2 x double> %a) {
135 ; SSE2-LABEL: fptoui_2vf64:
137 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
138 ; SSE2-NEXT: movapd %xmm0, %xmm1
139 ; SSE2-NEXT: subsd %xmm2, %xmm1
140 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
141 ; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
142 ; SSE2-NEXT: xorq %rcx, %rax
143 ; SSE2-NEXT: cvttsd2si %xmm0, %rdx
144 ; SSE2-NEXT: ucomisd %xmm2, %xmm0
145 ; SSE2-NEXT: cmovaeq %rax, %rdx
146 ; SSE2-NEXT: movd %rdx, %xmm1
147 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
148 ; SSE2-NEXT: movapd %xmm0, %xmm3
149 ; SSE2-NEXT: subsd %xmm2, %xmm3
150 ; SSE2-NEXT: cvttsd2si %xmm3, %rax
151 ; SSE2-NEXT: xorq %rcx, %rax
152 ; SSE2-NEXT: cvttsd2si %xmm0, %rcx
153 ; SSE2-NEXT: ucomisd %xmm2, %xmm0
154 ; SSE2-NEXT: cmovaeq %rax, %rcx
155 ; SSE2-NEXT: movd %rcx, %xmm0
156 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
157 ; SSE2-NEXT: movdqa %xmm1, %xmm0
160 ; AVX-LABEL: fptoui_2vf64:
162 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
163 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
164 ; AVX-NEXT: vcvttsd2si %xmm2, %rax
165 ; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
166 ; AVX-NEXT: xorq %rcx, %rax
167 ; AVX-NEXT: vcvttsd2si %xmm0, %rdx
168 ; AVX-NEXT: vucomisd %xmm1, %xmm0
169 ; AVX-NEXT: cmovaeq %rax, %rdx
170 ; AVX-NEXT: vmovq %rdx, %xmm2
171 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
172 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
173 ; AVX-NEXT: vcvttsd2si %xmm3, %rax
174 ; AVX-NEXT: xorq %rcx, %rax
175 ; AVX-NEXT: vcvttsd2si %xmm0, %rcx
176 ; AVX-NEXT: vucomisd %xmm1, %xmm0
177 ; AVX-NEXT: cmovaeq %rax, %rcx
178 ; AVX-NEXT: vmovq %rcx, %xmm0
179 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
181 %cvt = fptoui <2 x double> %a to <2 x i64>
; fptoui <2 x double> -> <2 x i32>, widened to <4 x i32>. Uses the same
; subtract-2^63 / xor / cmovae unsigned-convert compensation per lane as
; fptoui_2vf64, then packs the low 32 bits of each result with pshufd.
184 define <4 x i32> @fptoui_2vf64_i32(<2 x double> %a) {
185 ; SSE2-LABEL: fptoui_2vf64_i32:
187 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
188 ; SSE2-NEXT: movapd %xmm0, %xmm2
189 ; SSE2-NEXT: subsd %xmm1, %xmm2
190 ; SSE2-NEXT: cvttsd2si %xmm2, %rax
191 ; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
192 ; SSE2-NEXT: xorq %rcx, %rax
193 ; SSE2-NEXT: cvttsd2si %xmm0, %rdx
194 ; SSE2-NEXT: ucomisd %xmm1, %xmm0
195 ; SSE2-NEXT: cmovaeq %rax, %rdx
196 ; SSE2-NEXT: movd %rdx, %xmm2
197 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
198 ; SSE2-NEXT: movapd %xmm0, %xmm3
199 ; SSE2-NEXT: subsd %xmm1, %xmm3
200 ; SSE2-NEXT: cvttsd2si %xmm3, %rax
201 ; SSE2-NEXT: xorq %rcx, %rax
202 ; SSE2-NEXT: cvttsd2si %xmm0, %rcx
203 ; SSE2-NEXT: ucomisd %xmm1, %xmm0
204 ; SSE2-NEXT: cmovaeq %rax, %rcx
205 ; SSE2-NEXT: movd %rcx, %xmm0
206 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
207 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
210 ; AVX-LABEL: fptoui_2vf64_i32:
212 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
213 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
214 ; AVX-NEXT: vcvttsd2si %xmm2, %rax
215 ; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
216 ; AVX-NEXT: xorq %rcx, %rax
217 ; AVX-NEXT: vcvttsd2si %xmm0, %rdx
218 ; AVX-NEXT: vucomisd %xmm1, %xmm0
219 ; AVX-NEXT: cmovaeq %rax, %rdx
220 ; AVX-NEXT: vmovq %rdx, %xmm2
221 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
222 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
223 ; AVX-NEXT: vcvttsd2si %xmm3, %rax
224 ; AVX-NEXT: xorq %rcx, %rax
225 ; AVX-NEXT: vcvttsd2si %xmm0, %rcx
226 ; AVX-NEXT: vucomisd %xmm1, %xmm0
227 ; AVX-NEXT: cmovaeq %rax, %rcx
228 ; AVX-NEXT: vmovq %rcx, %xmm0
229 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
230 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
232 %cvt = fptoui <2 x double> %a to <2 x i32>
233 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; fptoui <4 x double> -> <4 x i64>: the unsigned compensation sequence
; (subtract 2^63, convert, xor sign bit, cmovae on ucomisd) applied to all
; four lanes. SSE2 works on the two input xmm registers directly; AVX
; extracts the high 128-bit half first and reassembles with vinsertf128.
237 define <4 x i64> @fptoui_4vf64(<4 x double> %a) {
238 ; SSE2-LABEL: fptoui_4vf64:
240 ; SSE2-NEXT: movapd %xmm0, %xmm2
241 ; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
242 ; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
243 ; SSE2-NEXT: subsd %xmm3, %xmm0
244 ; SSE2-NEXT: cvttsd2si %xmm0, %rcx
245 ; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
246 ; SSE2-NEXT: xorq %rax, %rcx
247 ; SSE2-NEXT: cvttsd2si %xmm2, %rdx
248 ; SSE2-NEXT: ucomisd %xmm3, %xmm2
249 ; SSE2-NEXT: cmovaeq %rcx, %rdx
250 ; SSE2-NEXT: movd %rdx, %xmm0
251 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
252 ; SSE2-NEXT: movapd %xmm2, %xmm4
253 ; SSE2-NEXT: subsd %xmm3, %xmm4
254 ; SSE2-NEXT: cvttsd2si %xmm4, %rcx
255 ; SSE2-NEXT: xorq %rax, %rcx
256 ; SSE2-NEXT: cvttsd2si %xmm2, %rdx
257 ; SSE2-NEXT: ucomisd %xmm3, %xmm2
258 ; SSE2-NEXT: cmovaeq %rcx, %rdx
259 ; SSE2-NEXT: movd %rdx, %xmm2
260 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
261 ; SSE2-NEXT: movapd %xmm1, %xmm2
262 ; SSE2-NEXT: subsd %xmm3, %xmm2
263 ; SSE2-NEXT: cvttsd2si %xmm2, %rcx
264 ; SSE2-NEXT: xorq %rax, %rcx
265 ; SSE2-NEXT: cvttsd2si %xmm1, %rdx
266 ; SSE2-NEXT: ucomisd %xmm3, %xmm1
267 ; SSE2-NEXT: cmovaeq %rcx, %rdx
268 ; SSE2-NEXT: movd %rdx, %xmm2
269 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
270 ; SSE2-NEXT: movapd %xmm1, %xmm4
271 ; SSE2-NEXT: subsd %xmm3, %xmm4
272 ; SSE2-NEXT: cvttsd2si %xmm4, %rcx
273 ; SSE2-NEXT: xorq %rax, %rcx
274 ; SSE2-NEXT: cvttsd2si %xmm1, %rax
275 ; SSE2-NEXT: ucomisd %xmm3, %xmm1
276 ; SSE2-NEXT: cmovaeq %rcx, %rax
277 ; SSE2-NEXT: movd %rax, %xmm1
278 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
279 ; SSE2-NEXT: movdqa %xmm2, %xmm1
282 ; AVX-LABEL: fptoui_4vf64:
284 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
285 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
286 ; AVX-NEXT: vsubsd %xmm1, %xmm2, %xmm3
287 ; AVX-NEXT: vcvttsd2si %xmm3, %rax
288 ; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
289 ; AVX-NEXT: xorq %rcx, %rax
290 ; AVX-NEXT: vcvttsd2si %xmm2, %rdx
291 ; AVX-NEXT: vucomisd %xmm1, %xmm2
292 ; AVX-NEXT: cmovaeq %rax, %rdx
293 ; AVX-NEXT: vmovq %rdx, %xmm3
294 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
295 ; AVX-NEXT: vsubsd %xmm1, %xmm2, %xmm4
296 ; AVX-NEXT: vcvttsd2si %xmm4, %rax
297 ; AVX-NEXT: xorq %rcx, %rax
298 ; AVX-NEXT: vcvttsd2si %xmm2, %rdx
299 ; AVX-NEXT: vucomisd %xmm1, %xmm2
300 ; AVX-NEXT: cmovaeq %rax, %rdx
301 ; AVX-NEXT: vmovq %rdx, %xmm2
302 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
303 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
304 ; AVX-NEXT: vcvttsd2si %xmm3, %rax
305 ; AVX-NEXT: xorq %rcx, %rax
306 ; AVX-NEXT: vcvttsd2si %xmm0, %rdx
307 ; AVX-NEXT: vucomisd %xmm1, %xmm0
308 ; AVX-NEXT: cmovaeq %rax, %rdx
309 ; AVX-NEXT: vmovq %rdx, %xmm3
310 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
311 ; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm4
312 ; AVX-NEXT: vcvttsd2si %xmm4, %rax
313 ; AVX-NEXT: xorq %rcx, %rax
314 ; AVX-NEXT: vcvttsd2si %xmm0, %rcx
315 ; AVX-NEXT: vucomisd %xmm1, %xmm0
316 ; AVX-NEXT: cmovaeq %rax, %rcx
317 ; AVX-NEXT: vmovq %rcx, %xmm0
318 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
319 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
321 %cvt = fptoui <4 x double> %a to <4 x i64>
; fptoui <4 x double> -> <4 x i32>: SSE2 still needs the full unsigned
; compensation sequence per lane. AVX instead converts each lane with a
; 64-bit vcvttsd2si (whose i64 range covers all u32 values) and inserts the
; low 32 bits with vmovd/vpinsrd — no compensation needed for a u32 result.
325 define <4 x i32> @fptoui_4vf64_i32(<4 x double> %a) {
326 ; SSE2-LABEL: fptoui_4vf64_i32:
328 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
329 ; SSE2-NEXT: movapd %xmm1, %xmm3
330 ; SSE2-NEXT: subsd %xmm2, %xmm3
331 ; SSE2-NEXT: cvttsd2si %xmm3, %rcx
332 ; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
333 ; SSE2-NEXT: xorq %rax, %rcx
334 ; SSE2-NEXT: cvttsd2si %xmm1, %rdx
335 ; SSE2-NEXT: ucomisd %xmm2, %xmm1
336 ; SSE2-NEXT: cmovaeq %rcx, %rdx
337 ; SSE2-NEXT: movd %rdx, %xmm3
338 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
339 ; SSE2-NEXT: movapd %xmm1, %xmm4
340 ; SSE2-NEXT: subsd %xmm2, %xmm4
341 ; SSE2-NEXT: cvttsd2si %xmm4, %rcx
342 ; SSE2-NEXT: xorq %rax, %rcx
343 ; SSE2-NEXT: cvttsd2si %xmm1, %rdx
344 ; SSE2-NEXT: ucomisd %xmm2, %xmm1
345 ; SSE2-NEXT: cmovaeq %rcx, %rdx
346 ; SSE2-NEXT: movd %rdx, %xmm1
347 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
348 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
349 ; SSE2-NEXT: movapd %xmm0, %xmm3
350 ; SSE2-NEXT: subsd %xmm2, %xmm3
351 ; SSE2-NEXT: cvttsd2si %xmm3, %rcx
352 ; SSE2-NEXT: xorq %rax, %rcx
353 ; SSE2-NEXT: cvttsd2si %xmm0, %rdx
354 ; SSE2-NEXT: ucomisd %xmm2, %xmm0
355 ; SSE2-NEXT: cmovaeq %rcx, %rdx
356 ; SSE2-NEXT: movd %rdx, %xmm3
357 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
358 ; SSE2-NEXT: movapd %xmm0, %xmm4
359 ; SSE2-NEXT: subsd %xmm2, %xmm4
360 ; SSE2-NEXT: cvttsd2si %xmm4, %rcx
361 ; SSE2-NEXT: xorq %rax, %rcx
362 ; SSE2-NEXT: cvttsd2si %xmm0, %rax
363 ; SSE2-NEXT: ucomisd %xmm2, %xmm0
364 ; SSE2-NEXT: cmovaeq %rcx, %rax
365 ; SSE2-NEXT: movd %rax, %xmm0
366 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
367 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
368 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
371 ; AVX-LABEL: fptoui_4vf64_i32:
373 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
374 ; AVX-NEXT: vcvttsd2si %xmm1, %rax
375 ; AVX-NEXT: vcvttsd2si %xmm0, %rcx
376 ; AVX-NEXT: vmovd %ecx, %xmm1
377 ; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
378 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
379 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
380 ; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
381 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
382 ; AVX-NEXT: vcvttsd2si %xmm0, %rax
383 ; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
384 ; AVX-NEXT: vzeroupper
386 %cvt = fptoui <4 x double> %a to <4 x i32>
391 ; Float to Signed Integer
; fptosi <4 x float> -> <4 x i32>: directly supported, a single packed
; cvttps2dq / vcvttps2dq is expected on both targets.
394 define <4 x i32> @fptosi_4vf32(<4 x float> %a) {
395 ; SSE2-LABEL: fptosi_4vf32:
397 ; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
400 ; AVX-LABEL: fptosi_4vf32:
402 ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
404 %cvt = fptosi <4 x float> %a to <4 x i32>
; fptosi of the low two floats to <2 x i64>: scalarized with cvttss2si,
; second lane obtained via shufps (SSE2) / vmovshdup (AVX), then repacked.
408 define <2 x i64> @fptosi_4vf32_i64(<4 x float> %a) {
409 ; SSE2-LABEL: fptosi_4vf32_i64:
411 ; SSE2-NEXT: cvttss2si %xmm0, %rax
412 ; SSE2-NEXT: movd %rax, %xmm1
413 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
414 ; SSE2-NEXT: cvttss2si %xmm0, %rax
415 ; SSE2-NEXT: movd %rax, %xmm0
416 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
417 ; SSE2-NEXT: movdqa %xmm1, %xmm0
420 ; AVX-LABEL: fptosi_4vf32_i64:
422 ; AVX-NEXT: vcvttss2si %xmm0, %rax
423 ; AVX-NEXT: vmovq %rax, %xmm1
424 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
425 ; AVX-NEXT: vcvttss2si %xmm0, %rax
426 ; AVX-NEXT: vmovq %rax, %xmm0
427 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
429 %shuf = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
430 %cvt = fptosi <2 x float> %shuf to <2 x i64>
; fptosi <8 x float> -> <8 x i32>: two 128-bit cvttps2dq on SSE2, a single
; 256-bit vcvttps2dq on AVX.
434 define <8 x i32> @fptosi_8vf32(<8 x float> %a) {
435 ; SSE2-LABEL: fptosi_8vf32:
437 ; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
438 ; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
441 ; AVX-LABEL: fptosi_8vf32:
443 ; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
445 %cvt = fptosi <8 x float> %a to <8 x i32>
; fptosi of the low four floats to <4 x i64>: fully scalarized with
; cvttss2si per element, lanes extracted by shufps/shufpd (SSE2) or
; vpermilps/vpermilpd/vmovshdup (AVX), with AVX reassembling via vinsertf128.
449 define <4 x i64> @fptosi_8vf32_i64(<8 x float> %a) {
450 ; SSE2-LABEL: fptosi_8vf32_i64:
452 ; SSE2-NEXT: cvttss2si %xmm0, %rax
453 ; SSE2-NEXT: movd %rax, %xmm2
454 ; SSE2-NEXT: movaps %xmm0, %xmm1
455 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
456 ; SSE2-NEXT: cvttss2si %xmm1, %rax
457 ; SSE2-NEXT: movd %rax, %xmm1
458 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
459 ; SSE2-NEXT: movaps %xmm0, %xmm1
460 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
461 ; SSE2-NEXT: cvttss2si %xmm1, %rax
462 ; SSE2-NEXT: movd %rax, %xmm3
463 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
464 ; SSE2-NEXT: cvttss2si %xmm0, %rax
465 ; SSE2-NEXT: movd %rax, %xmm1
466 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
467 ; SSE2-NEXT: movdqa %xmm2, %xmm0
470 ; AVX-LABEL: fptosi_8vf32_i64:
472 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
473 ; AVX-NEXT: vcvttss2si %xmm1, %rax
474 ; AVX-NEXT: vmovq %rax, %xmm1
475 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
476 ; AVX-NEXT: vcvttss2si %xmm2, %rax
477 ; AVX-NEXT: vmovq %rax, %xmm2
478 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
479 ; AVX-NEXT: vcvttss2si %xmm0, %rax
480 ; AVX-NEXT: vmovq %rax, %xmm2
481 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
482 ; AVX-NEXT: vcvttss2si %xmm0, %rax
483 ; AVX-NEXT: vmovq %rax, %xmm0
484 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
485 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
487 %shuf = shufflevector <8 x float> %a, <8 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
488 %cvt = fptosi <4 x float> %shuf to <4 x i64>
493 ; Float to Unsigned Integer
; fptoui <4 x float> -> <4 x i32>: lowered per lane via 64-bit cvttss2si
; (the i64 range covers all u32 values, so no compensation sequence is
; needed); results are repacked with punpckldq (SSE2) or vpinsrd (AVX).
496 define <4 x i32> @fptoui_4vf32(<4 x float> %a) {
497 ; SSE2-LABEL: fptoui_4vf32:
499 ; SSE2-NEXT: movaps %xmm0, %xmm1
500 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
501 ; SSE2-NEXT: cvttss2si %xmm1, %rax
502 ; SSE2-NEXT: movd %eax, %xmm1
503 ; SSE2-NEXT: movaps %xmm0, %xmm2
504 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
505 ; SSE2-NEXT: cvttss2si %xmm2, %rax
506 ; SSE2-NEXT: movd %eax, %xmm2
507 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
508 ; SSE2-NEXT: cvttss2si %xmm0, %rax
509 ; SSE2-NEXT: movd %eax, %xmm1
510 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
511 ; SSE2-NEXT: cvttss2si %xmm0, %rax
512 ; SSE2-NEXT: movd %eax, %xmm0
513 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
514 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
515 ; SSE2-NEXT: movdqa %xmm1, %xmm0
518 ; AVX-LABEL: fptoui_4vf32:
520 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
521 ; AVX-NEXT: vcvttss2si %xmm1, %rax
522 ; AVX-NEXT: vcvttss2si %xmm0, %rcx
523 ; AVX-NEXT: vmovd %ecx, %xmm1
524 ; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
525 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
526 ; AVX-NEXT: vcvttss2si %xmm2, %rax
527 ; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
528 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
529 ; AVX-NEXT: vcvttss2si %xmm0, %rax
530 ; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
532 %cvt = fptoui <4 x float> %a to <4 x i32>
; fptoui of the low two floats to <2 x i64>: per lane, the float->u64
; compensation sequence (subss 2^63, cvttss2si, xor sign bit, ucomiss +
; cmovae), mirroring the double version above but with scalar-float ops.
536 define <2 x i64> @fptoui_4vf32_i64(<4 x float> %a) {
537 ; SSE2-LABEL: fptoui_4vf32_i64:
539 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
540 ; SSE2-NEXT: movaps %xmm0, %xmm1
541 ; SSE2-NEXT: subss %xmm2, %xmm1
542 ; SSE2-NEXT: cvttss2si %xmm1, %rax
543 ; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
544 ; SSE2-NEXT: xorq %rcx, %rax
545 ; SSE2-NEXT: cvttss2si %xmm0, %rdx
546 ; SSE2-NEXT: ucomiss %xmm2, %xmm0
547 ; SSE2-NEXT: cmovaeq %rax, %rdx
548 ; SSE2-NEXT: movd %rdx, %xmm1
549 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
550 ; SSE2-NEXT: movaps %xmm0, %xmm3
551 ; SSE2-NEXT: subss %xmm2, %xmm3
552 ; SSE2-NEXT: cvttss2si %xmm3, %rax
553 ; SSE2-NEXT: xorq %rcx, %rax
554 ; SSE2-NEXT: cvttss2si %xmm0, %rcx
555 ; SSE2-NEXT: ucomiss %xmm2, %xmm0
556 ; SSE2-NEXT: cmovaeq %rax, %rcx
557 ; SSE2-NEXT: movd %rcx, %xmm0
558 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
559 ; SSE2-NEXT: movdqa %xmm1, %xmm0
562 ; AVX-LABEL: fptoui_4vf32_i64:
564 ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
565 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm2
566 ; AVX-NEXT: vcvttss2si %xmm2, %rax
567 ; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
568 ; AVX-NEXT: xorq %rcx, %rax
569 ; AVX-NEXT: vcvttss2si %xmm0, %rdx
570 ; AVX-NEXT: vucomiss %xmm1, %xmm0
571 ; AVX-NEXT: cmovaeq %rax, %rdx
572 ; AVX-NEXT: vmovq %rdx, %xmm2
573 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
574 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3
575 ; AVX-NEXT: vcvttss2si %xmm3, %rax
576 ; AVX-NEXT: xorq %rcx, %rax
577 ; AVX-NEXT: vcvttss2si %xmm0, %rcx
578 ; AVX-NEXT: vucomiss %xmm1, %xmm0
579 ; AVX-NEXT: cmovaeq %rax, %rcx
580 ; AVX-NEXT: vmovq %rcx, %xmm0
581 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
583 %shuf = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
584 %cvt = fptoui <2 x float> %shuf to <2 x i64>
; fptoui <8 x float> -> <8 x i32>: each of the eight lanes goes through a
; 64-bit cvttss2si (covers the u32 range without compensation); SSE2 repacks
; with punpckldq chains, AVX with vpinsrd and a final vinsertf128.
588 define <8 x i32> @fptoui_8vf32(<8 x float> %a) {
589 ; SSE2-LABEL: fptoui_8vf32:
591 ; SSE2-NEXT: movaps %xmm0, %xmm2
592 ; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
593 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
594 ; SSE2-NEXT: cvttss2si %xmm0, %rax
595 ; SSE2-NEXT: movd %eax, %xmm0
596 ; SSE2-NEXT: movaps %xmm2, %xmm3
597 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
598 ; SSE2-NEXT: cvttss2si %xmm3, %rax
599 ; SSE2-NEXT: movd %eax, %xmm3
600 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
601 ; SSE2-NEXT: cvttss2si %xmm2, %rax
602 ; SSE2-NEXT: movd %eax, %xmm0
603 ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
604 ; SSE2-NEXT: cvttss2si %xmm2, %rax
605 ; SSE2-NEXT: movd %eax, %xmm2
606 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
607 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
608 ; SSE2-NEXT: movaps %xmm1, %xmm2
609 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
610 ; SSE2-NEXT: cvttss2si %xmm2, %rax
611 ; SSE2-NEXT: movd %eax, %xmm2
612 ; SSE2-NEXT: movaps %xmm1, %xmm3
613 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
614 ; SSE2-NEXT: cvttss2si %xmm3, %rax
615 ; SSE2-NEXT: movd %eax, %xmm3
616 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
617 ; SSE2-NEXT: cvttss2si %xmm1, %rax
618 ; SSE2-NEXT: movd %eax, %xmm2
619 ; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
620 ; SSE2-NEXT: cvttss2si %xmm1, %rax
621 ; SSE2-NEXT: movd %eax, %xmm1
622 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
623 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
624 ; SSE2-NEXT: movdqa %xmm2, %xmm1
627 ; AVX-LABEL: fptoui_8vf32:
629 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
630 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
631 ; AVX-NEXT: vcvttss2si %xmm2, %rax
632 ; AVX-NEXT: vcvttss2si %xmm1, %rcx
633 ; AVX-NEXT: vmovd %ecx, %xmm2
634 ; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
635 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
636 ; AVX-NEXT: vcvttss2si %xmm3, %rax
637 ; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
638 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
639 ; AVX-NEXT: vcvttss2si %xmm1, %rax
640 ; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
641 ; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
642 ; AVX-NEXT: vcvttss2si %xmm2, %rax
643 ; AVX-NEXT: vcvttss2si %xmm0, %rcx
644 ; AVX-NEXT: vmovd %ecx, %xmm2
645 ; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
646 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
647 ; AVX-NEXT: vcvttss2si %xmm3, %rax
648 ; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
649 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
650 ; AVX-NEXT: vcvttss2si %xmm0, %rax
651 ; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
652 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
654 %cvt = fptoui <8 x float> %a to <8 x i32>
; fptoui of the low four floats to <4 x i64>: the float->u64 compensation
; sequence (subss 2^63 / cvttss2si / xor sign bit / ucomiss + cmovae)
; applied to all four lanes, with AVX assembling the ymm via vinsertf128.
658 define <4 x i64> @fptoui_8vf32_i64(<8 x float> %a) {
659 ; SSE2-LABEL: fptoui_8vf32_i64:
661 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
662 ; SSE2-NEXT: movaps %xmm0, %xmm2
663 ; SSE2-NEXT: subss %xmm1, %xmm2
664 ; SSE2-NEXT: cvttss2si %xmm2, %rcx
665 ; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
666 ; SSE2-NEXT: xorq %rax, %rcx
667 ; SSE2-NEXT: cvttss2si %xmm0, %rdx
668 ; SSE2-NEXT: ucomiss %xmm1, %xmm0
669 ; SSE2-NEXT: cmovaeq %rcx, %rdx
670 ; SSE2-NEXT: movd %rdx, %xmm2
671 ; SSE2-NEXT: movaps %xmm0, %xmm3
672 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
673 ; SSE2-NEXT: movaps %xmm3, %xmm4
674 ; SSE2-NEXT: subss %xmm1, %xmm4
675 ; SSE2-NEXT: cvttss2si %xmm4, %rcx
676 ; SSE2-NEXT: xorq %rax, %rcx
677 ; SSE2-NEXT: cvttss2si %xmm3, %rdx
678 ; SSE2-NEXT: ucomiss %xmm1, %xmm3
679 ; SSE2-NEXT: cmovaeq %rcx, %rdx
680 ; SSE2-NEXT: movd %rdx, %xmm3
681 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
682 ; SSE2-NEXT: movaps %xmm0, %xmm3
683 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
684 ; SSE2-NEXT: movaps %xmm3, %xmm4
685 ; SSE2-NEXT: subss %xmm1, %xmm4
686 ; SSE2-NEXT: cvttss2si %xmm4, %rcx
687 ; SSE2-NEXT: xorq %rax, %rcx
688 ; SSE2-NEXT: cvttss2si %xmm3, %rdx
689 ; SSE2-NEXT: ucomiss %xmm1, %xmm3
690 ; SSE2-NEXT: cmovaeq %rcx, %rdx
691 ; SSE2-NEXT: movd %rdx, %xmm3
692 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
693 ; SSE2-NEXT: movapd %xmm0, %xmm4
694 ; SSE2-NEXT: subss %xmm1, %xmm4
695 ; SSE2-NEXT: cvttss2si %xmm4, %rcx
696 ; SSE2-NEXT: xorq %rax, %rcx
697 ; SSE2-NEXT: cvttss2si %xmm0, %rax
698 ; SSE2-NEXT: ucomiss %xmm1, %xmm0
699 ; SSE2-NEXT: cmovaeq %rcx, %rax
700 ; SSE2-NEXT: movd %rax, %xmm1
701 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
702 ; SSE2-NEXT: movdqa %xmm2, %xmm0
705 ; AVX-LABEL: fptoui_8vf32_i64:
707 ; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
708 ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
709 ; AVX-NEXT: vsubss %xmm1, %xmm2, %xmm3
710 ; AVX-NEXT: vcvttss2si %xmm3, %rax
711 ; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
712 ; AVX-NEXT: xorq %rcx, %rax
713 ; AVX-NEXT: vcvttss2si %xmm2, %rdx
714 ; AVX-NEXT: vucomiss %xmm1, %xmm2
715 ; AVX-NEXT: cmovaeq %rax, %rdx
716 ; AVX-NEXT: vmovq %rdx, %xmm2
717 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
718 ; AVX-NEXT: vsubss %xmm1, %xmm3, %xmm4
719 ; AVX-NEXT: vcvttss2si %xmm4, %rax
720 ; AVX-NEXT: xorq %rcx, %rax
721 ; AVX-NEXT: vcvttss2si %xmm3, %rdx
722 ; AVX-NEXT: vucomiss %xmm1, %xmm3
723 ; AVX-NEXT: cmovaeq %rax, %rdx
724 ; AVX-NEXT: vmovq %rdx, %xmm3
725 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
726 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3
727 ; AVX-NEXT: vcvttss2si %xmm3, %rax
728 ; AVX-NEXT: xorq %rcx, %rax
729 ; AVX-NEXT: vcvttss2si %xmm0, %rdx
730 ; AVX-NEXT: vucomiss %xmm1, %xmm0
731 ; AVX-NEXT: cmovaeq %rax, %rdx
732 ; AVX-NEXT: vmovq %rdx, %xmm3
733 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
734 ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm4
735 ; AVX-NEXT: vcvttss2si %xmm4, %rax
736 ; AVX-NEXT: xorq %rcx, %rax
737 ; AVX-NEXT: vcvttss2si %xmm0, %rcx
738 ; AVX-NEXT: vucomiss %xmm1, %xmm0
739 ; AVX-NEXT: cmovaeq %rax, %rcx
740 ; AVX-NEXT: vmovq %rcx, %xmm0
741 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
742 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
744 %shuf = shufflevector <8 x float> %a, <8 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
745 %cvt = fptoui <4 x float> %shuf to <4 x i64>
; Constant folding: fptosi of constant doubles must fold to a constant-pool
; load of the converted <2 x i64> (note -1.0 -> 0xFFFF... i.e. i64 -1).
753 define <2 x i64> @fptosi_2vf64c() {
754 ; SSE2-LABEL: fptosi_2vf64c:
756 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
759 ; AVX-LABEL: fptosi_2vf64c:
761 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
763 %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
; Constant folding: fptosi to <2 x i32> (widened by shuffle) folds to a
; constant load with undef upper lanes.
767 define <4 x i32> @fptosi_2vf64c_i32() {
768 ; SSE2-LABEL: fptosi_2vf64c_i32:
770 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
773 ; AVX-LABEL: fptosi_2vf64c_i32:
775 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
777 %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
778 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Constant folding: 256-bit fptosi folds to two xmm constant loads on SSE2
; and one ymm constant load on AVX.
782 define <4 x i64> @fptosi_4vf64c() {
783 ; SSE2-LABEL: fptosi_4vf64c:
785 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
786 ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
789 ; AVX-LABEL: fptosi_4vf64c:
791 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
793 %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
; Constant folding: fptosi <4 x double> -> <4 x i32> folds to a single xmm
; constant load on both targets.
797 define <4 x i32> @fptosi_4vf64c_i32() {
798 ; SSE2-LABEL: fptosi_4vf64c_i32:
800 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
803 ; AVX-LABEL: fptosi_4vf64c_i32:
805 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
807 %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
; Constant folding: fptoui of constant doubles folds to a constant load.
811 define <2 x i64> @fptoui_2vf64c() {
812 ; SSE2-LABEL: fptoui_2vf64c:
814 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4]
817 ; AVX-LABEL: fptoui_2vf64c:
819 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
821 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
; Constant folding: fptoui to <2 x i32> (widened by shuffle) folds to a
; constant load with undef upper lanes. %a is intentionally unused.
825 define <4 x i32> @fptoui_2vf64c_i32(<2 x double> %a) {
826 ; SSE2-LABEL: fptoui_2vf64c_i32:
828 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
831 ; AVX-LABEL: fptoui_2vf64c_i32:
833 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
835 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
836 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Constant folding: 256-bit fptoui folds to two xmm loads (SSE2) or one
; ymm load (AVX). %a is intentionally unused.
840 define <4 x i64> @fptoui_4vf64c(<4 x double> %a) {
841 ; SSE2-LABEL: fptoui_4vf64c:
843 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4]
844 ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [6,8]
847 ; AVX-LABEL: fptoui_4vf64c:
849 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
851 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
; Constant folding: fptoui <4 x double> -> <4 x i32> folds to one xmm
; constant load. %a is intentionally unused.
855 define <4 x i32> @fptoui_4vf64c_i32(<4 x double> %a) {
856 ; SSE2-LABEL: fptoui_4vf64c_i32:
858 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
861 ; AVX-LABEL: fptoui_4vf64c_i32:
863 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
865 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
; Constant folding: fptosi of constant floats folds to a constant load
; (-1.0 -> 4294967295, the u32 print of i32 -1).
869 define <4 x i32> @fptosi_4vf32c() {
870 ; SSE2-LABEL: fptosi_4vf32c:
872 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
875 ; AVX-LABEL: fptosi_4vf32c:
877 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
879 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
; Constant folding: float -> i64 constant conversion folds to two xmm
; loads (SSE2) or one ymm load (AVX).
883 define <4 x i64> @fptosi_4vf32c_i64() {
884 ; SSE2-LABEL: fptosi_4vf32c_i64:
886 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
887 ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3]
890 ; AVX-LABEL: fptosi_4vf32c_i64:
892 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
894 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
; Constant folding: 8-lane float -> i32 constant conversion. %a is
; intentionally unused.
898 define <8 x i32> @fptosi_8vf32c(<8 x float> %a) {
899 ; SSE2-LABEL: fptosi_8vf32c:
901 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
902 ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
905 ; AVX-LABEL: fptosi_8vf32c:
907 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
909 %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
; Constant folding: fptoui of constant floats folds to a constant load.
; %a is intentionally unused.
913 define <4 x i32> @fptoui_4vf32c(<4 x float> %a) {
914 ; SSE2-LABEL: fptoui_4vf32c:
916 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
919 ; AVX-LABEL: fptoui_4vf32c:
921 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
923 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
; Constant folding: float -> u64 constant conversion folds to two xmm
; loads (SSE2) or one ymm load (AVX).
927 define <4 x i64> @fptoui_4vf32c_i64() {
928 ; SSE2-LABEL: fptoui_4vf32c_i64:
930 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2]
931 ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [4,8]
934 ; AVX-LABEL: fptoui_4vf32c_i64:
936 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
938 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
; Constant folding: 8-lane float -> u32 constant conversion. %a is
; intentionally unused.
942 define <8 x i32> @fptoui_8vf32c(<8 x float> %a) {
943 ; SSE2-LABEL: fptoui_8vf32c:
945 ; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
946 ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
949 ; AVX-LABEL: fptoui_8vf32c:
951 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
953 %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>