1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
4 define float @t1(i32* nocapture %src) nounwind ssp {
9 %tmp1 = load i32* %src, align 4
10 %tmp2 = sitofp i32 %tmp1 to float
14 define float @t2(i32* nocapture %src) nounwind ssp {
19 %tmp1 = load i32* %src, align 4
20 %tmp2 = uitofp i32 %tmp1 to float
24 define double @t3(i64* nocapture %src) nounwind ssp {
29 %tmp1 = load i64* %src, align 4
30 %tmp2 = sitofp i64 %tmp1 to double
34 define double @t4(i64* nocapture %src) nounwind ssp {
39 %tmp1 = load i64* %src, align 4
40 %tmp2 = uitofp i64 %tmp1 to double
45 define double @t5(i32* nocapture %src) nounwind ssp optsize {
48 ; CHECK: ldr [[REG:w[0-9]+]], [x0]
49 ; CHECK: scvtf d0, [[REG]]
50 %tmp1 = load i32* %src, align 4
51 %tmp2 = sitofp i32 %tmp1 to double
55 ; Check that we load in FP register when we want to convert into
56 ; floating point value.
57 ; This is much faster than loading on GPR and making the conversion
59 ; <rdar://problem/14599607>
61 ; Check the following patterns for signed/unsigned:
62 ; 1. load with scaled imm to float.
63 ; 2. load with scaled register to float.
64 ; 3. load with scaled imm to double.
65 ; 4. load with scaled register to double.
66 ; 5. load with unscaled imm to float.
67 ; 6. load with unscaled imm to double.
68 ; With loading size: 8, 16, 32, and 64-bits.
70 ; ********* 1. load with scaled imm to float. *********
71 define float @fct1(i8* nocapture %sp0) {
73 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
74 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
75 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
77 %addr = getelementptr i8* %sp0, i64 1
78 %pix_sp0.0.copyload = load i8* %addr, align 1
79 %val = uitofp i8 %pix_sp0.0.copyload to float
80 %vmull.i = fmul float %val, %val
84 define float @fct2(i16* nocapture %sp0) {
86 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
87 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
88 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
90 %addr = getelementptr i16* %sp0, i64 1
91 %pix_sp0.0.copyload = load i16* %addr, align 1
92 %val = uitofp i16 %pix_sp0.0.copyload to float
93 %vmull.i = fmul float %val, %val
97 define float @fct3(i32* nocapture %sp0) {
99 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
100 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
101 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
103 %addr = getelementptr i32* %sp0, i64 1
104 %pix_sp0.0.copyload = load i32* %addr, align 1
105 %val = uitofp i32 %pix_sp0.0.copyload to float
106 %vmull.i = fmul float %val, %val
110 ; i64 -> f32 is not supported on floating point unit.
111 define float @fct4(i64* nocapture %sp0) {
113 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
114 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
115 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
117 %addr = getelementptr i64* %sp0, i64 1
118 %pix_sp0.0.copyload = load i64* %addr, align 1
119 %val = uitofp i64 %pix_sp0.0.copyload to float
120 %vmull.i = fmul float %val, %val
124 ; ********* 2. load with scaled register to float. *********
125 define float @fct5(i8* nocapture %sp0, i64 %offset) {
127 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
128 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
129 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
131 %addr = getelementptr i8* %sp0, i64 %offset
132 %pix_sp0.0.copyload = load i8* %addr, align 1
133 %val = uitofp i8 %pix_sp0.0.copyload to float
134 %vmull.i = fmul float %val, %val
138 define float @fct6(i16* nocapture %sp0, i64 %offset) {
140 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
141 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
142 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
144 %addr = getelementptr i16* %sp0, i64 %offset
145 %pix_sp0.0.copyload = load i16* %addr, align 1
146 %val = uitofp i16 %pix_sp0.0.copyload to float
147 %vmull.i = fmul float %val, %val
151 define float @fct7(i32* nocapture %sp0, i64 %offset) {
153 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
154 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
155 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
157 %addr = getelementptr i32* %sp0, i64 %offset
158 %pix_sp0.0.copyload = load i32* %addr, align 1
159 %val = uitofp i32 %pix_sp0.0.copyload to float
160 %vmull.i = fmul float %val, %val
164 ; i64 -> f32 is not supported on floating point unit.
165 define float @fct8(i64* nocapture %sp0, i64 %offset) {
167 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
168 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
169 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
171 %addr = getelementptr i64* %sp0, i64 %offset
172 %pix_sp0.0.copyload = load i64* %addr, align 1
173 %val = uitofp i64 %pix_sp0.0.copyload to float
174 %vmull.i = fmul float %val, %val
179 ; ********* 3. load with scaled imm to double. *********
180 define double @fct9(i8* nocapture %sp0) {
182 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
183 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
184 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
186 %addr = getelementptr i8* %sp0, i64 1
187 %pix_sp0.0.copyload = load i8* %addr, align 1
188 %val = uitofp i8 %pix_sp0.0.copyload to double
189 %vmull.i = fmul double %val, %val
193 define double @fct10(i16* nocapture %sp0) {
194 ; CHECK-LABEL: fct10:
195 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
196 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
197 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
199 %addr = getelementptr i16* %sp0, i64 1
200 %pix_sp0.0.copyload = load i16* %addr, align 1
201 %val = uitofp i16 %pix_sp0.0.copyload to double
202 %vmull.i = fmul double %val, %val
206 define double @fct11(i32* nocapture %sp0) {
207 ; CHECK-LABEL: fct11:
208 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
209 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
210 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
212 %addr = getelementptr i32* %sp0, i64 1
213 %pix_sp0.0.copyload = load i32* %addr, align 1
214 %val = uitofp i32 %pix_sp0.0.copyload to double
215 %vmull.i = fmul double %val, %val
219 define double @fct12(i64* nocapture %sp0) {
220 ; CHECK-LABEL: fct12:
221 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
222 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
223 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
225 %addr = getelementptr i64* %sp0, i64 1
226 %pix_sp0.0.copyload = load i64* %addr, align 1
227 %val = uitofp i64 %pix_sp0.0.copyload to double
228 %vmull.i = fmul double %val, %val
232 ; ********* 4. load with scaled register to double. *********
233 define double @fct13(i8* nocapture %sp0, i64 %offset) {
234 ; CHECK-LABEL: fct13:
235 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
236 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
237 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
239 %addr = getelementptr i8* %sp0, i64 %offset
240 %pix_sp0.0.copyload = load i8* %addr, align 1
241 %val = uitofp i8 %pix_sp0.0.copyload to double
242 %vmull.i = fmul double %val, %val
246 define double @fct14(i16* nocapture %sp0, i64 %offset) {
247 ; CHECK-LABEL: fct14:
248 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
249 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
250 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
252 %addr = getelementptr i16* %sp0, i64 %offset
253 %pix_sp0.0.copyload = load i16* %addr, align 1
254 %val = uitofp i16 %pix_sp0.0.copyload to double
255 %vmull.i = fmul double %val, %val
259 define double @fct15(i32* nocapture %sp0, i64 %offset) {
260 ; CHECK-LABEL: fct15:
261 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
262 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
263 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
265 %addr = getelementptr i32* %sp0, i64 %offset
266 %pix_sp0.0.copyload = load i32* %addr, align 1
267 %val = uitofp i32 %pix_sp0.0.copyload to double
268 %vmull.i = fmul double %val, %val
272 define double @fct16(i64* nocapture %sp0, i64 %offset) {
273 ; CHECK-LABEL: fct16:
274 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
275 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
276 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
278 %addr = getelementptr i64* %sp0, i64 %offset
279 %pix_sp0.0.copyload = load i64* %addr, align 1
280 %val = uitofp i64 %pix_sp0.0.copyload to double
281 %vmull.i = fmul double %val, %val
285 ; ********* 5. load with unscaled imm to float. *********
286 define float @fct17(i8* nocapture %sp0) {
288 ; CHECK-LABEL: fct17:
289 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
290 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
291 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
292 %bitcast = ptrtoint i8* %sp0 to i64
293 %add = add i64 %bitcast, -1
294 %addr = inttoptr i64 %add to i8*
295 %pix_sp0.0.copyload = load i8* %addr, align 1
296 %val = uitofp i8 %pix_sp0.0.copyload to float
297 %vmull.i = fmul float %val, %val
301 define float @fct18(i16* nocapture %sp0) {
302 ; CHECK-LABEL: fct18:
303 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
304 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
305 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
306 %bitcast = ptrtoint i16* %sp0 to i64
307 %add = add i64 %bitcast, 1
308 %addr = inttoptr i64 %add to i16*
309 %pix_sp0.0.copyload = load i16* %addr, align 1
310 %val = uitofp i16 %pix_sp0.0.copyload to float
311 %vmull.i = fmul float %val, %val
315 define float @fct19(i32* nocapture %sp0) {
316 ; CHECK-LABEL: fct19:
317 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
318 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
319 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
320 %bitcast = ptrtoint i32* %sp0 to i64
321 %add = add i64 %bitcast, 1
322 %addr = inttoptr i64 %add to i32*
323 %pix_sp0.0.copyload = load i32* %addr, align 1
324 %val = uitofp i32 %pix_sp0.0.copyload to float
325 %vmull.i = fmul float %val, %val
329 ; i64 -> f32 is not supported on floating point unit.
330 define float @fct20(i64* nocapture %sp0) {
331 ; CHECK-LABEL: fct20:
332 ; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
333 ; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
334 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
335 %bitcast = ptrtoint i64* %sp0 to i64
336 %add = add i64 %bitcast, 1
337 %addr = inttoptr i64 %add to i64*
338 %pix_sp0.0.copyload = load i64* %addr, align 1
339 %val = uitofp i64 %pix_sp0.0.copyload to float
340 %vmull.i = fmul float %val, %val
345 ; ********* 6. load with unscaled imm to double. *********
346 define double @fct21(i8* nocapture %sp0) {
348 ; CHECK-LABEL: fct21:
349 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
350 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
351 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
352 %bitcast = ptrtoint i8* %sp0 to i64
353 %add = add i64 %bitcast, -1
354 %addr = inttoptr i64 %add to i8*
355 %pix_sp0.0.copyload = load i8* %addr, align 1
356 %val = uitofp i8 %pix_sp0.0.copyload to double
357 %vmull.i = fmul double %val, %val
361 define double @fct22(i16* nocapture %sp0) {
362 ; CHECK-LABEL: fct22:
363 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
364 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
365 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
366 %bitcast = ptrtoint i16* %sp0 to i64
367 %add = add i64 %bitcast, 1
368 %addr = inttoptr i64 %add to i16*
369 %pix_sp0.0.copyload = load i16* %addr, align 1
370 %val = uitofp i16 %pix_sp0.0.copyload to double
371 %vmull.i = fmul double %val, %val
375 define double @fct23(i32* nocapture %sp0) {
376 ; CHECK-LABEL: fct23:
377 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
378 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
379 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
380 %bitcast = ptrtoint i32* %sp0 to i64
381 %add = add i64 %bitcast, 1
382 %addr = inttoptr i64 %add to i32*
383 %pix_sp0.0.copyload = load i32* %addr, align 1
384 %val = uitofp i32 %pix_sp0.0.copyload to double
385 %vmull.i = fmul double %val, %val
389 define double @fct24(i64* nocapture %sp0) {
390 ; CHECK-LABEL: fct24:
391 ; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
392 ; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
393 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
394 %bitcast = ptrtoint i64* %sp0 to i64
395 %add = add i64 %bitcast, 1
396 %addr = inttoptr i64 %add to i64*
397 %pix_sp0.0.copyload = load i64* %addr, align 1
398 %val = uitofp i64 %pix_sp0.0.copyload to double
399 %vmull.i = fmul double %val, %val
404 ; ********* 1s. load with scaled imm to float. *********
405 define float @sfct1(i8* nocapture %sp0) {
406 ; CHECK-LABEL: sfct1:
407 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
408 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
409 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
410 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
411 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
413 %addr = getelementptr i8* %sp0, i64 1
414 %pix_sp0.0.copyload = load i8* %addr, align 1
415 %val = sitofp i8 %pix_sp0.0.copyload to float
416 %vmull.i = fmul float %val, %val
420 define float @sfct2(i16* nocapture %sp0) {
421 ; CHECK-LABEL: sfct2:
422 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
423 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
424 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
425 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
427 %addr = getelementptr i16* %sp0, i64 1
428 %pix_sp0.0.copyload = load i16* %addr, align 1
429 %val = sitofp i16 %pix_sp0.0.copyload to float
430 %vmull.i = fmul float %val, %val
434 define float @sfct3(i32* nocapture %sp0) {
; Signed i32 -> float from a scaled-immediate load. No widening step is
; expected, so scvtf must read the register the load just defined (REGNUM);
; SEXTREG is never defined in this function.
435 ; CHECK-LABEL: sfct3:
436 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
437 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
438 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
440 %addr = getelementptr i32* %sp0, i64 1
441 %pix_sp0.0.copyload = load i32* %addr, align 1
442 %val = sitofp i32 %pix_sp0.0.copyload to float
443 %vmull.i = fmul float %val, %val
447 ; i64 -> f32 is not supported on floating point unit.
448 define float @sfct4(i64* nocapture %sp0) {
449 ; CHECK-LABEL: sfct4:
450 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
451 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
452 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
454 %addr = getelementptr i64* %sp0, i64 1
455 %pix_sp0.0.copyload = load i64* %addr, align 1
456 %val = sitofp i64 %pix_sp0.0.copyload to float
457 %vmull.i = fmul float %val, %val
461 ; ********* 2s. load with scaled register to float. *********
462 define float @sfct5(i8* nocapture %sp0, i64 %offset) {
463 ; CHECK-LABEL: sfct5:
464 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
465 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
466 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
467 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
468 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
470 %addr = getelementptr i8* %sp0, i64 %offset
471 %pix_sp0.0.copyload = load i8* %addr, align 1
472 %val = sitofp i8 %pix_sp0.0.copyload to float
473 %vmull.i = fmul float %val, %val
477 define float @sfct6(i16* nocapture %sp0, i64 %offset) {
478 ; CHECK-LABEL: sfct6:
479 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
480 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
481 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
482 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
484 %addr = getelementptr i16* %sp0, i64 %offset
485 %pix_sp0.0.copyload = load i16* %addr, align 1
486 %val = sitofp i16 %pix_sp0.0.copyload to float
487 %vmull.i = fmul float %val, %val
491 define float @sfct7(i32* nocapture %sp0, i64 %offset) {
; Signed i32 -> float from a scaled-register load. No widening step is
; expected, so scvtf must read the register the load just defined (REGNUM);
; SEXTREG is never defined in this function.
492 ; CHECK-LABEL: sfct7:
493 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
494 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
495 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
497 %addr = getelementptr i32* %sp0, i64 %offset
498 %pix_sp0.0.copyload = load i32* %addr, align 1
499 %val = sitofp i32 %pix_sp0.0.copyload to float
500 %vmull.i = fmul float %val, %val
504 ; i64 -> f32 is not supported on floating point unit.
505 define float @sfct8(i64* nocapture %sp0, i64 %offset) {
506 ; CHECK-LABEL: sfct8:
507 ; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
508 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
509 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
511 %addr = getelementptr i64* %sp0, i64 %offset
512 %pix_sp0.0.copyload = load i64* %addr, align 1
513 %val = sitofp i64 %pix_sp0.0.copyload to float
514 %vmull.i = fmul float %val, %val
518 ; ********* 3s. load with scaled imm to double. *********
519 define double @sfct9(i8* nocapture %sp0) {
520 ; CHECK-LABEL: sfct9:
521 ; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
522 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
523 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
525 %addr = getelementptr i8* %sp0, i64 1
526 %pix_sp0.0.copyload = load i8* %addr, align 1
527 %val = sitofp i8 %pix_sp0.0.copyload to double
528 %vmull.i = fmul double %val, %val
532 define double @sfct10(i16* nocapture %sp0) {
533 ; CHECK-LABEL: sfct10:
534 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
535 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
536 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
537 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
538 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
540 %addr = getelementptr i16* %sp0, i64 1
541 %pix_sp0.0.copyload = load i16* %addr, align 1
542 %val = sitofp i16 %pix_sp0.0.copyload to double
543 %vmull.i = fmul double %val, %val
547 define double @sfct11(i32* nocapture %sp0) {
548 ; CHECK-LABEL: sfct11:
549 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
550 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
551 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
552 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
554 %addr = getelementptr i32* %sp0, i64 1
555 %pix_sp0.0.copyload = load i32* %addr, align 1
556 %val = sitofp i32 %pix_sp0.0.copyload to double
557 %vmull.i = fmul double %val, %val
561 define double @sfct12(i64* nocapture %sp0) {
; Signed i64 -> double from a scaled-immediate load. No widening step is
; expected, so scvtf must read the register the load just defined (REGNUM);
; SEXTREG is never defined in this function.
562 ; CHECK-LABEL: sfct12:
563 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
564 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
565 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
567 %addr = getelementptr i64* %sp0, i64 1
568 %pix_sp0.0.copyload = load i64* %addr, align 1
569 %val = sitofp i64 %pix_sp0.0.copyload to double
570 %vmull.i = fmul double %val, %val
574 ; ********* 4s. load with scaled register to double. *********
575 define double @sfct13(i8* nocapture %sp0, i64 %offset) {
576 ; CHECK-LABEL: sfct13:
577 ; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
578 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
579 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
581 %addr = getelementptr i8* %sp0, i64 %offset
582 %pix_sp0.0.copyload = load i8* %addr, align 1
583 %val = sitofp i8 %pix_sp0.0.copyload to double
584 %vmull.i = fmul double %val, %val
588 define double @sfct14(i16* nocapture %sp0, i64 %offset) {
589 ; CHECK-LABEL: sfct14:
590 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
591 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
592 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
593 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
594 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
596 %addr = getelementptr i16* %sp0, i64 %offset
597 %pix_sp0.0.copyload = load i16* %addr, align 1
598 %val = sitofp i16 %pix_sp0.0.copyload to double
599 %vmull.i = fmul double %val, %val
603 define double @sfct15(i32* nocapture %sp0, i64 %offset) {
604 ; CHECK-LABEL: sfct15:
605 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
606 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
607 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
608 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
610 %addr = getelementptr i32* %sp0, i64 %offset
611 %pix_sp0.0.copyload = load i32* %addr, align 1
612 %val = sitofp i32 %pix_sp0.0.copyload to double
613 %vmull.i = fmul double %val, %val
617 define double @sfct16(i64* nocapture %sp0, i64 %offset) {
; Signed i64 -> double from a scaled-register load. No widening step is
; expected, so scvtf must read the register the load just defined (REGNUM);
; SEXTREG is never defined in this function.
618 ; CHECK-LABEL: sfct16:
619 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
620 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
621 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
623 %addr = getelementptr i64* %sp0, i64 %offset
624 %pix_sp0.0.copyload = load i64* %addr, align 1
625 %val = sitofp i64 %pix_sp0.0.copyload to double
626 %vmull.i = fmul double %val, %val
630 ; ********* 5s. load with unscaled imm to float. *********
631 define float @sfct17(i8* nocapture %sp0) {
633 ; CHECK-LABEL: sfct17:
634 ; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
635 ; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
636 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
637 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
638 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
639 %bitcast = ptrtoint i8* %sp0 to i64
640 %add = add i64 %bitcast, -1
641 %addr = inttoptr i64 %add to i8*
642 %pix_sp0.0.copyload = load i8* %addr, align 1
643 %val = sitofp i8 %pix_sp0.0.copyload to float
644 %vmull.i = fmul float %val, %val
648 define float @sfct18(i16* nocapture %sp0) {
649 ; CHECK-LABEL: sfct18:
650 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
651 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
652 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
653 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
654 %bitcast = ptrtoint i16* %sp0 to i64
655 %add = add i64 %bitcast, 1
656 %addr = inttoptr i64 %add to i16*
657 %pix_sp0.0.copyload = load i16* %addr, align 1
658 %val = sitofp i16 %pix_sp0.0.copyload to float
659 %vmull.i = fmul float %val, %val
663 define float @sfct19(i32* nocapture %sp0) {
; Signed i32 -> float from an unscaled-immediate load (byte offset 1). No
; widening step is expected, so scvtf must read the register the load just
; defined (REGNUM); SEXTREG is never defined in this function.
664 ; CHECK-LABEL: sfct19:
665 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
666 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
667 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
668 %bitcast = ptrtoint i32* %sp0 to i64
669 %add = add i64 %bitcast, 1
670 %addr = inttoptr i64 %add to i32*
671 %pix_sp0.0.copyload = load i32* %addr, align 1
672 %val = sitofp i32 %pix_sp0.0.copyload to float
673 %vmull.i = fmul float %val, %val
677 ; i64 -> f32 is not supported on floating point unit.
678 define float @sfct20(i64* nocapture %sp0) {
679 ; CHECK-LABEL: sfct20:
680 ; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
681 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
682 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
683 %bitcast = ptrtoint i64* %sp0 to i64
684 %add = add i64 %bitcast, 1
685 %addr = inttoptr i64 %add to i64*
686 %pix_sp0.0.copyload = load i64* %addr, align 1
687 %val = sitofp i64 %pix_sp0.0.copyload to float
688 %vmull.i = fmul float %val, %val
693 ; ********* 6s. load with unscaled imm to double. *********
694 define double @sfct21(i8* nocapture %sp0) {
696 ; CHECK-LABEL: sfct21:
697 ; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
698 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
699 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
700 %bitcast = ptrtoint i8* %sp0 to i64
701 %add = add i64 %bitcast, -1
702 %addr = inttoptr i64 %add to i8*
703 %pix_sp0.0.copyload = load i8* %addr, align 1
704 %val = sitofp i8 %pix_sp0.0.copyload to double
705 %vmull.i = fmul double %val, %val
709 define double @sfct22(i16* nocapture %sp0) {
710 ; CHECK-LABEL: sfct22:
711 ; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
712 ; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
713 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
714 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
715 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
716 %bitcast = ptrtoint i16* %sp0 to i64
717 %add = add i64 %bitcast, 1
718 %addr = inttoptr i64 %add to i16*
719 %pix_sp0.0.copyload = load i16* %addr, align 1
720 %val = sitofp i16 %pix_sp0.0.copyload to double
721 %vmull.i = fmul double %val, %val
725 define double @sfct23(i32* nocapture %sp0) {
726 ; CHECK-LABEL: sfct23:
727 ; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
728 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
729 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
730 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
731 %bitcast = ptrtoint i32* %sp0 to i64
732 %add = add i64 %bitcast, 1
733 %addr = inttoptr i64 %add to i32*
734 %pix_sp0.0.copyload = load i32* %addr, align 1
735 %val = sitofp i32 %pix_sp0.0.copyload to double
736 %vmull.i = fmul double %val, %val
740 define double @sfct24(i64* nocapture %sp0) {
; Signed i64 -> double from an unscaled-immediate load (byte offset 1). No
; widening step is expected, so scvtf must read the register the load just
; defined (REGNUM); SEXTREG is never defined in this function.
741 ; CHECK-LABEL: sfct24:
742 ; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
743 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
744 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
745 %bitcast = ptrtoint i64* %sp0 to i64
746 %add = add i64 %bitcast, 1
747 %addr = inttoptr i64 %add to i64*
748 %pix_sp0.0.copyload = load i64* %addr, align 1
749 %val = sitofp i64 %pix_sp0.0.copyload to double
750 %vmull.i = fmul double %val, %val
755 ; Check that we do not use SSHLL code sequence when code size is a concern.
756 define float @codesize_sfct17(i8* nocapture %sp0) optsize {
758 ; CHECK-LABEL: codesize_sfct17:
759 ; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
760 ; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
761 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
762 %bitcast = ptrtoint i8* %sp0 to i64
763 %add = add i64 %bitcast, -1
764 %addr = inttoptr i64 %add to i8*
765 %pix_sp0.0.copyload = load i8* %addr, align 1
766 %val = sitofp i8 %pix_sp0.0.copyload to float
767 %vmull.i = fmul float %val, %val
771 define double @codesize_sfct11(i32* nocapture %sp0) minsize {
; minsize variant of sfct11: expects a GPR load immediately followed by scvtf
; from that w register. The label directive names this function's own symbol
; rather than relying on a substring match against "sfct11:".
772 ; CHECK-LABEL: codesize_sfct11:
773 ; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
774 ; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
775 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
777 %addr = getelementptr i32* %sp0, i64 1
778 %pix_sp0.0.copyload = load i32* %addr, align 1
779 %val = sitofp i32 %pix_sp0.0.copyload to double
780 %vmull.i = fmul double %val, %val
784 ; Adding fp128 custom lowering makes these a little fragile since we have to
785 ; return the correct mix of Legal/Expand from the custom method.
787 ; rdar://problem/14991489
789 define float @float_from_i128(i128 %in) {
790 ; CHECK-LABEL: float_from_i128:
791 ; CHECK: bl {{_?__floatuntisf}}
792 %conv = uitofp i128 %in to float
796 define double @double_from_i128(i128 %in) {
797 ; CHECK-LABEL: double_from_i128:
798 ; CHECK: bl {{_?__floattidf}}
799 %conv = sitofp i128 %in to double
803 define fp128 @fp128_from_i128(i128 %in) {
804 ; CHECK-LABEL: fp128_from_i128:
805 ; CHECK: bl {{_?__floatuntitf}}
806 %conv = uitofp i128 %in to fp128
810 define i128 @i128_from_float(float %in) {
; fptosi float -> i128 is expected to become a call to __fixsfti (with an
; optional leading underscore). The trailing ':' anchors the label directive
; to the symbol definition, as the other labels in this file do.
811 ; CHECK-LABEL: i128_from_float:
812 ; CHECK: bl {{_?__fixsfti}}
813 %conv = fptosi float %in to i128
817 define i128 @i128_from_double(double %in) {
; fptoui double -> i128 is expected to become a call to __fixunsdfti (with an
; optional leading underscore). The trailing ':' anchors the label directive
; to the symbol definition, as the other labels in this file do.
818 ; CHECK-LABEL: i128_from_double:
819 ; CHECK: bl {{_?__fixunsdfti}}
820 %conv = fptoui double %in to i128
824 define i128 @i128_from_fp128(fp128 %in) {
; fptosi fp128 -> i128 is expected to become a call to __fixtfti (with an
; optional leading underscore). The trailing ':' anchors the label directive
; to the symbol definition, as the other labels in this file do.
825 ; CHECK-LABEL: i128_from_fp128:
826 ; CHECK: bl {{_?__fixtfti}}
827 %conv = fptosi fp128 %in to i128