test/CodeGen/ARM/fp16-promote.ll

   1 ; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL
   2 ; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL
   3
   4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
   5 target triple = "armv7-eabihf"
   6
   7 ; CHECK-FP16-LABEL: test_fadd:
   8 ; CHECK-FP16: vcvtb.f32.f16
   9 ; CHECK-FP16: vcvtb.f32.f16
  10 ; CHECK-FP16: vadd.f32
  11 ; CHECK-FP16: vcvtb.f16.f32
  12 ; CHECK-LIBCALL-LABEL: test_fadd:
  13 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  14 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  15 ; CHECK-LIBCALL: vadd.f32
  16 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
  17 define void @test_fadd(half* %p, half* %q) #0 {
  18   %lhs = load half, half* %p, align 2
  19   %rhs = load half, half* %q, align 2
  20   %sum = fadd half %lhs, %rhs
  21   store half %sum, half* %p
  22   ret void
  23 }
  24
  25 ; CHECK-FP16-LABEL: test_fsub:
  26 ; CHECK-FP16: vcvtb.f32.f16
  27 ; CHECK-FP16: vcvtb.f32.f16
  28 ; CHECK-FP16: vsub.f32
  29 ; CHECK-FP16: vcvtb.f16.f32
  30 ; CHECK-LIBCALL-LABEL: test_fsub:
  31 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  32 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  33 ; CHECK-LIBCALL: vsub.f32
  34 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
  35 define void @test_fsub(half* %p, half* %q) #0 {
  36   %lhs = load half, half* %p, align 2
  37   %rhs = load half, half* %q, align 2
  38   %diff = fsub half %lhs, %rhs
  39   store half %diff, half* %p
  40   ret void
  41 }
  42
  43 ; CHECK-FP16-LABEL: test_fmul:
  44 ; CHECK-FP16: vcvtb.f32.f16
  45 ; CHECK-FP16: vcvtb.f32.f16
  46 ; CHECK-FP16: vmul.f32
  47 ; CHECK-FP16: vcvtb.f16.f32
  48 ; CHECK-LIBCALL-LABEL: test_fmul:
  49 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  50 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  51 ; CHECK-LIBCALL: vmul.f32
  52 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
  53 define void @test_fmul(half* %p, half* %q) #0 {
  54   %a = load half, half* %p, align 2
  55   %b = load half, half* %q, align 2
  56   %r = fmul half %a, %b
  57   store half %r, half* %p
  58   ret void
  59 }
  60
  61 ; CHECK-FP16-LABEL: test_fdiv:
  62 ; CHECK-FP16: vcvtb.f32.f16
  63 ; CHECK-FP16: vcvtb.f32.f16
  64 ; CHECK-FP16: vdiv.f32
  65 ; CHECK-FP16: vcvtb.f16.f32
  66 ; CHECK-LIBCALL-LABEL: test_fdiv:
  67 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  68 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  69 ; CHECK-LIBCALL: vdiv.f32
  70 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
  71 define void @test_fdiv(half* %p, half* %q) #0 {
  72   %a = load half, half* %p, align 2
  73   %b = load half, half* %q, align 2
  74   %r = fdiv half %a, %b
  75   store half %r, half* %p
  76   ret void
  77 }
  78
  79 ; CHECK-FP16-LABEL: test_frem:
  80 ; CHECK-FP16: vcvtb.f32.f16
  81 ; CHECK-FP16: vcvtb.f32.f16
  82 ; CHECK-FP16: bl fmodf
  83 ; CHECK-FP16: vcvtb.f16.f32
  84 ; CHECK-LIBCALL-LABEL: test_frem:
  85 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  86 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
  87 ; CHECK-LIBCALL: bl fmodf
  88 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
  89 define void @test_frem(half* %p, half* %q) #0 {
  90   %a = load half, half* %p, align 2
  91   %b = load half, half* %q, align 2
  92   %r = frem half %a, %b
  93   store half %r, half* %p
  94   ret void
  95 }
  96
  97 ; CHECK-ALL-LABEL: test_load_store:
  98 ; CHECK-ALL-NEXT: .fnstart
  99 ; CHECK-ALL-NEXT: ldrh r0, [r0]
 100 ; CHECK-ALL-NEXT: strh r0, [r1]
 101 ; CHECK-ALL-NEXT: bx lr
 102 define void @test_load_store(half* %p, half* %q) #0 {
 103   %val = load half, half* %p, align 2
 104   store half %val, half* %q
 105   ret void
 106 }
 107
 108 ; Test only that function calls compile successfully.  In the ARM ABI, half
 109 ; arguments and return values are handled as f32.
 110
 111 declare half @test_callee(half %a, half %b) #0
 112
 113 ; CHECK-ALL-LABEL: test_call:
 114 ; CHECK-ALL-NEXT: .fnstart
 115 ; CHECK-ALL-NEXT: push {r11, lr}
 116 ; CHECK-ALL-NEXT: bl test_callee
 117 ; CHECK-ALL-NEXT: pop {r11, pc}
 118 define half @test_call(half %a, half %b) #0 {
 119   %ret = call half @test_callee(half %a, half %b)
 120   ret half %ret
 121 }
 122
 123 ; CHECK-ALL-LABEL: test_call_flipped:
 124 ; CHECK-ALL-NEXT: .fnstart
 125 ; CHECK-ALL-NEXT: push {r11, lr}
 126 ; CHECK-ALL-NEXT: mov r2, r0
 127 ; CHECK-ALL-NEXT: mov r0, r1
 128 ; CHECK-ALL-NEXT: mov r1, r2
 129 ; CHECK-ALL-NEXT: bl test_callee
 130 ; CHECK-ALL-NEXT: pop {r11, pc}
 131 define half @test_call_flipped(half %a, half %b) #0 {
 132   %ret = call half @test_callee(half %b, half %a)
 133   ret half %ret
 134 }
 135
 136 ; CHECK-ALL-LABEL: test_tailcall_flipped:
 137 ; CHECK-ALL-NEXT: .fnstart
 138 ; CHECK-ALL-NEXT: mov r2, r0
 139 ; CHECK-ALL-NEXT: mov r0, r1
 140 ; CHECK-ALL-NEXT: mov r1, r2
 141 ; CHECK-ALL-NEXT: b test_callee
 142 define half @test_tailcall_flipped(half %a, half %b) #0 {
 143   %ret = tail call half @test_callee(half %b, half %a)
 144   ret half %ret
 145 }
 146
 147 ; Optimizer picks %p or %q based on %c and only loads that value
 148 ; No conversion is needed
 149 ; CHECK-ALL-LABEL: test_select:
 150 ; CHECK-ALL-NEXT: .fnstart
 151 ; CHECK-ALL-NEXT: cmp r2, #0
 152 ; CHECK-ALL-NEXT: movne r1, r0
 153 ; CHECK-ALL-NEXT: ldrh r1, [r1]
 154 ; CHECK-ALL-NEXT: strh r1, [r0]
 155 ; CHECK-ALL-NEXT: bx lr
 156 define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
 157   %tval = load half, half* %p, align 2
 158   %fval = load half, half* %q, align 2
 159   %sel = select i1 %c, half %tval, half %fval
 160   store half %sel, half* %p
 161   ret void
 162 }
 163
 164 ; Test only two variants of fcmp.  These get translated to f32 vcmpe
 165 ; instructions anyway.
 166 ; CHECK-FP16-LABEL: test_fcmp_une:
 167 ; CHECK-FP16: vcvtb.f32.f16
 168 ; CHECK-FP16: vcvtb.f32.f16
 169 ; CHECK-FP16: vcmpe.f32
 170 ; CHECK-FP16: vmrs APSR_nzcv, fpscr
 171 ; CHECK-FP16: movwne
 172 ; CHECK-LIBCALL-LABEL: test_fcmp_une:
 173 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 174 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 175 ; CHECK-LIBCALL: vcmpe.f32
 176 ; CHECK-LIBCALL: movwne
 177 define i1 @test_fcmp_une(half* %p, half* %q) #0 {
 178   %lhs = load half, half* %p, align 2
 179   %rhs = load half, half* %q, align 2
 180   %cmp = fcmp une half %lhs, %rhs
 181   ret i1 %cmp
 182 }
 183
 184 ; CHECK-FP16-LABEL: test_fcmp_ueq:
 185 ; CHECK-FP16: vcvtb.f32.f16
 186 ; CHECK-FP16: vcvtb.f32.f16
 187 ; CHECK-FP16: vcmpe.f32
 188 ; CHECK-FP16: vmrs APSR_nzcv, fpscr
 189 ; CHECK-FP16: movweq
 190 ; CHECK-FP16: movwvs
 191 ; CHECK-LIBCALL-LABEL: test_fcmp_ueq:
 192 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 193 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 194 ; CHECK-LIBCALL: vcmpe.f32
 195 ; CHECK-LIBCALL: movweq
 196 define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
 197   %lhs = load half, half* %p, align 2
 198   %rhs = load half, half* %q, align 2
 199   %cmp = fcmp ueq half %lhs, %rhs
 200   ret i1 %cmp
 201 }
 202
 203 ; CHECK-FP16-LABEL: test_br_cc:
 204 ; CHECK-FP16: vcvtb.f32.f16
 205 ; CHECK-FP16: vcvtb.f32.f16
 206 ; CHECK-FP16: vcmpe.f32
 207 ; CHECK-FP16: vmrs APSR_nzcv, fpscr
 208 ; CHECK-FP16: strmi
 209 ; CHECK-FP16: strpl
 210 ; CHECK-LIBCALL-LABEL: test_br_cc:
 211 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 212 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 213 ; CHECK-LIBCALL: vcmpe.f32
 214 ; CHECK-LIBCALL: strmi
 215 ; CHECK-LIBCALL: strpl
 216 define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 {
 217   %lhs = load half, half* %p, align 2
 218   %rhs = load half, half* %q, align 2
 219   %cmp = fcmp uge half %lhs, %rhs
 220   br i1 %cmp, label %then, label %else
 221 then:
 222   store i32 0, i32* %p1
 223   ret void
 224 else:
 225   store i32 0, i32* %p2
 226   ret void
 227 }
 228
 229 declare i1 @test_dummy(half* %p) #0
 230 ; CHECK-FP16-LABEL: test_phi:
 231 ; CHECK-FP16: vcvtb.f32.f16
 232 ; CHECK-FP16: [[LOOP:.LBB[0-9_]+]]:
 233 ; CHECK-FP16: vcvtb.f32.f16
 234 ; CHECK-FP16: bl      test_dummy
 235 ; CHECK-FP16: bne     [[LOOP]]
 236 ; CHECK-FP16: vcvtb.f16.f32
 237 ; CHECK-LIBCALL-LABEL: test_phi:
 238 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 239 ; CHECK-LIBCALL: [[LOOP:.LBB[0-9_]+]]:
 240 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 241 ; CHECK-LIBCALL: bl test_dummy
 242 ; CHECK-LIBCALL: bne     [[LOOP]]
 243 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 244 define void @test_phi(half* %p) #0 {
 245 entry:
 246   %a = load half, half* %p
 247   br label %loop
 248 loop:
 249   %r = phi half [%a, %entry], [%b, %loop]
 250   %b = load half, half* %p
 251   %c = call i1 @test_dummy(half* %p)
 252   br i1 %c, label %loop, label %return
 253 return:
 254   store half %r, half* %p
 255   ret void
 256 }
 257
 258 ; CHECK-FP16-LABEL: test_fptosi_i32:
 259 ; CHECK-FP16: vcvtb.f32.f16
 260 ; CHECK-FP16: vcvt.s32.f32
 261 ; CHECK-LIBCALL-LABEL: test_fptosi_i32:
 262 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 263 ; CHECK-LIBCALL: vcvt.s32.f32
 264 define i32 @test_fptosi_i32(half* %p) #0 {
 265   %h = load half, half* %p, align 2
 266   %conv = fptosi half %h to i32
 267   ret i32 %conv
 268 }
 269
 270 ; CHECK-FP16-LABEL: test_fptosi_i64:
 271 ; CHECK-FP16: vcvtb.f32.f16
 272 ; CHECK-FP16: bl __aeabi_f2lz
 273 ; CHECK-LIBCALL-LABEL: test_fptosi_i64:
 274 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 275 ; CHECK-LIBCALL: bl __aeabi_f2lz
 276 define i64 @test_fptosi_i64(half* %p) #0 {
 277   %h = load half, half* %p, align 2
 278   %conv = fptosi half %h to i64
 279   ret i64 %conv
 280 }
 281
 282 ; CHECK-FP16-LABEL: test_fptoui_i32:
 283 ; CHECK-FP16: vcvtb.f32.f16
 284 ; CHECK-FP16: vcvt.u32.f32
 285 ; CHECK-LIBCALL-LABEL: test_fptoui_i32:
 286 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 287 ; CHECK-LIBCALL: vcvt.u32.f32
 288 define i32 @test_fptoui_i32(half* %p) #0 {
 289   %h = load half, half* %p, align 2
 290   %conv = fptoui half %h to i32
 291   ret i32 %conv
 292 }
 293
 294 ; CHECK-FP16-LABEL: test_fptoui_i64:
 295 ; CHECK-FP16: vcvtb.f32.f16
 296 ; CHECK-FP16: bl __aeabi_f2ulz
 297 ; CHECK-LIBCALL-LABEL: test_fptoui_i64:
 298 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 299 ; CHECK-LIBCALL: bl __aeabi_f2ulz
 300 define i64 @test_fptoui_i64(half* %p) #0 {
 301   %h = load half, half* %p, align 2
 302   %conv = fptoui half %h to i64
 303   ret i64 %conv
 304 }
 305
 306 ; CHECK-FP16-LABEL: test_sitofp_i32:
 307 ; CHECK-FP16: vcvt.f32.s32
 308 ; CHECK-FP16: vcvtb.f16.f32
 309 ; CHECK-LIBCALL-LABEL: test_sitofp_i32:
 310 ; CHECK-LIBCALL: vcvt.f32.s32
 311 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 312 define void @test_sitofp_i32(i32 %a, half* %p) #0 {
 313   %conv = sitofp i32 %a to half
 314   store half %conv, half* %p
 315   ret void
 316 }
 317
 318 ; CHECK-FP16-LABEL: test_uitofp_i32:
 319 ; CHECK-FP16: vcvt.f32.u32
 320 ; CHECK-FP16: vcvtb.f16.f32
 321 ; CHECK-LIBCALL-LABEL: test_uitofp_i32:
 322 ; CHECK-LIBCALL: vcvt.f32.u32
 323 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 324 define void @test_uitofp_i32(i32 %a, half* %p) #0 {
 325   %conv = uitofp i32 %a to half
 326   store half %conv, half* %p
 327   ret void
 328 }
 329
 330 ; CHECK-FP16-LABEL: test_sitofp_i64:
 331 ; CHECK-FP16: bl __aeabi_l2f
 332 ; CHECK-FP16: vcvtb.f16.f32
 333 ; CHECK-LIBCALL-LABEL: test_sitofp_i64:
 334 ; CHECK-LIBCALL: bl __aeabi_l2f
 335 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 336 define void @test_sitofp_i64(i64 %a, half* %p) #0 {
 337   %conv = sitofp i64 %a to half
 338   store half %conv, half* %p
 339   ret void
 340 }
 341
 342 ; CHECK-FP16-LABEL: test_uitofp_i64:
 343 ; CHECK-FP16: bl __aeabi_ul2f
 344 ; CHECK-FP16: vcvtb.f16.f32
 345 ; CHECK-LIBCALL-LABEL: test_uitofp_i64:
 346 ; CHECK-LIBCALL: bl __aeabi_ul2f
 347 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 348 define void @test_uitofp_i64(i64 %a, half* %p) #0 {
 349   %conv = uitofp i64 %a to half
 350   store half %conv, half* %p
 351   ret void
 352 }
 353
 354 ; CHECK-FP16-LABEL: test_fptrunc_float:
 355 ; CHECK-FP16: vcvtb.f16.f32
 356 ; CHECK-LIBCALL-LABEL: test_fptrunc_float:
 357 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 358 define void @test_fptrunc_float(float %f, half* %p) #0 {
 359   %h = fptrunc float %f to half
 360   store half %h, half* %p
 361   ret void
 362 }
 363
 364 ; CHECK-FP16-LABEL: test_fptrunc_double:
 365 ; CHECK-FP16: bl __aeabi_d2h
 366 ; CHECK-LIBCALL-LABEL: test_fptrunc_double:
 367 ; CHECK-LIBCALL: bl __aeabi_d2h
 368 define void @test_fptrunc_double(double %d, half* %p) #0 {
 369   %h = fptrunc double %d to half
 370   store half %h, half* %p
 371   ret void
 372 }
 373
 374 ; CHECK-FP16-LABEL: test_fpextend_float:
 375 ; CHECK-FP16: vcvtb.f32.f16
 376 ; CHECK-LIBCALL-LABEL: test_fpextend_float:
 377 ; CHECK-LIBCALL: b __gnu_h2f_ieee
 378 define float @test_fpextend_float(half* %p) {
 379   %h = load half, half* %p, align 2
 380   %ext = fpext half %h to float
 381   ret float %ext
 382 }
 383
 384 ; CHECK-FP16-LABEL: test_fpextend_double:
 385 ; CHECK-FP16: vcvtb.f32.f16
 386 ; CHECK-FP16: vcvt.f64.f32
 387 ; CHECK-LIBCALL-LABEL: test_fpextend_double:
 388 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 389 ; CHECK-LIBCALL: vcvt.f64.f32
 390 define double @test_fpextend_double(half* %p) {
 391   %h = load half, half* %p, align 2
 392   %ext = fpext half %h to double
 393   ret double %ext
 394 }
 395
 396 ; CHECK-ALL-LABEL: test_bitcast_halftoi16:
 397 ; CHECK-ALL-NEXT: .fnstart
 398 ; CHECK-ALL-NEXT: ldrh r0, [r0]
 399 ; CHECK-ALL-NEXT: bx lr
 400 define i16 @test_bitcast_halftoi16(half* %p) #0 {
 401   %h = load half, half* %p, align 2
 402   %bits = bitcast half %h to i16
 403   ret i16 %bits
 404 }
 405
 406 ; CHECK-ALL-LABEL: test_bitcast_i16tohalf:
 407 ; CHECK-ALL-NEXT: .fnstart
 408 ; CHECK-ALL-NEXT: strh r0, [r1]
 409 ; CHECK-ALL-NEXT: bx lr
 410 define void @test_bitcast_i16tohalf(i16 %a, half* %p) #0 {
 411   %h = bitcast i16 %a to half
 412   store half %h, half* %p
 413   ret void
 414 }
 415
 416 declare half @llvm.sqrt.f16(half %a) #0
 417 declare half @llvm.powi.f16(half %a, i32 %b) #0
 418 declare half @llvm.sin.f16(half %a) #0
 419 declare half @llvm.cos.f16(half %a) #0
 420 declare half @llvm.pow.f16(half %a, half %b) #0
 421 declare half @llvm.exp.f16(half %a) #0
 422 declare half @llvm.exp2.f16(half %a) #0
 423 declare half @llvm.log.f16(half %a) #0
 424 declare half @llvm.log10.f16(half %a) #0
 425 declare half @llvm.log2.f16(half %a) #0
 426 declare half @llvm.fma.f16(half %a, half %b, half %c) #0
 427 declare half @llvm.fabs.f16(half %a) #0
 428 declare half @llvm.minnum.f16(half %a, half %b) #0
 429 declare half @llvm.maxnum.f16(half %a, half %b) #0
 430 declare half @llvm.copysign.f16(half %a, half %b) #0
 431 declare half @llvm.floor.f16(half %a) #0
 432 declare half @llvm.ceil.f16(half %a) #0
 433 declare half @llvm.trunc.f16(half %a) #0
 434 declare half @llvm.rint.f16(half %a) #0
 435 declare half @llvm.nearbyint.f16(half %a) #0
 436 declare half @llvm.round.f16(half %a) #0
 437 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
 438
 439 ; CHECK-FP16-LABEL: test_sqrt:
 440 ; CHECK-FP16: vcvtb.f32.f16
 441 ; CHECK-FP16: vsqrt.f32
 442 ; CHECK-FP16: vcvtb.f16.f32
 443 ; CHECK-LIBCALL-LABEL: test_sqrt:
 444 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 445 ; CHECK-LIBCALL: vsqrt.f32
 446 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 447 define void @test_sqrt(half* %p) #0 {
 448   %x = load half, half* %p, align 2
 449   %res = call half @llvm.sqrt.f16(half %x)
 450   store half %res, half* %p
 451   ret void
 452 }
 453
 454 ; CHECK-FP16-LABEL: test_fpowi:
 455 ; CHECK-FP16: vcvtb.f32.f16
 456 ; CHECK-FP16: bl __powisf2
 457 ; CHECK-FP16: vcvtb.f16.f32
 458 ; CHECK-LIBCALL-LABEL: test_fpowi:
 459 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 460 ; CHECK-LIBCALL: bl __powisf2
 461 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 462 define void @test_fpowi(half* %p, i32 %b) #0 {
 463   %x = load half, half* %p, align 2
 464   %res = call half @llvm.powi.f16(half %x, i32 %b)
 465   store half %res, half* %p
 466   ret void
 467 }
 468
 469 ; CHECK-FP16-LABEL: test_sin:
 470 ; CHECK-FP16: vcvtb.f32.f16
 471 ; CHECK-FP16: bl sinf
 472 ; CHECK-FP16: vcvtb.f16.f32
 473 ; CHECK-LIBCALL-LABEL: test_sin:
 474 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 475 ; CHECK-LIBCALL: bl sinf
 476 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 477 define void @test_sin(half* %p) #0 {
 478   %x = load half, half* %p, align 2
 479   %res = call half @llvm.sin.f16(half %x)
 480   store half %res, half* %p
 481   ret void
 482 }
 483
 484 ; CHECK-FP16-LABEL: test_cos:
 485 ; CHECK-FP16: vcvtb.f32.f16
 486 ; CHECK-FP16: bl cosf
 487 ; CHECK-FP16: vcvtb.f16.f32
 488 ; CHECK-LIBCALL-LABEL: test_cos:
 489 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 490 ; CHECK-LIBCALL: bl cosf
 491 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 492 define void @test_cos(half* %p) #0 {
 493   %x = load half, half* %p, align 2
 494   %res = call half @llvm.cos.f16(half %x)
 495   store half %res, half* %p
 496   ret void
 497 }
 498
 499 ; CHECK-FP16-LABEL: test_pow:
 500 ; CHECK-FP16: vcvtb.f32.f16
 501 ; CHECK-FP16: vcvtb.f32.f16
 502 ; CHECK-FP16: bl powf
 503 ; CHECK-FP16: vcvtb.f16.f32
 504 ; CHECK-LIBCALL-LABEL: test_pow:
 505 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 506 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 507 ; CHECK-LIBCALL: bl powf
 508 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 509 define void @test_pow(half* %p, half* %q) #0 {
 510   %x = load half, half* %p, align 2
 511   %y = load half, half* %q, align 2
 512   %res = call half @llvm.pow.f16(half %x, half %y)
 513   store half %res, half* %p
 514   ret void
 515 }
 516
 517 ; CHECK-FP16-LABEL: test_exp:
 518 ; CHECK-FP16: vcvtb.f32.f16
 519 ; CHECK-FP16: bl expf
 520 ; CHECK-FP16: vcvtb.f16.f32
 521 ; CHECK-LIBCALL-LABEL: test_exp:
 522 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 523 ; CHECK-LIBCALL: bl expf
 524 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 525 define void @test_exp(half* %p) #0 {
 526   %x = load half, half* %p, align 2
 527   %res = call half @llvm.exp.f16(half %x)
 528   store half %res, half* %p
 529   ret void
 530 }
 531
 532 ; CHECK-FP16-LABEL: test_exp2:
 533 ; CHECK-FP16: vcvtb.f32.f16
 534 ; CHECK-FP16: bl exp2f
 535 ; CHECK-FP16: vcvtb.f16.f32
 536 ; CHECK-LIBCALL-LABEL: test_exp2:
 537 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 538 ; CHECK-LIBCALL: bl exp2f
 539 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 540 define void @test_exp2(half* %p) #0 {
 541   %x = load half, half* %p, align 2
 542   %res = call half @llvm.exp2.f16(half %x)
 543   store half %res, half* %p
 544   ret void
 545 }
 546
 547 ; CHECK-FP16-LABEL: test_log:
 548 ; CHECK-FP16: vcvtb.f32.f16
 549 ; CHECK-FP16: bl logf
 550 ; CHECK-FP16: vcvtb.f16.f32
 551 ; CHECK-LIBCALL-LABEL: test_log:
 552 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 553 ; CHECK-LIBCALL: bl logf
 554 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 555 define void @test_log(half* %p) #0 {
 556   %x = load half, half* %p, align 2
 557   %res = call half @llvm.log.f16(half %x)
 558   store half %res, half* %p
 559   ret void
 560 }
 561
 562 ; CHECK-FP16-LABEL: test_log10:
 563 ; CHECK-FP16: vcvtb.f32.f16
 564 ; CHECK-FP16: bl log10f
 565 ; CHECK-FP16: vcvtb.f16.f32
 566 ; CHECK-LIBCALL-LABEL: test_log10:
 567 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 568 ; CHECK-LIBCALL: bl log10f
 569 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 570 define void @test_log10(half* %p) #0 {
 571   %x = load half, half* %p, align 2
 572   %res = call half @llvm.log10.f16(half %x)
 573   store half %res, half* %p
 574   ret void
 575 }
 576
 577 ; CHECK-FP16-LABEL: test_log2:
 578 ; CHECK-FP16: vcvtb.f32.f16
 579 ; CHECK-FP16: bl log2f
 580 ; CHECK-FP16: vcvtb.f16.f32
 581 ; CHECK-LIBCALL-LABEL: test_log2:
 582 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 583 ; CHECK-LIBCALL: bl log2f
 584 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 585 define void @test_log2(half* %p) #0 {
 586   %x = load half, half* %p, align 2
 587   %res = call half @llvm.log2.f16(half %x)
 588   store half %res, half* %p
 589   ret void
 590 }
 591
 592 ; CHECK-FP16-LABEL: test_fma:
 593 ; CHECK-FP16: vcvtb.f32.f16
 594 ; CHECK-FP16: vcvtb.f32.f16
 595 ; CHECK-FP16: vcvtb.f32.f16
 596 ; CHECK-FP16: bl fmaf
 597 ; CHECK-FP16: vcvtb.f16.f32
 598 ; CHECK-LIBCALL-LABEL: test_fma:
 599 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 600 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 601 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 602 ; CHECK-LIBCALL: bl fmaf
 603 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 604 define void @test_fma(half* %p, half* %q, half* %r) #0 {
 605   %x = load half, half* %p, align 2
 606   %y = load half, half* %q, align 2
 607   %z = load half, half* %r, align 2
 608   %res = call half @llvm.fma.f16(half %x, half %y, half %z)
 609   store half %res, half* %p
 610   ret void
 611 }
 612
 613 ; CHECK-FP16-LABEL: test_fabs:
 614 ; CHECK-FP16: vcvtb.f32.f16
 615 ; CHECK-FP16: vabs.f32
 616 ; CHECK-FP16: vcvtb.f16.f32
 617 ; CHECK-LIBCALL-LABEL: test_fabs:
 618 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 619 ; CHECK-LIBCALL: bfc
 620 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 621 define void @test_fabs(half* %p) {
 622   %x = load half, half* %p, align 2
 623   %res = call half @llvm.fabs.f16(half %x)
 624   store half %res, half* %p
 625   ret void
 626 }
 627
 628 ; CHECK-FP16-LABEL: test_minnum:
 629 ; CHECK-FP16: vcvtb.f32.f16
 630 ; CHECK-FP16: vcvtb.f32.f16
 631 ; CHECK-FP16: bl fminf
 632 ; CHECK-FP16: vcvtb.f16.f32
 633 ; CHECK-LIBCALL-LABEL: test_minnum:
 634 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 635 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 636 ; CHECK-LIBCALL: bl fminf
 637 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 638 define void @test_minnum(half* %p, half* %q) #0 {
 639   %x = load half, half* %p, align 2
 640   %y = load half, half* %q, align 2
 641   %res = call half @llvm.minnum.f16(half %x, half %y)
 642   store half %res, half* %p
 643   ret void
 644 }
 645
 646 ; CHECK-FP16-LABEL: test_maxnum:
 647 ; CHECK-FP16: vcvtb.f32.f16
 648 ; CHECK-FP16: vcvtb.f32.f16
 649 ; CHECK-FP16: bl fmaxf
 650 ; CHECK-FP16: vcvtb.f16.f32
 651 ; CHECK-LIBCALL-LABEL: test_maxnum:
 652 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 653 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 654 ; CHECK-LIBCALL: bl fmaxf
 655 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 656 define void @test_maxnum(half* %p, half* %q) #0 {
 657   %x = load half, half* %p, align 2
 658   %y = load half, half* %q, align 2
 659   %res = call half @llvm.maxnum.f16(half %x, half %y)
 660   store half %res, half* %p
 661   ret void
 662 }
 663
 664 ; CHECK-FP16-LABEL: test_copysign:
 665 ; CHECK-FP16: vcvtb.f32.f16
 666 ; CHECK-FP16: vcvtb.f32.f16
 667 ; CHECK-FP16: vbsl
 668 ; CHECK-FP16: vcvtb.f16.f32
 669 ; CHECK-LIBCALL-LABEL: test_copysign:
 670 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 671 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 672 ; CHECK-LIBCALL: vbsl
 673 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 674 define void @test_copysign(half* %p, half* %q) #0 {
 675   %mag = load half, half* %p, align 2
 676   %sgn = load half, half* %q, align 2
 677   %res = call half @llvm.copysign.f16(half %mag, half %sgn)
 678   store half %res, half* %p
 679   ret void
 680 }
 681
 682 ; CHECK-FP16-LABEL: test_floor:
 683 ; CHECK-FP16: vcvtb.f32.f16
 684 ; CHECK-FP16: bl floorf
 685 ; CHECK-FP16: vcvtb.f16.f32
 686 ; CHECK-LIBCALL-LABEL: test_floor:
 687 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 688 ; CHECK-LIBCALL: bl floorf
 689 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 690 define void @test_floor(half* %p) {
 691   %x = load half, half* %p, align 2
 692   %res = call half @llvm.floor.f16(half %x)
 693   store half %res, half* %p
 694   ret void
 695 }
 696
 697 ; CHECK-FP16-LABEL: test_ceil:
 698 ; CHECK-FP16: vcvtb.f32.f16
 699 ; CHECK-FP16: bl ceilf
 700 ; CHECK-FP16: vcvtb.f16.f32
 701 ; CHECK-LIBCALL-LABEL: test_ceil:
 702 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 703 ; CHECK-LIBCALL: bl ceilf
 704 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 705 define void @test_ceil(half* %p) {
 706   %x = load half, half* %p, align 2
 707   %res = call half @llvm.ceil.f16(half %x)
 708   store half %res, half* %p
 709   ret void
 710 }
 711
 712 ; CHECK-FP16-LABEL: test_trunc:
 713 ; CHECK-FP16: vcvtb.f32.f16
 714 ; CHECK-FP16: bl truncf
 715 ; CHECK-FP16: vcvtb.f16.f32
 716 ; CHECK-LIBCALL-LABEL: test_trunc:
 717 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 718 ; CHECK-LIBCALL: bl truncf
 719 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 720 define void @test_trunc(half* %p) {
 721   %x = load half, half* %p, align 2
 722   %res = call half @llvm.trunc.f16(half %x)
 723   store half %res, half* %p
 724   ret void
 725 }
 726
 727 ; CHECK-FP16-LABEL: test_rint:
 728 ; CHECK-FP16: vcvtb.f32.f16
 729 ; CHECK-FP16: bl rintf
 730 ; CHECK-FP16: vcvtb.f16.f32
 731 ; CHECK-LIBCALL-LABEL: test_rint:
 732 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 733 ; CHECK-LIBCALL: bl rintf
 734 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 735 define void @test_rint(half* %p) {
 736   %x = load half, half* %p, align 2
 737   %res = call half @llvm.rint.f16(half %x)
 738   store half %res, half* %p
 739   ret void
 740 }
 741
 742 ; CHECK-FP16-LABEL: test_nearbyint:
 743 ; CHECK-FP16: vcvtb.f32.f16
 744 ; CHECK-FP16: bl nearbyintf
 745 ; CHECK-FP16: vcvtb.f16.f32
 746 ; CHECK-LIBCALL-LABEL: test_nearbyint:
 747 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 748 ; CHECK-LIBCALL: bl nearbyintf
 749 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 750 define void @test_nearbyint(half* %p) {
 751   %x = load half, half* %p, align 2
 752   %res = call half @llvm.nearbyint.f16(half %x)
 753   store half %res, half* %p
 754   ret void
 755 }
 756
 757 ; CHECK-FP16-LABEL: test_round:
 758 ; CHECK-FP16: vcvtb.f32.f16
 759 ; CHECK-FP16: bl roundf
 760 ; CHECK-FP16: vcvtb.f16.f32
 761 ; CHECK-LIBCALL-LABEL: test_round:
 762 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 763 ; CHECK-LIBCALL: bl roundf
 764 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 765 define void @test_round(half* %p) {
 766   %x = load half, half* %p, align 2
 767   %res = call half @llvm.round.f16(half %x)
 768   store half %res, half* %p
 769   ret void
 770 }
 771
 772 ; CHECK-FP16-LABEL: test_fmuladd:
 773 ; CHECK-FP16: vcvtb.f32.f16
 774 ; CHECK-FP16: vcvtb.f32.f16
 775 ; CHECK-FP16: vcvtb.f32.f16
 776 ; CHECK-FP16: vmla.f32
 777 ; CHECK-FP16: vcvtb.f16.f32
 778 ; CHECK-LIBCALL-LABEL: test_fmuladd:
 779 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 780 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 781 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 782 ; CHECK-LIBCALL: vmla.f32
 783 ; CHECK-LIBCALL: bl __gnu_f2h_ieee
 784 define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
 785   %x = load half, half* %p, align 2
 786   %y = load half, half* %q, align 2
 787   %z = load half, half* %r, align 2
 788   %res = call half @llvm.fmuladd.f16(half %x, half %y, half %z)
 789   store half %res, half* %p
 790   ret void
 791 }
 792
 793 ; f16 vectors are not legal in the backend.  Vector elements are not kept in
 794 ; registers; they are stored on the stack instead.  Hence insertelement and
 795 ; extractelement produce these extra loads and stores.
 796
 797 ; CHECK-ALL-LABEL: test_insertelement:
 798 ; CHECK-ALL-NEXT: .fnstart
 799 ; CHECK-ALL-NEXT: sub sp, sp, #8
 800 ; CHECK-ALL-NEXT: ldrh
 801 ; CHECK-ALL-NEXT: strh
 802 ; CHECK-ALL-NEXT: ldrh
 803 ; CHECK-ALL-NEXT: strh
 804 ; CHECK-ALL-NEXT: ldrh
 805 ; CHECK-ALL-NEXT: strh
 806 ; CHECK-ALL-NEXT: ldrh
 807 ; CHECK-ALL-NEXT: strh
 808 ; CHECK-ALL-NEXT: mov
 809 ; CHECK-ALL-NEXT: ldrh
 810 ; CHECK-ALL-NEXT: add
 811 ; CHECK-ALL-NEXT: strh
 812 ; CHECK-ALL-NEXT: ldrh
 813 ; CHECK-ALL-NEXT: strh
 814 ; CHECK-ALL-NEXT: ldrh
 815 ; CHECK-ALL-NEXT: strh
 816 ; CHECK-ALL-NEXT: ldrh
 817 ; CHECK-ALL-NEXT: strh
 818 ; CHECK-ALL-NEXT: ldrh
 819 ; CHECK-ALL-NEXT: strh
 820 ; CHECK-ALL-NEXT: add sp, sp, #8
 821 ; CHECK-ALL-NEXT: bx lr
 822 define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
 823   %elt = load half, half* %p, align 2
 824   %vec = load <4 x half>, <4 x half>* %q, align 8
 825   %ins = insertelement <4 x half> %vec, half %elt, i32 %i
 826   store <4 x half> %ins, <4 x half>* %q
 827   ret void
 828 }
 829
 830 ; CHECK-ALL-LABEL: test_extractelement:
 831 ; CHECK-ALL-NEXT: .fnstart
 832 ; CHECK-ALL-NEXT: sub sp, sp, #8
 833 ; CHECK-ALL-NEXT: ldrh
 834 ; CHECK-ALL-NEXT: ldrh
 835 ; CHECK-ALL-NEXT: orr
 836 ; CHECK-ALL-NEXT: str
 837 ; CHECK-ALL-NEXT: ldrh
 838 ; CHECK-ALL-NEXT: ldrh
 839 ; CHECK-ALL-NEXT: orr
 840 ; CHECK-ALL-NEXT: str
 841 ; CHECK-ALL-NEXT: mov
 842 ; CHECK-ALL-NEXT: add
 843 ; CHECK-ALL-NEXT: ldrh
 844 ; CHECK-ALL-NEXT: strh
 845 ; CHECK-ALL-NEXT: add sp, sp, #8
 846 ; CHECK-ALL-NEXT: bx lr
 847 define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
 848   %vec = load <4 x half>, <4 x half>* %q, align 8
 849   %elt = extractelement <4 x half> %vec, i32 %i
 850   store half %elt, half* %p
 851   ret void
 852 }
 853
 854 ; test struct operations
 855
 856 %struct.dummy = type { i32, half }
 857
 858 ; CHECK-ALL-LABEL: test_insertvalue:
 859 ; CHECK-ALL-NEXT: .fnstart
 860 ; CHECK-ALL-NEXT: ldr
 861 ; CHECK-ALL-NEXT: ldrh
 862 ; CHECK-ALL-NEXT: strh
 863 ; CHECK-ALL-NEXT: str
 864 ; CHECK-ALL-NEXT: bx lr
 865 define void @test_insertvalue(%struct.dummy* %p, half* %q) {
 866   %agg = load %struct.dummy, %struct.dummy* %p
 867   %h = load half, half* %q
 868   %upd = insertvalue %struct.dummy %agg, half %h, 1
 869   store %struct.dummy %upd, %struct.dummy* %p
 870   ret void
 871 }
 872
 873 ; CHECK-ALL-LABEL: test_extractvalue:
 874 ; CHECK-ALL-NEXT: .fnstart
 875 ; CHECK-ALL-NEXT: ldrh
 876 ; CHECK-ALL-NEXT: strh
 877 ; CHECK-ALL-NEXT: bx lr
 878 define void @test_extractvalue(%struct.dummy* %p, half* %q) {
 879   %agg = load %struct.dummy, %struct.dummy* %p
 880   %h = extractvalue %struct.dummy %agg, 1
 881   store half %h, half* %q
 882   ret void
 883 }
 884
 885 ; CHECK-FP16-LABEL: test_struct_return:
 886 ; CHECK-FP16: vcvtb.f32.f16
 887 ; CHECK-LIBCALL-LABEL: test_struct_return:
 888 ; CHECK-LIBCALL: bl __gnu_h2f_ieee
 889 define %struct.dummy @test_struct_return(%struct.dummy* %p) {
 890   %agg = load %struct.dummy, %struct.dummy* %p
 891   ret %struct.dummy %agg
 892 }
 893
 894 ; CHECK-ALL-LABEL: test_struct_arg:
 895 ; CHECK-ALL-NEXT: .fnstart
 896 ; CHECK-ALL-NEXT: mov r0, r1
 897 ; CHECK-ALL-NEXT: bx lr
 898 define half @test_struct_arg(%struct.dummy %p) {
 899   %h = extractvalue %struct.dummy %p, 1
 900   ret half %h
 901 }
 902
 903 attributes #0 = { nounwind }