test/CodeGen/AArch64/neon-scalar-recip.ll

   1 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
     ; The command above feeds this file to llc for the aarch64-none-linux-gnu
     ; triple with the neon attribute enabled, then pipes llc's output into
     ; FileCheck, which verifies it against the check patterns in this file.
   2
   3 define float @test_vrecpss_f32(float %a, float %b) {
     ; Wraps %a and %b in <1 x float> vectors, calls llvm.arm.neon.vrecps.v1f32,
     ; and returns lane 0. The generated assembly must contain a scalar frecps
     ; whose three operands are s registers. The label directive below confines
     ; the instruction pattern to this function's output.
   4 ; CHECK-LABEL: test_vrecpss_f32:
   5 ; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   6   %1 = insertelement <1 x float> undef, float %a, i32 0
   7   %2 = insertelement <1 x float> undef, float %b, i32 0
   8   %3 = call <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float> %1, <1 x float> %2)
   9   %4 = extractelement <1 x float> %3, i32 0
  10   ret float %4
  11 }
  12
  13 define double @test_vrecpsd_f64(double %a, double %b) {
     ; Wraps %a and %b in <1 x double> vectors, calls llvm.arm.neon.vrecps.v1f64,
     ; and returns lane 0. The generated assembly must contain a scalar frecps
     ; whose three operands are d registers. The label directive below confines
     ; the instruction pattern to this function's output.
  14 ; CHECK-LABEL: test_vrecpsd_f64:
  15 ; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  16   %1 = insertelement <1 x double> undef, double %a, i32 0
  17   %2 = insertelement <1 x double> undef, double %b, i32 0
  18   %3 = call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %1, <1 x double> %2)
  19   %4 = extractelement <1 x double> %3, i32 0
  20   ret double %4
  21 }
  22
  23 declare <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float>, <1 x float>) ; called by test_vrecpss_f32
  24 declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) ; called by test_vrecpsd_f64
  25
  26 define float @test_vrsqrtss_f32(float %a, float %b) {
     ; Wraps %a and %b in <1 x float> vectors, calls llvm.arm.neon.vrsqrts.v1f32,
     ; and returns lane 0. The generated assembly must contain a scalar frsqrts
     ; whose three operands are s registers. The label directive below confines
     ; the instruction pattern to this function's output.
  27 ; CHECK-LABEL: test_vrsqrtss_f32:
  28 ; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  29   %1 = insertelement <1 x float> undef, float %a, i32 0
  30   %2 = insertelement <1 x float> undef, float %b, i32 0
  31   %3 = call <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float> %1, <1 x float> %2)
  32   %4 = extractelement <1 x float> %3, i32 0
  33   ret float %4
  34 }
  35
  36 define double @test_vrsqrtsd_f64(double %a, double %b) {
     ; Wraps %a and %b in <1 x double> vectors, calls llvm.arm.neon.vrsqrts.v1f64,
     ; and returns lane 0. The generated assembly must contain a scalar frsqrts
     ; whose three operands are d registers. The label directive below confines
     ; the instruction pattern to this function's output.
  37 ; CHECK-LABEL: test_vrsqrtsd_f64:
  38 ; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  39   %1 = insertelement <1 x double> undef, double %a, i32 0
  40   %2 = insertelement <1 x double> undef, double %b, i32 0
  41   %3 = call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %1, <1 x double> %2)
  42   %4 = extractelement <1 x double> %3, i32 0
  43   ret double %4
  44 }
  45
  46 declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>) ; called by test_vrsqrtss_f32
  47 declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) ; called by test_vrsqrtsd_f64
  48
  49 define float @test_vrecpes_f32(float %a) {
     ; Passes %a directly to the scalar llvm.aarch64.neon.vrecpe.f32 intrinsic
     ; and returns its result. The generated assembly must contain a frecpe whose
     ; two operands are s registers. The label directive below confines the
     ; instruction pattern to this function's output.
  50 ; CHECK-LABEL: test_vrecpes_f32:
  51 ; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}}
  52 entry:
  53   %0 = call float @llvm.aarch64.neon.vrecpe.f32(float %a)
  54   ret float %0
  55 }
  56
  57 define double @test_vrecped_f64(double %a) {
     ; Passes %a directly to the scalar llvm.aarch64.neon.vrecpe.f64 intrinsic
     ; and returns its result. The generated assembly must contain a frecpe whose
     ; two operands are d registers. The label directive below confines the
     ; instruction pattern to this function's output.
  58 ; CHECK-LABEL: test_vrecped_f64:
  59 ; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}}
  60 entry:
  61   %0 = call double @llvm.aarch64.neon.vrecpe.f64(double %a)
  62   ret double %0
  63 }
  64
  65 declare float @llvm.aarch64.neon.vrecpe.f32(float) ; called by test_vrecpes_f32
  66 declare double @llvm.aarch64.neon.vrecpe.f64(double) ; called by test_vrecped_f64
  67
  68 define float @test_vrecpxs_f32(float %a) {
     ; Passes %a directly to the scalar llvm.aarch64.neon.vrecpx.f32 intrinsic
     ; and returns its result. The generated assembly must contain a frecpx whose
     ; two operands are s registers. The label directive below confines the
     ; instruction pattern to this function's output.
  69 ; CHECK-LABEL: test_vrecpxs_f32:
  70 ; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}}
  71 entry:
  72   %0 = call float @llvm.aarch64.neon.vrecpx.f32(float %a)
  73   ret float %0
  74 }
  75
  76 define double @test_vrecpxd_f64(double %a) {
     ; Passes %a directly to the scalar llvm.aarch64.neon.vrecpx.f64 intrinsic
     ; and returns its result. The generated assembly must contain a frecpx whose
     ; two operands are d registers. The label directive below confines the
     ; instruction pattern to this function's output.
  77 ; CHECK-LABEL: test_vrecpxd_f64:
  78 ; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}}
  79 entry:
  80   %0 = call double @llvm.aarch64.neon.vrecpx.f64(double %a)
  81   ret double %0
  82 }
  83
  84 declare float @llvm.aarch64.neon.vrecpx.f32(float) ; called by test_vrecpxs_f32
  85 declare double @llvm.aarch64.neon.vrecpx.f64(double) ; called by test_vrecpxd_f64
  86
  87 define float @test_vrsqrtes_f32(float %a) {
     ; Passes %a directly to the scalar llvm.aarch64.neon.vrsqrte.f32 intrinsic
     ; and returns its result. The generated assembly must contain a frsqrte
     ; whose two operands are s registers. The label directive below confines
     ; the instruction pattern to this function's output.
  88 ; CHECK-LABEL: test_vrsqrtes_f32:
  89 ; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}}
  90 entry:
  91   %0 = call float @llvm.aarch64.neon.vrsqrte.f32(float %a)
  92   ret float %0
  93 }
  94
  95 define double @test_vrsqrted_f64(double %a) {
     ; Passes %a directly to the scalar llvm.aarch64.neon.vrsqrte.f64 intrinsic
     ; and returns its result. The generated assembly must contain a frsqrte
     ; whose two operands are d registers. The label directive below confines
     ; the instruction pattern to this function's output.
  96 ; CHECK-LABEL: test_vrsqrted_f64:
  97 ; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}}
  98 entry:
  99   %0 = call double @llvm.aarch64.neon.vrsqrte.f64(double %a)
 100   ret double %0
 101 }
 102
 103 declare float @llvm.aarch64.neon.vrsqrte.f32(float) ; called by test_vrsqrtes_f32
 104 declare double @llvm.aarch64.neon.vrsqrte.f64(double) ; called by test_vrsqrted_f64