test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll

   1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
   2 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
   3
   4 declare float @llvm.fma.f32(float, float, float)
   5 declare double @llvm.fma.f64(double, double, double)
   6
   7 define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
   8   ; CHECK-LABEL: test_fmla_ss4S
   9   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  10   %tmp1 = extractelement <4 x float> %v, i32 3
  11   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  12   ret float %tmp2
  13 }
  14
  15 define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
  16   ; CHECK-LABEL: test_fmla_ss4S_swap
  17   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  18   %tmp1 = extractelement <4 x float> %v, i32 3
  19   %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
  20   ret float %tmp2
  21 }
  22
  23 define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
  24   ; CHECK-LABEL: test_fmla_ss2S
  25   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  26   %tmp1 = extractelement <2 x float> %v, i32 1
  27   %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  28   ret float %tmp2
  29 }
  30
  31 define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
  32   ; CHECK-LABEL: test_fmla_ddD
  33   ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
  34   %tmp1 = extractelement <1 x double> %v, i32 0
  35   %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  36   ret double %tmp2
  37 }
  38
  39 define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
  40   ; CHECK-LABEL: test_fmla_dd2D
  41   ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  42   %tmp1 = extractelement <2 x double> %v, i32 1
  43   %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  44   ret double %tmp2
  45 }
  46
  47 define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) {
  48   ; CHECK-LABEL: test_fmla_dd2D_swap
  49   ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  50   %tmp1 = extractelement <2 x double> %v, i32 1
  51   %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
  52   ret double %tmp2
  53 }
  54
  55 define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) {
  56   ; CHECK-LABEL: test_fmls_ss4S
  57   ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  58   %tmp1 = extractelement <4 x float> %v, i32 3
  59   %tmp2 = fsub float -0.0, %tmp1
  60   %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
  61   ret float %tmp3
  62 }
  63
  64 define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) {
  65   ; CHECK-LABEL: test_fmls_ss4S_swap
  66   ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  67   %tmp1 = extractelement <4 x float> %v, i32 3
  68   %tmp2 = fsub float -0.0, %tmp1
  69   %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a)
  70   ret float %tmp3
  71 }
  72
  73
  74 define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) {
  75   ; CHECK-LABEL: test_fmls_ss2S
  76   ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  77   %tmp1 = extractelement <2 x float> %v, i32 1
  78   %tmp2 = fsub float -0.0, %tmp1
  79   %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
  80   ret float %tmp3
  81 }
  82
  83 define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) {
  84   ; CHECK-LABEL: test_fmls_ddD
  85   ; CHECK: {{fmls d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmsub d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
  86   %tmp1 = extractelement <1 x double> %v, i32 0
  87   %tmp2 = fsub double -0.0, %tmp1
  88   %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
  89   ret double %tmp3
  90 }
  91
  92 define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) {
  93   ; CHECK-LABEL: test_fmls_dd2D
  94   ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  95   %tmp1 = extractelement <2 x double> %v, i32 1
  96   %tmp2 = fsub double -0.0, %tmp1
  97   %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
  98   ret double %tmp3
  99 }
 100
 101 define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) {
 102   ; CHECK-LABEL: test_fmls_dd2D_swap
 103   ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
 104   %tmp1 = extractelement <2 x double> %v, i32 1
 105   %tmp2 = fsub double -0.0, %tmp1
 106   %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a)
 107   ret double %tmp3
 108 }
 109