1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; Fused multiply-add intrinsics used by every test in this file.
; llvm.fma(x, y, z) evaluates x * y + z; one declaration per scalar FP width.
4 declare float @llvm.fma.f32(float, float, float)
5 declare double @llvm.fma.f64(double, double, double)
; Single-precision scalar FMA by element, 4-lane source vector.
; Computes %b * %v[3] + %a through llvm.fma.f32. The FileCheck pattern expects
; an fmla with s-register destination and first source, and lane .s[3] of a
; v register as the second source.
7 define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
8 ; CHECK-LABEL: test_fmla_ss4S
9 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; Lane 3 (the highest lane of <4 x float>) is the by-element operand.
10 %tmp1 = extractelement <4 x float> %v, i32 3
11 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; Operand-order variant of test_fmla_ss4S: the extracted lane 3 of %v is passed
; as the FIRST multiplicand of llvm.fma.f32 instead of the second, so the test
; computes %v[3] * %b + %a. The expected instruction is the same by-element
; fmla reading lane .s[3].
15 define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
16 ; CHECK-LABEL: test_fmla_ss4S_swap
17 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
18 %tmp1 = extractelement <4 x float> %v, i32 3
; The scalar multiplicand is %b and the accumulator is %a, matching the
; operand roles used by test_fmla_dd2D_swap. Using %a for both would leave %b
; dead and tie the multiplicand to the accumulator.
19 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
; Single-precision scalar FMA by element, 2-lane source vector.
; Computes %b * %v[1] + %a through llvm.fma.f32. The FileCheck pattern expects
; a by-element fmla on s registers that reads lane .s[1].
23 define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
24 ; CHECK-LABEL: test_fmla_ss2S
25 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
; Lane 1 is the highest lane of <2 x float>.
26 %tmp1 = extractelement <2 x float> %v, i32 1
27 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
; Double-precision scalar FMA where the vector operand is <1 x double>.
; Computes %b * %v[0] + %a through llvm.fma.f64. The FileCheck pattern is an
; alternation: it accepts either a by-element fmla on lane .d[0] or a plain
; four-operand fmadd on d registers.
; NOTE(review): presumably both forms are accepted because the only lane of a
; one-element vector is the scalar itself -- confirm against the backend.
31 define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
32 ; CHECK-LABEL: test_fmla_ddD
33 ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
34 %tmp1 = extractelement <1 x double> %v, i32 0
35 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; Double-precision scalar FMA by element, 2-lane source vector.
; Computes %b * %v[1] + %a through llvm.fma.f64. The FileCheck pattern expects
; a by-element fmla on d registers that reads lane .d[1].
39 define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
40 ; CHECK-LABEL: test_fmla_dd2D
41 ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; Lane 1 is the highest lane of <2 x double>.
42 %tmp1 = extractelement <2 x double> %v, i32 1
43 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
; Operand-order variant of test_fmla_dd2D: the extracted lane 1 of %v is the
; FIRST multiplicand of llvm.fma.f64 and %b the second, computing
; %v[1] * %b + %a. The expected instruction is the same by-element fmla on
; lane .d[1].
47 define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) {
48 ; CHECK-LABEL: test_fmla_dd2D_swap
49 ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
50 %tmp1 = extractelement <2 x double> %v, i32 1
51 %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
; Single-precision scalar fused multiply-subtract by element, 4-lane vector.
; The extracted lane 3 is negated and used as the first multiplicand, with the
; un-negated lane as the second, so the call computes (-%v[3]) * %v[3] + %a.
; The FileCheck pattern expects a by-element fmls on s registers reading .s[3].
; NOTE(review): %b is never used in the visible body; the fmla counterparts
; multiply the lane by %b -- confirm whether %b was intended as the other factor.
55 define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) {
56 ; CHECK-LABEL: test_fmls_ss4S
57 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
58 %tmp1 = extractelement <4 x float> %v, i32 3
; Subtracting from -0.0 is the negation of %tmp1.
59 %tmp2 = fsub float -0.0, %tmp1
60 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
; Operand-order variant of test_fmls_ss4S: the un-negated lane 3 is the first
; multiplicand and the negated lane the second, so the call computes
; %v[3] * (-%v[3]) + %a. The expected instruction is the same by-element fmls
; reading lane .s[3].
; NOTE(review): %b is never used in the visible body -- confirm whether it was
; intended as one of the multiplicands.
64 define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) {
65 ; CHECK-LABEL: test_fmls_ss4S_swap
66 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
67 %tmp1 = extractelement <4 x float> %v, i32 3
; Subtracting from -0.0 is the negation of %tmp1.
68 %tmp2 = fsub float -0.0, %tmp1
69 %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a)
; Single-precision scalar fused multiply-subtract by element, 2-lane vector.
; The extracted lane 1 is negated and multiplied by the un-negated lane, so the
; call computes (-%v[1]) * %v[1] + %a. The FileCheck pattern expects a
; by-element fmls on s registers reading lane .s[1].
; NOTE(review): %b is never used in the visible body -- confirm whether it was
; intended as one of the multiplicands.
74 define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) {
75 ; CHECK-LABEL: test_fmls_ss2S
76 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
77 %tmp1 = extractelement <2 x float> %v, i32 1
; Subtracting from -0.0 is the negation of %tmp1.
78 %tmp2 = fsub float -0.0, %tmp1
79 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
; Double-precision scalar fused multiply-subtract where the vector operand is
; <1 x double>. Lane 0 is negated and multiplied by the un-negated lane, so the
; call computes (-%v[0]) * %v[0] + %a. The FileCheck pattern is an alternation:
; it accepts either a by-element fmls on lane .d[0] or a plain four-operand
; fmsub on d registers.
; NOTE(review): %b is never used in the visible body -- confirm whether it was
; intended as one of the multiplicands.
83 define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) {
84 ; CHECK-LABEL: test_fmls_ddD
85 ; CHECK: {{fmls d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmsub d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
86 %tmp1 = extractelement <1 x double> %v, i32 0
; Subtracting from -0.0 is the negation of %tmp1.
87 %tmp2 = fsub double -0.0, %tmp1
88 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
; Double-precision scalar fused multiply-subtract by element, 2-lane vector.
; The extracted lane 1 is negated and multiplied by the un-negated lane, so the
; call computes (-%v[1]) * %v[1] + %a. The FileCheck pattern expects a
; by-element fmls on d registers reading lane .d[1].
; NOTE(review): %b is never used in the visible body -- confirm whether it was
; intended as one of the multiplicands.
92 define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) {
93 ; CHECK-LABEL: test_fmls_dd2D
94 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
95 %tmp1 = extractelement <2 x double> %v, i32 1
; Subtracting from -0.0 is the negation of %tmp1.
96 %tmp2 = fsub double -0.0, %tmp1
97 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
; Operand-order variant of test_fmls_dd2D: the un-negated lane 1 is the first
; multiplicand and the negated lane the second, so the call computes
; %v[1] * (-%v[1]) + %a. The expected instruction is the same by-element fmls
; reading lane .d[1].
; NOTE(review): %b is never used in the visible body -- confirm whether it was
; intended as one of the multiplicands.
101 define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) {
102 ; CHECK-LABEL: test_fmls_dd2D_swap
103 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
104 %tmp1 = extractelement <2 x double> %v, i32 1
; Subtracting from -0.0 is the negation of %tmp1.
105 %tmp2 = fsub double -0.0, %tmp1
106 %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a)