From: Karthik Bhat Date: Mon, 5 Jan 2015 13:57:59 +0000 (+0000) Subject: Select lower fsub,fabs pattern to fabd on AArch64 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=050064d32c0ab7c931094c36baf9705b5847eae1;hp=6abfc44aab521664e8007dd97c135167af7f55c0;p=oota-llvm.git Select lower fsub,fabs pattern to fabd on AArch64 This patch lowers patterns such as "fsub v0.4s, v0.4s, v1.4s; fabs v0.4s, v0.4s" to "fabd v0.4s, v0.4s, v1.4s" on AArch64. Review: http://reviews.llvm.org/D6791 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225169 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 7f914797e37..f4a555499d2 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2760,6 +2760,13 @@ def : Pat<(xor (v4i32 (AArch64vashr (v4i32(sub V128:$Rn, V128:$Rm)), (i32 31))), (AArch64vashr (v4i32(sub V128:$Rn, V128:$Rm)), (i32 31))))), (SABDv4i32 V128:$Rn, V128:$Rm)>; +def : Pat<(v2f32 (fabs (fsub V64:$Rn, V64:$Rm))), + (FABDv2f32 V64:$Rn, V64:$Rm)>; +def : Pat<(v4f32 (fabs (fsub V128:$Rn, V128:$Rm))), + (FABDv4f32 V128:$Rn, V128:$Rm)>; +def : Pat<(v2f64 (fabs (fsub V128:$Rn, V128:$Rm))), + (FABDv2f64 V128:$Rn, V128:$Rm)>; + def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), @@ -3049,6 +3056,11 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", int_aarch64_neon_usqadd>; +def : Pat<(f32 (fabs (fsub FPR32:$Rn, FPR32:$Rm))), + (FABD32 FPR32:$Rn, FPR32:$Rm)>; +def : Pat<(f64 (fabs (fsub FPR64:$Rn, FPR64:$Rm))), + (FABD64 FPR64:$Rn, FPR64:$Rm)>; + def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), diff --git a/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll 
b/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll index 3e1c4b72850..dea65deee02 100644 --- a/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll +++ b/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll @@ -99,3 +99,72 @@ define void @testv8i8(i8* noalias nocapture %a, i8* noalias nocapture readonly % ret void } +; CHECK-LABEL: test_v4f32: +; CHECK: fabd v0.4s, v0.4s, v1.4s +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) +define void @test_v4f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, float* noalias nocapture readonly %c){ + %1 = bitcast float* %b to <4 x float>* + %2 = load <4 x float>* %1 + %3 = bitcast float* %c to <4 x float>* + %4 = load <4 x float>* %3 + %5 = fsub <4 x float> %2, %4 + %6 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %5) + %7 = bitcast float* %a to <4 x float>* + store <4 x float> %6, <4 x float>* %7 + ret void +} + +; CHECK-LABEL: test_v2f32: +; CHECK: fabd v0.2s, v0.2s, v1.2s +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) +define void @test_v2f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, float* noalias nocapture readonly %c){ + %1 = bitcast float* %b to <2 x float>* + %2 = load <2 x float>* %1 + %3 = bitcast float* %c to <2 x float>* + %4 = load <2 x float>* %3 + %5 = fsub <2 x float> %2, %4 + %6 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %5) + %7 = bitcast float* %a to <2 x float>* + store <2 x float> %6, <2 x float>* %7 + ret void +} + +; CHECK-LABEL: test_v2f64: +; CHECK: fabd v0.2d, v0.2d, v1.2d +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) +define void @test_v2f64(double* noalias nocapture %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %c){ + %1 = bitcast double* %b to <2 x double>* + %2 = load <2 x double>* %1 + %3 = bitcast double* %c to <2 x double>* + %4 = load <2 x double>* %3 + %5 = fsub <2 x double> %2, %4 + %6 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %5) + %7 = bitcast double* %a to <2 x double>* + store <2 x double> %6, <2 x double>* %7 + ret void +} + 
+@a = common global float 0.000000e+00 +declare float @fabsf(float) +; CHECK-LABEL: test_fabd32: +; CHECK: fabd s0, s0, s1 +define void @test_fabd32(float %b, float %c) { + %1 = fsub float %b, %c + %fabsf = tail call float @fabsf(float %1) #0 + store float %fabsf, float* @a + ret void +} + +@d = common global double 0.000000e+00 +declare double @fabs(double) +; CHECK-LABEL: test_fabd64: +; CHECK: fabd d0, d0, d1 +define void @test_fabd64(double %b, double %c) { + %1 = fsub double %b, %c + %2 = tail call double @fabs(double %1) #0 + store double %2, double* @d + ret void +} + +attributes #0 = { nounwind readnone} +