[AArch64] Refactor the NEON scalar floating-point reciprocal step and reciprocal square root step intrinsics to use f32/f64 types, rather than their vector equivalents.

author Chad Rosier <mcrosier@codeaurora.org>

Wed, 11 Dec 2013 21:03:43 +0000 (21:03 +0000)

committer Chad Rosier <mcrosier@codeaurora.org>

Wed, 11 Dec 2013 21:03:43 +0000 (21:03 +0000)
author Chad Rosier <mcrosier@codeaurora.org>
Wed, 11 Dec 2013 21:03:43 +0000 (21:03 +0000)
committer Chad Rosier <mcrosier@codeaurora.org>
Wed, 11 Dec 2013 21:03:43 +0000 (21:03 +0000)
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td

index fb31452fb2392c83b86ae805fd1cfe67cc82115a..52b651ea9bcb8e0a6059f36e6f0c40a3771b0273 100644 (file)
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -272,6 +272,16 @@ def int_aarch64_neon_vrecpx :
  def int_aarch64_neon_vrsqrte :
    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
  
+// Scalar Floating-point Reciprocal Step
+def int_aarch64_neon_vrecps :
+  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem]>;
+
+// Scalar Floating-point Reciprocal Square Root Step
+def int_aarch64_neon_vrsqrts :
+  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem]>;
+
  class Neon_Cmp_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
                [IntrNoMem]>;
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td

index 9e02dc4fff179803d2c4e385048fa768b9865080..f9d404252b692443f6e08eb5e97fb4d364362c26 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -4187,11 +4187,14 @@ multiclass Neon_Scalar3Same_fabd_SD_size_patterns<SDPatternOperator opnode,
  }
  
  multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
+                                             SDPatternOperator opnodeV,
                                               Instruction INSTS,
                                               Instruction INSTD> {
-  def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
+  def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
              (INSTS FPR32:$Rn, FPR32:$Rm)>;
-  def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+  def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+            (INSTD FPR64:$Rn, FPR64:$Rm)>;
+  def : Pat<(v1f64 (opnodeV (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
              (INSTD FPR64:$Rn, FPR64:$Rm)>;
  }
  
@@ -4874,18 +4877,15 @@ defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
  
  // Scalar Floating-point Reciprocal Step
  defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
+defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps,
+                                         int_arm_neon_vrecps, FRECPSsss,
+                                         FRECPSddd>;
  
  // Scalar Floating-point Reciprocal Square Root Step
  defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Floating-point Reciprocal Step and
-// Scalar Floating-point Reciprocal Square Root Step
-defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
-                                                              FRECPSddd>;
-defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
-                                                               FRSQRTSddd>;
-
+defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts,
+                                         int_arm_neon_vrsqrts, FRSQRTSsss,
+                                         FRSQRTSddd>;
  def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
  
  // Patterns to match llvm.aarch64.* intrinsic for
diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll

index bd549a86a40c164d704b09ebebdd0bf366fdd1fd..100839b14e6781f1190af766ac97db90ec6141ee 100644 (file)
--- a/test/CodeGen/AArch64/neon-scalar-recip.ll
+++ b/test/CodeGen/AArch64/neon-scalar-recip.ll
@@ -3,48 +3,36 @@
  define float @test_vrecpss_f32(float %a, float %b) {
  ; CHECK: test_vrecpss_f32
  ; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  %1 = insertelement <1 x float> undef, float %a, i32 0
-  %2 = insertelement <1 x float> undef, float %b, i32 0
-  %3 = call <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float> %1, <1 x float> %2)
-  %4 = extractelement <1 x float> %3, i32 0
-  ret float %4
+  %1 = call float @llvm.aarch64.neon.vrecps.f32(float %a, float %b)
+  ret float %1
  }
  
  define double @test_vrecpsd_f64(double %a, double %b) {
  ; CHECK: test_vrecpsd_f64
  ; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  %1 = insertelement <1 x double> undef, double %a, i32 0
-  %2 = insertelement <1 x double> undef, double %b, i32 0
-  %3 = call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %1, <1 x double> %2)
-  %4 = extractelement <1 x double> %3, i32 0
-  ret double %4
+  %1 = call double @llvm.aarch64.neon.vrecps.f64(double %a, double %b)
+  ret double %1
  }
  
-declare <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float>, <1 x float>)
-declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
+declare float @llvm.aarch64.neon.vrecps.f32(float, float)
+declare double @llvm.aarch64.neon.vrecps.f64(double, double)
  
  define float @test_vrsqrtss_f32(float %a, float %b) {
  ; CHECK: test_vrsqrtss_f32
  ; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-  %1 = insertelement <1 x float> undef, float %a, i32 0
-  %2 = insertelement <1 x float> undef, float %b, i32 0
-  %3 = call <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float> %1, <1 x float> %2)
-  %4 = extractelement <1 x float> %3, i32 0
-  ret float %4
+  %1 = call float @llvm.aarch64.neon.vrsqrts.f32(float %a, float %b)
+  ret float %1
  }
  
  define double @test_vrsqrtsd_f64(double %a, double %b) {
  ; CHECK: test_vrsqrtsd_f64
  ; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  %1 = insertelement <1 x double> undef, double %a, i32 0
-  %2 = insertelement <1 x double> undef, double %b, i32 0
-  %3 = call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %1, <1 x double> %2)
-  %4 = extractelement <1 x double> %3, i32 0
-  ret double %4
+  %1 = call double @llvm.aarch64.neon.vrsqrts.f64(double %a, double %b)
+  ret double %1
  }
  
-declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>)
-declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
+declare float @llvm.aarch64.neon.vrsqrts.f32(float, float)
+declare double @llvm.aarch64.neon.vrsqrts.f64(double, double)
  
  define float @test_vrecpes_f32(float %a) {
  ; CHECK: test_vrecpes_f32
author	Chad Rosier <mcrosier@codeaurora.org>
	Wed, 11 Dec 2013 21:03:43 +0000 (21:03 +0000)
committer	Chad Rosier <mcrosier@codeaurora.org>
	Wed, 11 Dec 2013 21:03:43 +0000 (21:03 +0000)
include/llvm/IR/IntrinsicsAArch64.td		patch \| blob \| history
lib/Target/AArch64/AArch64InstrNEON.td		patch \| blob \| history
test/CodeGen/AArch64/neon-scalar-recip.ll		patch \| blob \| history