ARM & AArch64: merge NEON absolute compare intrinsics

author Tim Northover <tnorthover@apple.com>

Tue, 4 Feb 2014 14:55:42 +0000 (14:55 +0000)

committer Tim Northover <tnorthover@apple.com>

Tue, 4 Feb 2014 14:55:42 +0000 (14:55 +0000)
author Tim Northover <tnorthover@apple.com>
Tue, 4 Feb 2014 14:55:42 +0000 (14:55 +0000)
committer Tim Northover <tnorthover@apple.com>
Tue, 4 Feb 2014 14:55:42 +0000 (14:55 +0000)
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td

index ff95421dfe043cb5e301272a5fe2356a5b1fe5f1..dd80e2e179e3966c2e02dc3d2d1e3ec8a60fd0fa 100644 (file)
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -155,6 +155,10 @@ class Neon_CvtFPToFx_Intrinsic
  class Neon_CvtFPtoInt_1Arg_Intrinsic
    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
  
+class Neon_Compare_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
+              [IntrNoMem]>;
+
  // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
  // Besides the table, VTBL has one other v8i8 argument and VTBX has two.
  // Overall, the classes range from 2 to 6 v8i8 arguments.
@@ -224,18 +228,8 @@ def int_arm_neon_vqsubu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;
  
  // Vector Absolute Compare.
-def int_arm_neon_vacged : Intrinsic<[llvm_v2i32_ty],
-                                    [llvm_v2f32_ty, llvm_v2f32_ty],
-                                    [IntrNoMem]>;
-def int_arm_neon_vacgeq : Intrinsic<[llvm_v4i32_ty],
-                                    [llvm_v4f32_ty, llvm_v4f32_ty],
-                                    [IntrNoMem]>;
-def int_arm_neon_vacgtd : Intrinsic<[llvm_v2i32_ty],
-                                    [llvm_v2f32_ty, llvm_v2f32_ty],
-                                    [IntrNoMem]>;
-def int_arm_neon_vacgtq : Intrinsic<[llvm_v4i32_ty],
-                                    [llvm_v4f32_ty, llvm_v4f32_ty],
-                                    [IntrNoMem]>;
+def int_arm_neon_vacge : Neon_Compare_Intrinsic;
+def int_arm_neon_vacgt : Neon_Compare_Intrinsic;
  
  // Vector Absolute Differences.
  def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td

index edb619e1e7a66725b37ac94ebc699932c0b0361f..1b7e0f93b9540a57ce52c02f56df855aa0730389 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -210,9 +210,7 @@ multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
  // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
  // but Result types can be integer or floating point types.
  multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
-                                 string asmop, SDPatternOperator opnode2S,
-                                 SDPatternOperator opnode4S,
-                                 SDPatternOperator opnode2D,
+                                 string asmop, SDPatternOperator opnode,
                                   ValueType ResTy2S, ValueType ResTy4S,
                                   ValueType ResTy2D, bit Commutable = 0> {
    let isCommutable = Commutable in {
@@ -220,21 +218,21 @@ multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
                [(set (ResTy2S VPR64:$Rd),
-                 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
+                 (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
                NoItinerary>;
  
      def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                [(set (ResTy4S VPR128:$Rd),
-                 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
+                 (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
                NoItinerary>;
  
      def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
                [(set (ResTy2D VPR128:$Rd),
-                 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
+                 (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
                 NoItinerary>;
    }
  }
@@ -248,19 +246,19 @@ multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
  // Vector Add (Integer and Floating-Point)
  
  defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
-defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
+defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd,
                                       v2f32, v4f32, v2f64, 1>;
  
  // Vector Sub (Integer and Floating-Point)
  
  defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
-defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
+defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub,
                                       v2f32, v4f32, v2f64, 0>;
  
  // Vector Multiply (Integer and Floating-Point)
  
  defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
-defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
+defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul,
                                       v2f32, v4f32, v2f64, 1>;
  
  // Vector Multiply (Polynomial)
@@ -359,7 +357,7 @@ def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
  
  // Vector Divide (Floating-Point)
  
-defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
+defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv,
                                       v2f32, v4f32, v2f64, 0>;
  
  // Vector Bitwise Operations
@@ -610,19 +608,15 @@ defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds,
  
  // Vector Absolute Difference (Floating Point)
  defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
-                                    int_arm_neon_vabds, int_arm_neon_vabds,
                                      int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
  
  // Vector Reciprocal Step (Floating Point)
  defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
-                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                         int_arm_neon_vrecps,
                                         v2f32, v4f32, v2f64, 0>;
  
  // Vector Reciprocal Square Root Step (Floating Point)
  defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
-                                        int_arm_neon_vrsqrts,
-                                        int_arm_neon_vrsqrts,
                                          int_arm_neon_vrsqrts,
                                          v2f32, v4f32, v2f64, 0>;
  
@@ -795,18 +789,15 @@ defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
  // Vector Compare Mask Equal (Floating Point)
  let isCommutable =1 in {
  defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
-                                      Neon_cmeq, Neon_cmeq,
                                        v2i32, v4i32, v2i64, 0>;
  }
  
  // Vector Compare Mask Greater Than Or Equal (Floating Point)
  defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
-                                      Neon_cmge, Neon_cmge,
                                        v2i32, v4i32, v2i64, 0>;
  
  // Vector Compare Mask Greater Than (Floating Point)
  defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
-                                      Neon_cmgt, Neon_cmgt,
                                        v2i32, v4i32, v2i64, 0>;
  
  // Vector Compare Mask Less Than Or Equal (Floating Point)
@@ -878,14 +869,12 @@ defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
  
  // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
  defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
-                                      int_arm_neon_vacged, int_arm_neon_vacgeq,
-                                      int_aarch64_neon_vacgeq,
+                                      int_arm_neon_vacge,
                                        v2i32, v4i32, v2i64, 0>;
  
  // Vector Absolute Compare Mask Greater Than (Floating Point)
  defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
-                                      int_arm_neon_vacgtd, int_arm_neon_vacgtq,
-                                      int_aarch64_neon_vacgtq,
+                                      int_arm_neon_vacgt,
                                        v2i32, v4i32, v2i64, 0>;
  
  // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
@@ -964,25 +953,21 @@ defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu,
  
  // Vector Maximum (Floating Point)
  defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
-                                     int_arm_neon_vmaxs, int_arm_neon_vmaxs,
-                                     int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
+                                     int_arm_neon_vmaxs,
+                                     v2f32, v4f32, v2f64, 1>;
  
  // Vector Minimum (Floating Point)
  defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
-                                     int_arm_neon_vmins, int_arm_neon_vmins,
-                                     int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
+                                     int_arm_neon_vmins,
+                                     v2f32, v4f32, v2f64, 1>;
  
  // Vector maxNum (Floating Point) -  prefer a number over a quiet NaN)
  defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
-                                       int_aarch64_neon_vmaxnm,
-                                       int_aarch64_neon_vmaxnm,
                                         int_aarch64_neon_vmaxnm,
                                         v2f32, v4f32, v2f64, 1>;
  
  // Vector minNum (Floating Point) - prefer a number over a quiet NaN)
  defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
-                                       int_aarch64_neon_vminnm,
-                                       int_aarch64_neon_vminnm,
                                         int_aarch64_neon_vminnm,
                                         v2f32, v4f32, v2f64, 1>;
  
@@ -996,25 +981,19 @@ defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpmin
  
  // Vector Maximum Pairwise (Floating Point)
  defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
-                                     int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
                                       int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
  
  // Vector Minimum Pairwise (Floating Point)
  defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
-                                     int_arm_neon_vpmins, int_arm_neon_vpmins,
                                       int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
  
  // Vector maxNum Pairwise (Floating Point) -  prefer a number over a quiet NaN)
  defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
-                                       int_aarch64_neon_vpmaxnm,
-                                       int_aarch64_neon_vpmaxnm,
                                         int_aarch64_neon_vpmaxnm,
                                         v2f32, v4f32, v2f64, 1>;
  
  // Vector minNum Pairwise (Floating Point) -  prefer a number over a quiet NaN)
  defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
-                                       int_aarch64_neon_vpminnm,
-                                       int_aarch64_neon_vpminnm,
                                         int_aarch64_neon_vpminnm,
                                         v2f32, v4f32, v2f64, 1>;
  
@@ -1023,8 +1002,6 @@ defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>
  
  // Vector Addition Pairwise (Floating Point)
  defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
-                                       int_arm_neon_vpadd,
-                                       int_arm_neon_vpadd,
                                         int_arm_neon_vpadd,
                                         v2f32, v4f32, v2f64, 1>;
  
@@ -1038,8 +1015,6 @@ defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
  
  // Vector Multiply Extended (Floating Point)
  defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
-                                      int_aarch64_neon_vmulx,
-                                      int_aarch64_neon_vmulx,
                                        int_aarch64_neon_vmulx,
                                        v2f32, v4f32, v2f64, 1>;
  
@@ -5381,14 +5356,14 @@ defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
  defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
  defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
                                           FACGEsss, v1i64, f64, FACGEddd>;
-def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1i64 (int_arm_neon_vacge (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
            (FACGEddd FPR64:$Rn, FPR64:$Rm)>;
  
  // Scalar Floating-point Absolute Compare Mask Greater Than
  defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
  defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
                                           FACGTsss, v1i64, f64, FACGTddd>;
-def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1i64 (int_arm_neon_vacgt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
            (FACGTddd FPR64:$Rn, FPR64:$Rm)>;
  
  // Scalar Floating-point Absolute Difference
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index bbf8590935ad9a3663e36e2edb9366e4a7ff61f9..b732bca84b87b944b2fa89a0e6276e8b708d3609 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4427,14 +4427,14 @@ defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
  
  //   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
  def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
-                        "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
+                        "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
  def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
-                        "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
+                        "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
  //   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
  def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
-                        "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+                        "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
  def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
-                        "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
+                        "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
  //   VTST     : Vector Test Bits
  defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                          IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll

index 146256e4be112433bfeacb2bb4ccb0df24bfda38..28e821222ff6cc48246d7360f1aaf72f8c375df5 100644 (file)
--- a/test/CodeGen/AArch64/neon-facge-facgt.ll
+++ b/test/CodeGen/AArch64/neon-facge-facgt.ll
@@ -1,20 +1,20 @@
  ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
  
-declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>)
-declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>)
-declare <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double>, <2 x double>)
+declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>)
+declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>)
+declare <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double>, <2 x double>)
  
  define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
  ; Using registers other than v0, v1 and v2 are possible, but would be odd.
  ; CHECK: facge_from_intr_v2i32:
-  %val = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %A, <2 x float> %B)
+  %val = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %A, <2 x float> %B)
  ; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    ret <2 x i32> %val
  }
  define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
  ; Using registers other than v0, v1 and v2 are possible, but would be odd.
  ; CHECK: facge_from_intr_v4i32:
-  %val = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %A, <4 x float> %B)
+  %val = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %A, <4 x float> %B)
  ; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    ret <4 x i32> %val
  }
@@ -22,26 +22,26 @@ define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
  define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
  ; Using registers other than v0, v1 and v2 are possible, but would be odd.
  ; CHECK: facge_from_intr_v2i64:
-  %val = call <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double> %A, <2 x double> %B)
+  %val = call <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double> %A, <2 x double> %B)
  ; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    ret <2 x i64> %val
  }
  
-declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>)
-declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>)
-declare <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double>, <2 x double>)
+declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>)
+declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>)
+declare <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double>, <2 x double>)
  
  define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
  ; Using registers other than v0, v1 and v2 are possible, but would be odd.
  ; CHECK: facgt_from_intr_v2i32:
-  %val = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %A, <2 x float> %B)
+  %val = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %A, <2 x float> %B)
  ; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    ret <2 x i32> %val
  }
  define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
  ; Using registers other than v0, v1 and v2 are possible, but would be odd.
  ; CHECK: facgt_from_intr_v4i32:
-  %val = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %A, <4 x float> %B)
+  %val = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %A, <4 x float> %B)
  ; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
    ret <4 x i32> %val
  }
@@ -49,7 +49,7 @@ define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
  define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
  ; Using registers other than v0, v1 and v2 are possible, but would be odd.
  ; CHECK: facgt_from_intr_v2i64:
-  %val = call <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double> %A, <2 x double> %B)
+  %val = call <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double> %A, <2 x double> %B)
  ; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    ret <2 x i64> %val
  }
diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll

index d1b5f9c2546e394ceeafa09bc21d6ce69927be6b..e1f39645f1bd34f10292b554e4d3cd9fe1c609b0 100644 (file)
--- a/test/CodeGen/AArch64/neon-scalar-compare.ll
+++ b/test/CodeGen/AArch64/neon-scalar-compare.ll
@@ -122,28 +122,28 @@ entry:
  define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
  ; CHECK: test_vcage_f64
  ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2
+  %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2
    ret <1 x i64> %vcage2.i
  }
  
  define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
  ; CHECK: test_vcagt_f64
  ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2
+  %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2
    ret <1 x i64> %vcagt2.i
  }
  
  define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
  ; CHECK: test_vcale_f64
  ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2
+  %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2
    ret <1 x i64> %vcage2.i
  }
  
  define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
  ; CHECK: test_vcalt_f64
  ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-  %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2
+  %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2
    ret <1 x i64> %vcagt2.i
  }
  
@@ -331,8 +331,8 @@ define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 {
    ret <1 x i64> %vcltz.i
  }
  
-declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double>, <1 x double>)
  declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
  declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
  declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll

index 81a59dbdfe90772fd02d01c978e5418568ed45ac..df72835ae202ec04bfcb5ecadb9ea61463864410 100644 (file)
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -145,7 +145,7 @@ define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
  ;CHECK: vacge.f32
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x i32> %tmp3
  }
  
@@ -154,12 +154,12 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
  ;CHECK: vacge.f32
         %tmp1 = load <4 x float>* %A
         %tmp2 = load <4 x float>* %B
-       %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
+       %tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         ret <4 x i32> %tmp3
  }
  
-declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
  
  define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
  ;CHECK-LABEL: vcgei8Z:
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll

index 056866fe994b39c2cfb3452994c234535a4cbf59..adee76a2fbb943355b7b2098a1ef0afc5fe87b68 100644 (file)
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -146,7 +146,7 @@ define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
  ;CHECK: vacgt.f32
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x i32> %tmp3
  }
  
@@ -155,7 +155,7 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
  ;CHECK: vacgt.f32
         %tmp1 = load <4 x float>* %A
         %tmp2 = load <4 x float>* %B
-       %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
+       %tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         ret <4 x i32> %tmp3
  }
  
@@ -172,8 +172,8 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
         ret <4 x i32> %tmp4
  }
  
-declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
  
  define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
  ;CHECK-LABEL: vcgti8Z:
author	Tim Northover <tnorthover@apple.com>
	Tue, 4 Feb 2014 14:55:42 +0000 (14:55 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Tue, 4 Feb 2014 14:55:42 +0000 (14:55 +0000)
include/llvm/IR/IntrinsicsARM.td		patch \| blob \| history
lib/Target/AArch64/AArch64InstrNEON.td		patch \| blob \| history
lib/Target/ARM/ARMInstrNEON.td		patch \| blob \| history
test/CodeGen/AArch64/neon-facge-facgt.ll		patch \| blob \| history
test/CodeGen/AArch64/neon-scalar-compare.ll		patch \| blob \| history
test/CodeGen/ARM/vcge.ll		patch \| blob \| history
test/CodeGen/ARM/vcgt.ll		patch \| blob \| history