From: Tim Northover Date: Tue, 4 Feb 2014 14:55:42 +0000 (+0000) Subject: ARM & AArch64: merge NEON absolute compare intrinsics X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=f9ced85e49f530c1cc3559b881c4b28a29408985;p=oota-llvm.git ARM & AArch64: merge NEON absolute compare intrinsics There was an extremely confusing proliferation of LLVM intrinsics to implement the vacge & vacgt instructions. This combines them all into two polymorphic intrinsics, shared across both backends. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200768 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index ff95421dfe0..dd80e2e179e 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -155,6 +155,10 @@ class Neon_CvtFPToFx_Intrinsic class Neon_CvtFPtoInt_1Arg_Intrinsic : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +class Neon_Compare_Intrinsic + : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem]>; + // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors. // Besides the table, VTBL has one other v8i8 argument and VTBX has two. // Overall, the classes range from 2 to 6 v8i8 arguments. @@ -224,18 +228,8 @@ def int_arm_neon_vqsubu : Neon_2Arg_Intrinsic; def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic; // Vector Absolute Compare. -def int_arm_neon_vacged : Intrinsic<[llvm_v2i32_ty], - [llvm_v2f32_ty, llvm_v2f32_ty], - [IntrNoMem]>; -def int_arm_neon_vacgeq : Intrinsic<[llvm_v4i32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; -def int_arm_neon_vacgtd : Intrinsic<[llvm_v2i32_ty], - [llvm_v2f32_ty, llvm_v2f32_ty], - [IntrNoMem]>; -def int_arm_neon_vacgtq : Intrinsic<[llvm_v4i32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; +def int_arm_neon_vacge : Neon_Compare_Intrinsic; +def int_arm_neon_vacgt : Neon_Compare_Intrinsic; // Vector Absolute Differences. def int_arm_neon_vabds : Neon_2Arg_Intrinsic; diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index edb619e1e7a..1b7e0f93b95 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -210,9 +210,7 @@ multiclass NeonI_3VSame_BHSD_sizes opcode, // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types, // but Result types can be integer or floating point types. multiclass NeonI_3VSame_SD_sizes opcode, - string asmop, SDPatternOperator opnode2S, - SDPatternOperator opnode4S, - SDPatternOperator opnode2D, + string asmop, SDPatternOperator opnode, ValueType ResTy2S, ValueType ResTy4S, ValueType ResTy2D, bit Commutable = 0> { let isCommutable = Commutable in { @@ -220,21 +218,21 @@ multiclass NeonI_3VSame_SD_sizes opcode, (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", [(set (ResTy2S VPR64:$Rd), - (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], + (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], NoItinerary>; def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", [(set (ResTy4S VPR128:$Rd), - (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], + (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], NoItinerary>; def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", [(set (ResTy2D VPR128:$Rd), - (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], + (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], NoItinerary>; } } @@ -248,19 +246,19 @@ multiclass NeonI_3VSame_SD_sizes opcode, // Vector Add (Integer and Floating-Point) defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>; -defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd, +defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, v2f32, v4f32, v2f64, 1>; // Vector Sub (Integer and Floating-Point) defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>; -defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub, +defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, v2f32, v4f32, v2f64, 0>; // Vector Multiply (Integer and Floating-Point) defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>; -defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul, +defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, v2f32, v4f32, v2f64, 1>; // Vector Multiply (Polynomial) @@ -359,7 +357,7 @@ def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), // Vector Divide (Floating-Point) -defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv, +defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, v2f32, v4f32, v2f64, 0>; // Vector Bitwise Operations @@ -610,19 +608,15 @@ defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, // Vector Absolute Difference (Floating Point) defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd", - int_arm_neon_vabds, int_arm_neon_vabds, int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>; // Vector Reciprocal Step (Floating Point) defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps", - int_arm_neon_vrecps, int_arm_neon_vrecps, int_arm_neon_vrecps, v2f32, v4f32, v2f64, 0>; // Vector Reciprocal Square Root Step (Floating Point) defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", - int_arm_neon_vrsqrts, - int_arm_neon_vrsqrts, int_arm_neon_vrsqrts, v2f32, v4f32, v2f64, 0>; @@ -795,18 +789,15 @@ defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>; // Vector Compare Mask Equal (Floating Point) let isCommutable =1 in { defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq, - Neon_cmeq, Neon_cmeq, v2i32, v4i32, v2i64, 0>; } // Vector Compare Mask Greater Than Or Equal (Floating Point) defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge, - Neon_cmge, Neon_cmge, v2i32, v4i32, v2i64, 0>; // Vector Compare Mask Greater Than (Floating Point) defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt, - Neon_cmgt, Neon_cmgt, v2i32, v4i32, v2i64, 0>; // Vector Compare Mask Less Than Or Equal (Floating Point) @@ -878,14 +869,12 @@ defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>; // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point) defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge", - int_arm_neon_vacged, int_arm_neon_vacgeq, - int_aarch64_neon_vacgeq, + int_arm_neon_vacge, v2i32, v4i32, v2i64, 0>; // Vector Absolute Compare Mask Greater Than (Floating Point) defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt", - int_arm_neon_vacgtd, int_arm_neon_vacgtq, - int_aarch64_neon_vacgtq, + int_arm_neon_vacgt, v2i32, v4i32, v2i64, 0>; // Vector Absolute Compare Mask Less Than Or Equal (Floating Point) @@ -964,25 +953,21 @@ defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, // Vector Maximum (Floating Point) defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax", - int_arm_neon_vmaxs, int_arm_neon_vmaxs, - int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>; + int_arm_neon_vmaxs, + v2f32, v4f32, v2f64, 1>; // Vector Minimum (Floating Point) defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin", - int_arm_neon_vmins, int_arm_neon_vmins, - int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>; + int_arm_neon_vmins, + v2f32, v4f32, v2f64, 1>; // Vector maxNum (Floating Point) - prefer a number over a quiet NaN) defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm", - int_aarch64_neon_vmaxnm, - int_aarch64_neon_vmaxnm, int_aarch64_neon_vmaxnm, v2f32, v4f32, v2f64, 1>; // Vector minNum (Floating Point) - prefer a number over a quiet NaN) defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm", - int_aarch64_neon_vminnm, - int_aarch64_neon_vminnm, int_aarch64_neon_vminnm, v2f32, v4f32, v2f64, 1>; @@ -996,25 +981,19 @@ defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpmin // Vector Maximum Pairwise (Floating Point) defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp", - int_arm_neon_vpmaxs, int_arm_neon_vpmaxs, int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>; // Vector Minimum Pairwise (Floating Point) defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp", - int_arm_neon_vpmins, int_arm_neon_vpmins, int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>; // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN) defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp", - int_aarch64_neon_vpmaxnm, - int_aarch64_neon_vpmaxnm, int_aarch64_neon_vpmaxnm, v2f32, v4f32, v2f64, 1>; // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN) defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp", - int_aarch64_neon_vpminnm, - int_aarch64_neon_vpminnm, int_aarch64_neon_vpminnm, v2f32, v4f32, v2f64, 1>; @@ -1023,8 +1002,6 @@ defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1> // Vector Addition Pairwise (Floating Point) defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp", - int_arm_neon_vpadd, - int_arm_neon_vpadd, int_arm_neon_vpadd, v2f32, v4f32, v2f64, 1>; @@ -1038,8 +1015,6 @@ defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh", // Vector Multiply Extended (Floating Point) defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx", - int_aarch64_neon_vmulx, - int_aarch64_neon_vmulx, int_aarch64_neon_vmulx, v2f32, v4f32, v2f64, 1>; @@ -5381,14 +5356,14 @@ defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; defm : Neon_Scalar3Same_SD_size_patterns; -def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1i64 (int_arm_neon_vacge (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FACGEddd FPR64:$Rn, FPR64:$Rm)>; // Scalar Floating-point Absolute Compare Mask Greater Than defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">; defm : Neon_Scalar3Same_SD_size_patterns; -def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1i64 (int_arm_neon_vacgt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FACGTddd FPR64:$Rn, FPR64:$Rm)>; // Scalar Floating-point Absolute Difference diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index bbf8590935a..b732bca84b8 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4427,14 +4427,14 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", - "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; + "f32", v2i32, v2f32, int_arm_neon_vacge, 0>; def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", - "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; + "f32", v4i32, v4f32, int_arm_neon_vacge, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", - "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; + "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>; def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", - "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; + "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>; // VTST : Vector Test Bits defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll index 146256e4be1..28e821222ff 100644 --- a/test/CodeGen/AArch64/neon-facge-facgt.ll +++ b/test/CodeGen/AArch64/neon-facge-facgt.ll @@ -1,20 +1,20 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) -declare <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double>, <2 x double>) +declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) +declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) +declare <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double>, <2 x double>) define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facge_from_intr_v2i32: - %val = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %A, <2 x float> %B) + %val = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %A, <2 x float> %B) ; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ret <2 x i32> %val } define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facge_from_intr_v4i32: - %val = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %A, <4 x float> %B) + %val = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %A, <4 x float> %B) ; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ret <4 x i32> %val } @@ -22,26 +22,26 @@ define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facge_from_intr_v2i64: - %val = call <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double> %A, <2 x double> %B) + %val = call <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double> %A, <2 x double> %B) ; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d ret <2 x i64> %val } -declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) -declare <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double>, <2 x double>) +declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) +declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) +declare <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double>, <2 x double>) define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facgt_from_intr_v2i32: - %val = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %A, <2 x float> %B) + %val = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %A, <2 x float> %B) ; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ret <2 x i32> %val } define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facgt_from_intr_v4i32: - %val = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %A, <4 x float> %B) + %val = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %A, <4 x float> %B) ; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ret <4 x i32> %val } @@ -49,7 +49,7 @@ define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 and v2 are possible, but would be odd. ; CHECK: facgt_from_intr_v2i64: - %val = call <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double> %A, <2 x double> %B) + %val = call <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double> %A, <2 x double> %B) ; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d ret <2 x i64> %val } diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll index d1b5f9c2546..e1f39645f1b 100644 --- a/test/CodeGen/AArch64/neon-scalar-compare.ll +++ b/test/CodeGen/AArch64/neon-scalar-compare.ll @@ -122,28 +122,28 @@ entry: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcage_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 ret <1 x i64> %vcage2.i } define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcagt_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 ret <1 x i64> %vcagt2.i } define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcale_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 ret <1 x i64> %vcage2.i } define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { ; CHECK: test_vcalt_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 ret <1 x i64> %vcagt2.i } @@ -331,8 +331,8 @@ define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 { ret <1 x i64> %vcltz.i } -declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double>, <1 x double>) declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll index 81a59dbdfe9..df72835ae20 100644 --- a/test/CodeGen/ARM/vcge.ll +++ b/test/CodeGen/ARM/vcge.ll @@ -145,7 +145,7 @@ define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: vacge.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -154,12 +154,12 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: vacge.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } -declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vcgei8Z: diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index 056866fe994..adee76a2fbb 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -146,7 +146,7 @@ define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: vacgt.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2) + %tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -155,7 +155,7 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: vacgt.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2) + %tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } @@ -172,8 +172,8 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x i32> %tmp4 } -declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vcgti8Z: