From: Kevin Qin
Date: Wed, 4 Dec 2013 07:53:28 +0000 (+0000)
Subject: [AArch64 NEON] Add missing compare intrinsics.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=c7f14e3d8ceabe2a9ea5ea1cc1640521be4f479d;p=oota-llvm.git

[AArch64 NEON] Add missing compare intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196360 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 42eb868c10b..83edb07d56a 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -4169,6 +4169,11 @@ multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
             (INSTD FPR64:$Rn, FPR64:$Rm)>;
 }
 
+class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
+                                              Instruction INSTD>
+  : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
+        (INSTD FPR64:$Rn, FPR64:$Rm)>;
+
 // Scalar Three Different
 
 class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
@@ -4335,6 +4340,12 @@ class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
                        (i32 neon_uimm0:$Imm))),
         (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
 
+class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
+                                                   Instruction INSTD>
+  : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
+                          (i32 neon_uimm0:$Imm), CC)),
+        (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
+
 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
@@ -5044,18 +5055,22 @@ def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
 // Scalar Compare Signed Greater Than Or Equal
 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
 
 // Scalar Compare Unsigned Higher Or Same
 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
 
 // Scalar Compare Unsigned Higher
 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
 
 // Scalar Compare Signed Greater Than
 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
 
 // Scalar Compare Bitwise Test Bits
 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
@@ -5066,26 +5081,31 @@ def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
                                                 CMEQddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;
 
 // Scalar Compare Signed Greater Than Or Equal To Zero
 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgez,
                                                 CMGEddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;
 
 // Scalar Compare Signed Greater Than Zero
 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgtz,
                                                 CMGTddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;
 
 // Scalar Compare Signed Less Than Or Equal To Zero
 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
                                                 CMLEddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;
 
 // Scalar Compare Less Than Zero
 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
                                                 CMLTddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
 
 
 // Scalar Floating-point Compare
@@ -5093,6 +5113,7 @@ def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_fceq,
                                              FCMEQsss, FCMEQddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;
 
 // Scalar Floating-point Compare Mask Equal To Zero
 defm FCMEQZ:
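The effect of the new pattern classes is easiest to see in IR form: an fcmp on <1 x double> followed by a sign extension of the <1 x i1> result reaches instruction selection as a Neon_cmp node carrying a condition code, which Neon_Scalar3Same_cmp_V1_D_size_patterns now selects straight to the scalar instruction. A minimal sketch of that path (the function name and register numbers below are illustrative, not part of the patch):

define <1 x i64> @fcmge_v1f64_sketch(<1 x double> %a, <1 x double> %b) {
  ; fcmp oge on v1f64 becomes a Neon_cmp node with condition code SETGE
  %cmp = fcmp oge <1 x double> %a, %b
  ; sext produces the all-ones/all-zeros mask that fcmge writes to Dd
  %mask = sext <1 x i1> %cmp to <1 x i64>
  ; expected selection: fcmge d0, d0, d1
  ret <1 x i64> %mask
}
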
 NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
@@ -5103,6 +5124,7 @@ defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq,
 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_fcge,
                                              FCMGEsss, FCMGEddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;
 
 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
@@ -5113,6 +5135,7 @@ defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge,
 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_fcgt,
                                              FCMGTsss, FCMGTddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;
 
 // Scalar Floating-point Compare Mask Greater Than Zero
 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll
index a1cfdf0b5c7..5f10cbbab2a 100644
--- a/test/CodeGen/AArch64/neon-scalar-compare.ll
+++ b/test/CodeGen/AArch64/neon-scalar-compare.ll
@@ -118,6 +118,221 @@ entry:
   ret i64 %0
 }
 
+
+define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vcage_f64
+; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2
+  ret <1 x i64> %vcage2.i
+}
+
+define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vcagt_f64
+; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2
+  ret <1 x i64> %vcagt2.i
+}
+
+define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vcale_f64
+; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2
+  ret <1 x i64> %vcage2.i
+}
+
+define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vcalt_f64
+; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2
+  ret <1 x i64> %vcagt2.i
+}
+
+define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vceq_s64
+; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp eq <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vceq_u64
+; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp eq <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vceq_f64
+; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = fcmp oeq <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vcge_s64
+; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp sge <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vcge_u64
+; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp uge <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vcge_f64
+; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = fcmp oge <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vcle_s64
+; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp sle <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vcle_u64
+; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp ule <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vcle_f64
+; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = fcmp ole <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vcgt_s64
+; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp sgt <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vcgt_u64
+; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp ugt <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vcgt_f64
+; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = fcmp ogt <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vclt_s64
+; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp slt <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK: test_vclt_u64
+; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = icmp ult <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK: test_vclt_f64
+; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
+  %cmp.i = fcmp olt <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 {
+; CHECK: test_vceqz_s64
+; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
+  %1 = icmp eq <1 x i64> %a, zeroinitializer
+  %vceqz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vceqz.i
+}
+
+define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 {
+; CHECK: test_vceqz_u64
+; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
+  %1 = icmp eq <1 x i64> %a, zeroinitializer
+  %vceqz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vceqz.i
+}
+
+define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 {
+; CHECK: test_vceqz_p64
+; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
+  %1 = icmp eq <1 x i64> %a, zeroinitializer
+  %vceqz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vceqz.i
+}
+
+define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 {
+; CHECK: test_vceqzq_p64
+; CHECK: cmeq {{v[0-9]}}.2d, {{v[0-9]}}.2d, #0
+  %1 = icmp eq <2 x i64> %a, zeroinitializer
+  %vceqz.i = zext <2 x i1> %1 to <2 x i64>
+  ret <2 x i64> %vceqz.i
+}
+
+define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 {
+; CHECK: test_vcgez_s64
+; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0
+  %1 = icmp sge <1 x i64> %a, zeroinitializer
+  %vcgez.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vcgez.i
+}
+
+define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 {
+; CHECK: test_vclez_s64
+; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0
+  %1 = icmp sle <1 x i64> %a, zeroinitializer
+  %vclez.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vclez.i
+}
+
+define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 {
+; CHECK: test_vcgtz_s64
+; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0
+  %1 = icmp sgt <1 x i64> %a, zeroinitializer
+  %vcgtz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vcgtz.i
+}
+
+define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 {
+; CHECK: test_vcltz_s64
+; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0
+  %1 = icmp slt <1 x i64> %a, zeroinitializer
+  %vcltz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vcltz.i
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
 declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
 declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
 declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
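The zero-comparand patterns can be exercised the same way. A minimal sketch in the style of the tests above (the function and file names are made up for illustration; the test file's CHECK lines confirm that cmge against #0x0 is the expected selection for the signed greater-than-or-equal-to-zero case):

define <1 x i64> @cmgez_v1i64_sketch(<1 x i64> %a) {
  ; icmp against zero plus the extension of the i1 result reaches the
  ; selector as a Neon_cmpz node with SETGE, which the new
  ; Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns entry selects as
  ; cmge d0, d0, #0x0
  %cmp = icmp sge <1 x i64> %a, zeroinitializer
  %mask = sext <1 x i1> %cmp to <1 x i64>
  ret <1 x i64> %mask
}

To inspect the output, feed such a fragment to llc with the same target options these AArch64 NEON tests use (assuming a built llc in PATH):

llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < cmgez_sketch.ll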