From: Hao Liu
Date: Mon, 9 Dec 2013 03:51:42 +0000 (+0000)
Subject: [AArch64]Add missing pair intrinsics such as:
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=a339740cb86e49a3300979b16f8c05df43bce637;p=oota-llvm.git

[AArch64]Add missing pair intrinsics such as:
     int32_t vminv_s32(int32x2_t a)
which should be compiled into SMINP Vd.2S,Vn.2S,Vm.2S

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196749 91177308-0d34-0410-b5e6-96231b3b80d8
---

NOTE(review): this copy of the patch had been collapsed onto a few lines and
every angle-bracketed span that begins with a letter had been dropped. The
items below were restored and should be confirmed against the upstream commit
before the patch is applied:
  * the template parameters of Neon_VectorPair_v2i32_pattern (its body refers
    to `opnode` and `INST`) and the arguments of its five `def :`
    instantiations. The intrinsic names are the ones exercised by the tests in
    this patch; the instruction record names are presumed from the naming
    visible elsewhere in the patch (FMULXvvv, SADDLP2s1d) -- TODO confirm;
  * the parameter lists of the two context multiclasses
    neon_mov_imm_shift_asmoperands and NeonI_PairwiseAddAcc (presumed);
  * line breaks and leading whitespace. Hunk line counts are unchanged, but
    context-line indentation is a reconstruction.

What the patch does, as far as the diff itself shows:
  * maps the v2i32 -> v1i32 across-vector intrinsics (sminv, uminv, smaxv,
    umaxv, vaddv) onto the corresponding pairwise instruction applied to
    ($Rn, $Rn), then extracts sub_32 from the result;
  * maps the v2i32 -> v1i64 saddlv/uaddlv intrinsics onto SADDLP2s1d and
    UADDLP2s1d;
  * adds one llc CHECK test per intrinsic.

diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index eb62c13df01..a273468041c 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -978,6 +978,20 @@ defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
                                       int_aarch64_neon_vmulx,
                                       v2f32, v4f32, v2f64, 1>;
 
+// Patterns to match llvm.aarch64.* intrinsic for
+// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
+class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
+  : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
+        (EXTRACT_SUBREG
+             (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
+             sub_32)>;
+
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
+def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
+
 // Vector Immediate Instructions
 
 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
@@ -7695,6 +7709,11 @@ defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
                                 int_arm_neon_vpaddlu>;
 
+def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
+          (SADDLP2s1d $Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
+          (UADDLP2s1d $Rn)>;
+
 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                                 SDPatternOperator Neon_Padd> {
   let Constraints = "$src = $Rd" in {
diff --git a/test/CodeGen/AArch64/neon-add-pairwise.ll b/test/CodeGen/AArch64/neon-add-pairwise.ll
index 1abfed31908..32d8222ded2 100644
--- a/test/CodeGen/AArch64/neon-add-pairwise.ll
+++ b/test/CodeGen/AArch64/neon-add-pairwise.ll
@@ -90,3 +90,12 @@ define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
         ret <2 x double> %val
 }
 
+define i32 @test_vaddv.v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_vaddv.v2i32
+; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32>)
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-max-min-pairwise.ll b/test/CodeGen/AArch64/neon-max-min-pairwise.ll
index d757aca86a6..3e18077337d 100644
--- a/test/CodeGen/AArch64/neon-max-min-pairwise.ll
+++ b/test/CodeGen/AArch64/neon-max-min-pairwise.ll
@@ -308,3 +308,39 @@ define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
         ret <2 x double> %val
 }
 
+define i32 @test_vminv_s32(<2 x i32> %a) {
+; CHECK-LABEL: test_vminv_s32
+; CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+define i32 @test_vminv_u32(<2 x i32> %a) {
+; CHECK-LABEL: test_vminv_u32
+; CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+define i32 @test_vmaxv_s32(<2 x i32> %a) {
+; CHECK-LABEL: test_vmaxv_s32
+; CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+define i32 @test_vmaxv_u32(<2 x i32> %a) {
+; CHECK-LABEL: test_vmaxv_u32
+; CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32>)
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll
index 9660bf2c7a3..851a2f364c0 100644
--- a/test/CodeGen/AArch64/neon-misc.ll
+++ b/test/CodeGen/AArch64/neon-misc.ll
@@ -1796,4 +1796,23 @@ declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
 declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
 declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
 declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
-declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
\ No newline at end of file
+declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
+
+define i64 @test_vaddlv_s32(<2 x i32> %a) {
+; CHECK-LABEL: test_vaddlv_s32
+; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i64> %1, i32 0
+  ret i64 %2
+}
+
+define i64 @test_vaddlv_u32(<2 x i32> %a) {
+; CHECK-LABEL: test_vaddlv_u32
+; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i64> %1, i32 0
+  ret i64 %2
+}
+
+declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>)
+declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>)
\ No newline at end of file