From: Chad Rosier Date: Wed, 11 Dec 2013 23:21:25 +0000 (+0000) Subject: [AArch64] Refactor NEON floating-point Max/Min/Maxnm/Minnm across vector AArch64 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=eb1bac0afa78e57bd71a9d8ef98a7f82fb4253a9;p=oota-llvm.git [AArch64] Refactor NEON floating-point Max/Min/Maxnm/Minnm across vector AArch64 intrinsics to use f32 types, rather than their vector equivalents. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197090 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 94b8457bd9d..5c289ce2505 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -98,10 +98,14 @@ def int_aarch64_neon_umaxv : Neon_Across_Intrinsic; def int_aarch64_neon_sminv : Neon_Across_Intrinsic; def int_aarch64_neon_uminv : Neon_Across_Intrinsic; def int_aarch64_neon_vaddv : Neon_Across_Intrinsic; -def int_aarch64_neon_vmaxv : Neon_Across_Intrinsic; -def int_aarch64_neon_vminv : Neon_Across_Intrinsic; -def int_aarch64_neon_vmaxnmv : Neon_Across_Intrinsic; -def int_aarch64_neon_vminnmv : Neon_Across_Intrinsic; +def int_aarch64_neon_vmaxv : + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_aarch64_neon_vminv : + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_aarch64_neon_vmaxnmv : + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_aarch64_neon_vminnmv : + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>; // Vector Table Lookup. 
def int_aarch64_neon_vtbl1 : diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 1b7e0251412..6a2c79de6b7 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -2385,8 +2385,8 @@ multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size, def _1s4s: NeonI_2VAcross<0b1, u, size, opcode, (outs FPR32:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.4s", - [(set (v1f32 FPR32:$Rd), - (v1f32 (opnode (v4f32 VPR128:$Rn))))], + [(set (f32 FPR32:$Rd), + (f32 (opnode (v4f32 VPR128:$Rn))))], NoItinerary>; } diff --git a/test/CodeGen/AArch64/neon-across.ll b/test/CodeGen/AArch64/neon-across.ll index 733db970cf3..6d30c953022 100644 --- a/test/CodeGen/AArch64/neon-across.ll +++ b/test/CodeGen/AArch64/neon-across.ll @@ -1,12 +1,12 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.vminnmv(<4 x float>) -declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.vmaxnmv(<4 x float>) -declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.vminv(<4 x float>) -declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float>) +declare float @llvm.aarch64.neon.vmaxv(<4 x float>) declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>) @@ -442,8 +442,7 @@ define float @test_vmaxvq_f32(<4 x float> %a) { ; CHECK: test_vmaxvq_f32: ; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vmaxv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float> %a) - %0 = extractelement <1 x float> %vmaxv.i, i32 0 + %0 = call float @llvm.aarch64.neon.vmaxv(<4 x float> %a) ret float %0 } @@ -451,8 +450,7 @@ define float @test_vminvq_f32(<4 x float> %a) { ; CHECK: test_vminvq_f32: ; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vminv.i = tail call <1 
x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float> %a) - %0 = extractelement <1 x float> %vminv.i, i32 0 + %0 = call float @llvm.aarch64.neon.vminv(<4 x float> %a) ret float %0 } @@ -460,8 +458,7 @@ define float @test_vmaxnmvq_f32(<4 x float> %a) { ; CHECK: test_vmaxnmvq_f32: ; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vmaxnmv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float> %a) - %0 = extractelement <1 x float> %vmaxnmv.i, i32 0 + %0 = call float @llvm.aarch64.neon.vmaxnmv(<4 x float> %a) ret float %0 } @@ -469,8 +466,7 @@ define float @test_vminnmvq_f32(<4 x float> %a) { ; CHECK: test_vminnmvq_f32: ; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s entry: - %vminnmv.i = tail call <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float> %a) - %0 = extractelement <1 x float> %vminnmv.i, i32 0 + %0 = call float @llvm.aarch64.neon.vminnmv(<4 x float> %a) ret float %0 }