From: Javed Absar Date: Mon, 29 Jun 2015 09:32:29 +0000 (+0000) Subject: [ARM]: Extend -mfpu options for half-precision and vfpv3xd X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=263dd533ee45e47a6866bc942f73dd75e87b6c11;p=oota-llvm.git [ARM]: Extend -mfpu options for half-precision and vfpv3xd Some of the the permissible ARM -mfpu options, which are supported in GCC, are currently not present in llvm/clang.This patch adds the options: 'neon-fp16', 'vfpv3-fp16', 'vfpv3-d16-fp16', 'vfpv3xd' and 'vfpv3xd-fp16. These are related to half-precision floating-point and single precision. Reviewers: rengolin, ranjeet.singh Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D10645 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240930 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h index 777ee2075d6..dab724895e8 100644 --- a/include/llvm/Support/TargetParser.h +++ b/include/llvm/Support/TargetParser.h @@ -36,7 +36,11 @@ namespace ARM { FK_VFP, FK_VFPV2, FK_VFPV3, + FK_VFPV3_FP16, FK_VFPV3_D16, + FK_VFPV3_D16_FP16, + FK_VFPV3XD, + FK_VFPV3XD_FP16, FK_VFPV4, FK_VFPV4_D16, FK_FPV4_SP_D16, @@ -44,6 +48,7 @@ namespace ARM { FK_FPV5_SP_D16, FK_FP_ARMV8, FK_NEON, + FK_NEON_FP16, FK_NEON_VFPV4, FK_NEON_FP_ARMV8, FK_CRYPTO_NEON_FP_ARMV8, @@ -51,6 +56,16 @@ namespace ARM { FK_LAST }; + // FPU Version + enum FPUVersion { + FV_NONE = 0, + FV_VFPV2, + FV_VFPV3, + FV_VFPV3_FP16, + FV_VFPV4, + FV_VFPV5 + }; + // An FPU name implies one of three levels of Neon support: enum NeonSupportLevel { NS_None = 0, ///< No Neon diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp index 760cdc11f26..b1ba39e4405 100644 --- a/lib/Support/TargetParser.cpp +++ b/lib/Support/TargetParser.cpp @@ -25,31 +25,37 @@ namespace { // List of canonical FPU names (use getFPUSynonym) and which architectural // features they correspond to (use getFPUFeatures). // FIXME: TableGen this. +// The entries must appear in the order listed in ARM::FPUKind for correct indexing struct { const char * Name; ARM::FPUKind ID; - unsigned FPUVersion; ///< Corresponds directly to the FP arch version number. + ARM::FPUVersion FPUVersion; ARM::NeonSupportLevel NeonSupport; ARM::FPURestriction Restriction; } FPUNames[] = { - { "invalid", ARM::FK_INVALID, 0, ARM::NS_None, ARM::FR_None}, - { "none", ARM::FK_NONE, 0, ARM::NS_None, ARM::FR_None}, - { "vfp", ARM::FK_VFP, 2, ARM::NS_None, ARM::FR_None}, - { "vfpv2", ARM::FK_VFPV2, 2, ARM::NS_None, ARM::FR_None}, - { "vfpv3", ARM::FK_VFPV3, 3, ARM::NS_None, ARM::FR_None}, - { "vfpv3-d16", ARM::FK_VFPV3_D16, 3, ARM::NS_None, ARM::FR_D16}, - { "vfpv4", ARM::FK_VFPV4, 4, ARM::NS_None, ARM::FR_None}, - { "vfpv4-d16", ARM::FK_VFPV4_D16, 4, ARM::NS_None, ARM::FR_D16}, - { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, 4, ARM::NS_None, ARM::FR_SP_D16}, - { "fpv5-d16", ARM::FK_FPV5_D16, 5, ARM::NS_None, ARM::FR_D16}, - { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, 5, ARM::NS_None, ARM::FR_SP_D16}, - { "fp-armv8", ARM::FK_FP_ARMV8, 5, ARM::NS_None, ARM::FR_None}, - { "neon", ARM::FK_NEON, 3, ARM::NS_Neon, ARM::FR_None}, - { "neon-vfpv4", ARM::FK_NEON_VFPV4, 4, ARM::NS_Neon, ARM::FR_None}, - { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, 5, ARM::NS_Neon, ARM::FR_None}, + { "invalid", ARM::FK_INVALID, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, + { "none", ARM::FK_NONE, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, + { "vfp", ARM::FK_VFP, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None}, + { "vfpv2", ARM::FK_VFPV2, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None}, + { "vfpv3", ARM::FK_VFPV3, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_None}, + { "vfpv3-fp16", ARM::FK_VFPV3_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_None}, + { "vfpv3-d16", ARM::FK_VFPV3_D16, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_D16}, + { "vfpv3-d16-fp16", ARM::FK_VFPV3_D16_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_D16}, + { "vfpv3xd", ARM::FK_VFPV3XD, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_SP_D16}, + { "vfpv3xd-fp16", ARM::FK_VFPV3XD_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_SP_D16}, + { "vfpv4", ARM::FK_VFPV4, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_None}, + { "vfpv4-d16", ARM::FK_VFPV4_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_D16}, + { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_SP_D16}, + { "fpv5-d16", ARM::FK_FPV5_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_D16}, + { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_SP_D16}, + { "fp-armv8", ARM::FK_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_None}, + { "neon", ARM::FK_NEON, ARM::FV_VFPV3, ARM::NS_Neon, ARM::FR_None}, + { "neon-fp16", ARM::FK_NEON_FP16, ARM::FV_VFPV3_FP16, ARM::NS_Neon, ARM::FR_None}, + { "neon-vfpv4", ARM::FK_NEON_VFPV4, ARM::FV_VFPV4, ARM::NS_Neon, ARM::FR_None}, + { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Neon, ARM::FR_None}, { "crypto-neon-fp-armv8", - ARM::FK_CRYPTO_NEON_FP_ARMV8, 5, ARM::NS_Crypto, ARM::FR_None}, - { "softvfp", ARM::FK_SOFTVFP, 0, ARM::NS_None, ARM::FR_None}, + ARM::FK_CRYPTO_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Crypto, ARM::FR_None}, + { "softvfp", ARM::FK_SOFTVFP, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, }; // List of canonical arch names (use getArchSynonym). @@ -279,33 +285,41 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind, // higher. We also have to make sure to disable fp16 when vfp4 is disabled, // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16. switch (FPUNames[FPUKind].FPUVersion) { - case 5: + case ARM::FV_VFPV5: Features.push_back("+fp-armv8"); break; - case 4: + case ARM::FV_VFPV4: Features.push_back("+vfp4"); Features.push_back("-fp-armv8"); break; - case 3: + case ARM::FV_VFPV3_FP16: + Features.push_back("+vfp3"); + Features.push_back("+fp16"); + Features.push_back("-vfp4"); + Features.push_back("-fp-armv8"); + break; + case ARM::FV_VFPV3: Features.push_back("+vfp3"); Features.push_back("-fp16"); Features.push_back("-vfp4"); Features.push_back("-fp-armv8"); break; - case 2: + case ARM::FV_VFPV2: Features.push_back("+vfp2"); Features.push_back("-vfp3"); Features.push_back("-fp16"); Features.push_back("-vfp4"); Features.push_back("-fp-armv8"); break; - case 0: + case ARM::FV_NONE: Features.push_back("-vfp2"); Features.push_back("-vfp3"); Features.push_back("-fp16"); Features.push_back("-vfp4"); Features.push_back("-fp-armv8"); break; + default: + return false; } // crypto includes neon, so we handle this similarly to FPU version. diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 4530e4155ae..738ddedccda 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -630,7 +630,7 @@ void ARMAsmPrinter::emitAttributes() { } else if (STI.hasVFP4()) ATS.emitFPU(ARM::FK_NEON_VFPV4); else - ATS.emitFPU(ARM::FK_NEON); + ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON); // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, @@ -648,7 +648,13 @@ void ARMAsmPrinter::emitAttributes() { ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) : ARM::FK_VFPV4); else if (STI.hasVFP3()) - ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3); + ATS.emitFPU(STI.hasD16() + // +d16 + ? (STI.isFPOnlySP() + ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD) + : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16)) + // -d16 + : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3)); else if (STI.hasVFP2()) ATS.emitFPU(ARM::FK_VFPV2); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 61af583fd60..4d12bfb5d60 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -797,12 +797,44 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + case ARM::FK_VFPV3_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV3_D16: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3B, /* OverwriteExisting= */ false); break; + case ARM::FK_VFPV3_D16_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + + case ARM::FK_VFPV3XD: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV3XD_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV4: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, @@ -842,6 +874,18 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + case ARM::FK_NEON_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowNeon, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_NEON_VFPV4: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index 9aa2d6565d3..29c702304a3 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -51,6 +51,13 @@ ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A17-NOFPU ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST + +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-FP16 +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-D16-FP16 +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD-FP16 +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=+neon,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-NEON-FP16 + ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST @@ -1091,7 +1098,7 @@ ; CORTEX-R7: .eabi_attribute 7, 82 ; CORTEX-R7: .eabi_attribute 8, 1 ; CORTEX-R7: .eabi_attribute 9, 2 -; CORTEX-R7: .fpu vfpv3-d16 +; CORTEX-R7: .fpu vfpv3xd ; CORTEX-R7-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R7: .eabi_attribute 20, 1 @@ -1205,6 +1212,12 @@ ; CORTEX-A72-FAST-NOT: .eabi_attribute 22 ; CORTEX-A72-FAST: .eabi_attribute 23, 1 +; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16 +; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16 +; GENERIC-FPU-VFPV3XD: .fpu vfpv3xd +; GENERIC-FPU-VFPV3XD-FP16: .fpu vfpv3xd-fp16 +; GENERIC-FPU-NEON-FP16: .fpu neon-fp16 + ; GENERIC-ARMV8_1-A: .eabi_attribute 6, 14 ; GENERIC-ARMV8_1-A: .eabi_attribute 7, 65 ; GENERIC-ARMV8_1-A: .eabi_attribute 8, 1 diff --git a/test/MC/ARM/directive-fpu-multiple.s b/test/MC/ARM/directive-fpu-multiple.s index 66fc2741596..50389a11532 100644 --- a/test/MC/ARM/directive-fpu-multiple.s +++ b/test/MC/ARM/directive-fpu-multiple.s @@ -10,7 +10,11 @@ .fpu vfp .fpu vfpv2 .fpu vfpv3 + .fpu vfpv3-fp16 .fpu vfpv3-d16 + .fpu vfpv3-d16-fp16 + .fpu vfpv3xd + .fpu vfpv3xd-fp16 .fpu vfpv4 .fpu vfpv4-d16 .fpu fpv4-sp-d16 @@ -18,6 +22,7 @@ .fpu fpv5-sp-d16 .fpu fp-armv8 .fpu neon + .fpu neon-fp16 .fpu neon-vfpv4 .fpu neon-fp-armv8 .fpu crypto-neon-fp-armv8