From: Javed Absar <javed.absar@arm.com>
Date: Mon, 29 Jun 2015 09:32:29 +0000 (+0000)
Subject: [ARM]: Extend -mfpu options for half-precision and vfpv3xd
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=263dd533ee45e47a6866bc942f73dd75e87b6c11;p=oota-llvm.git

[ARM]: Extend -mfpu options for half-precision and vfpv3xd

Some of the the permissible ARM -mfpu options, which are supported in GCC,
are currently not present in llvm/clang.This patch adds the options:
'neon-fp16', 'vfpv3-fp16', 'vfpv3-d16-fp16', 'vfpv3xd' and 'vfpv3xd-fp16.
These are related to half-precision floating-point and single precision.

Reviewers: rengolin, ranjeet.singh

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D10645


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240930 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h
index 777ee2075d6..dab724895e8 100644
--- a/include/llvm/Support/TargetParser.h
+++ b/include/llvm/Support/TargetParser.h
@@ -36,7 +36,11 @@ namespace ARM {
     FK_VFP,
     FK_VFPV2,
     FK_VFPV3,
+    FK_VFPV3_FP16,
     FK_VFPV3_D16,
+    FK_VFPV3_D16_FP16,
+    FK_VFPV3XD,
+    FK_VFPV3XD_FP16,
     FK_VFPV4,
     FK_VFPV4_D16,
     FK_FPV4_SP_D16,
@@ -44,6 +48,7 @@ namespace ARM {
     FK_FPV5_SP_D16,
     FK_FP_ARMV8,
     FK_NEON,
+    FK_NEON_FP16,
     FK_NEON_VFPV4,
     FK_NEON_FP_ARMV8,
     FK_CRYPTO_NEON_FP_ARMV8,
@@ -51,6 +56,16 @@ namespace ARM {
     FK_LAST
   };
 
+  // FPU Version
+  enum FPUVersion {
+    FV_NONE = 0,
+    FV_VFPV2,
+    FV_VFPV3,
+    FV_VFPV3_FP16,
+    FV_VFPV4,
+    FV_VFPV5
+  };
+
   // An FPU name implies one of three levels of Neon support:
   enum NeonSupportLevel {
     NS_None = 0, ///< No Neon
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index 760cdc11f26..b1ba39e4405 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -25,31 +25,37 @@ namespace {
 // List of canonical FPU names (use getFPUSynonym) and which architectural
 // features they correspond to (use getFPUFeatures).
 // FIXME: TableGen this.
+// The entries must appear in the order listed in ARM::FPUKind for correct indexing
 struct {
   const char * Name;
   ARM::FPUKind ID;
-  unsigned FPUVersion; ///< Corresponds directly to the FP arch version number.
+  ARM::FPUVersion FPUVersion;
   ARM::NeonSupportLevel NeonSupport;
   ARM::FPURestriction Restriction;
 } FPUNames[] = {
-  { "invalid",       ARM::FK_INVALID,       0, ARM::NS_None,   ARM::FR_None},
-  { "none",          ARM::FK_NONE,          0, ARM::NS_None,   ARM::FR_None},
-  { "vfp",           ARM::FK_VFP,           2, ARM::NS_None,   ARM::FR_None},
-  { "vfpv2",         ARM::FK_VFPV2,         2, ARM::NS_None,   ARM::FR_None},
-  { "vfpv3",         ARM::FK_VFPV3,         3, ARM::NS_None,   ARM::FR_None},
-  { "vfpv3-d16",     ARM::FK_VFPV3_D16,     3, ARM::NS_None,   ARM::FR_D16},
-  { "vfpv4",         ARM::FK_VFPV4,         4, ARM::NS_None,   ARM::FR_None},
-  { "vfpv4-d16",     ARM::FK_VFPV4_D16,     4, ARM::NS_None,   ARM::FR_D16},
-  { "fpv4-sp-d16",   ARM::FK_FPV4_SP_D16,   4, ARM::NS_None,   ARM::FR_SP_D16},
-  { "fpv5-d16",      ARM::FK_FPV5_D16,      5, ARM::NS_None,   ARM::FR_D16},
-  { "fpv5-sp-d16",   ARM::FK_FPV5_SP_D16,   5, ARM::NS_None,   ARM::FR_SP_D16},
-  { "fp-armv8",      ARM::FK_FP_ARMV8,      5, ARM::NS_None,   ARM::FR_None},
-  { "neon",          ARM::FK_NEON,          3, ARM::NS_Neon,   ARM::FR_None},
-  { "neon-vfpv4",    ARM::FK_NEON_VFPV4,    4, ARM::NS_Neon,   ARM::FR_None},
-  { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, 5, ARM::NS_Neon,   ARM::FR_None},
+  { "invalid",        ARM::FK_INVALID,        ARM::FV_NONE,       ARM::NS_None,   ARM::FR_None},
+  { "none",           ARM::FK_NONE,           ARM::FV_NONE,       ARM::NS_None,   ARM::FR_None},
+  { "vfp",            ARM::FK_VFP,            ARM::FV_VFPV2,      ARM::NS_None,   ARM::FR_None},
+  { "vfpv2",          ARM::FK_VFPV2,          ARM::FV_VFPV2,      ARM::NS_None,   ARM::FR_None},
+  { "vfpv3",          ARM::FK_VFPV3,          ARM::FV_VFPV3,      ARM::NS_None,   ARM::FR_None},
+  { "vfpv3-fp16",     ARM::FK_VFPV3_FP16,     ARM::FV_VFPV3_FP16, ARM::NS_None,   ARM::FR_None},
+  { "vfpv3-d16",      ARM::FK_VFPV3_D16,      ARM::FV_VFPV3,      ARM::NS_None,   ARM::FR_D16},
+  { "vfpv3-d16-fp16", ARM::FK_VFPV3_D16_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None,   ARM::FR_D16},
+  { "vfpv3xd",        ARM::FK_VFPV3XD,        ARM::FV_VFPV3,      ARM::NS_None,   ARM::FR_SP_D16},
+  { "vfpv3xd-fp16",   ARM::FK_VFPV3XD_FP16,   ARM::FV_VFPV3_FP16, ARM::NS_None,   ARM::FR_SP_D16},
+  { "vfpv4",          ARM::FK_VFPV4,          ARM::FV_VFPV4,      ARM::NS_None,   ARM::FR_None},
+  { "vfpv4-d16",      ARM::FK_VFPV4_D16,      ARM::FV_VFPV4,      ARM::NS_None,   ARM::FR_D16},
+  { "fpv4-sp-d16",    ARM::FK_FPV4_SP_D16,    ARM::FV_VFPV4,      ARM::NS_None,   ARM::FR_SP_D16},
+  { "fpv5-d16",       ARM::FK_FPV5_D16,       ARM::FV_VFPV5,      ARM::NS_None,   ARM::FR_D16},
+  { "fpv5-sp-d16",    ARM::FK_FPV5_SP_D16,    ARM::FV_VFPV5,      ARM::NS_None,   ARM::FR_SP_D16},
+  { "fp-armv8",       ARM::FK_FP_ARMV8,       ARM::FV_VFPV5,      ARM::NS_None,   ARM::FR_None},
+  { "neon",           ARM::FK_NEON,           ARM::FV_VFPV3,      ARM::NS_Neon,   ARM::FR_None},
+  { "neon-fp16",      ARM::FK_NEON_FP16,      ARM::FV_VFPV3_FP16, ARM::NS_Neon,   ARM::FR_None},
+  { "neon-vfpv4",     ARM::FK_NEON_VFPV4,     ARM::FV_VFPV4,      ARM::NS_Neon,   ARM::FR_None},
+  { "neon-fp-armv8",  ARM::FK_NEON_FP_ARMV8,  ARM::FV_VFPV5,      ARM::NS_Neon,   ARM::FR_None},
   { "crypto-neon-fp-armv8",
-              ARM::FK_CRYPTO_NEON_FP_ARMV8, 5, ARM::NS_Crypto, ARM::FR_None},
-  { "softvfp",       ARM::FK_SOFTVFP,       0, ARM::NS_None,   ARM::FR_None},
+               ARM::FK_CRYPTO_NEON_FP_ARMV8,  ARM::FV_VFPV5,      ARM::NS_Crypto, ARM::FR_None},
+  { "softvfp",        ARM::FK_SOFTVFP,        ARM::FV_NONE,       ARM::NS_None,   ARM::FR_None},
 };
 
 // List of canonical arch names (use getArchSynonym).
@@ -279,33 +285,41 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
   // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
   // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
   switch (FPUNames[FPUKind].FPUVersion) {
-  case 5:
+  case ARM::FV_VFPV5:
     Features.push_back("+fp-armv8");
     break;
-  case 4:
+  case ARM::FV_VFPV4:
     Features.push_back("+vfp4");
     Features.push_back("-fp-armv8");
     break;
-  case 3:
+  case ARM::FV_VFPV3_FP16:
+    Features.push_back("+vfp3");
+    Features.push_back("+fp16");
+    Features.push_back("-vfp4");
+    Features.push_back("-fp-armv8");
+    break;
+  case ARM::FV_VFPV3:
     Features.push_back("+vfp3");
     Features.push_back("-fp16");
     Features.push_back("-vfp4");
     Features.push_back("-fp-armv8");
     break;
-  case 2:
+  case ARM::FV_VFPV2:
     Features.push_back("+vfp2");
     Features.push_back("-vfp3");
     Features.push_back("-fp16");
     Features.push_back("-vfp4");
     Features.push_back("-fp-armv8");
     break;
-  case 0:
+  case ARM::FV_NONE:
     Features.push_back("-vfp2");
     Features.push_back("-vfp3");
     Features.push_back("-fp16");
     Features.push_back("-vfp4");
     Features.push_back("-fp-armv8");
     break;
+  default:
+    return false;
   }
 
   // crypto includes neon, so we handle this similarly to FPU version.
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 4530e4155ae..738ddedccda 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -630,7 +630,7 @@ void ARMAsmPrinter::emitAttributes() {
     } else if (STI.hasVFP4())
       ATS.emitFPU(ARM::FK_NEON_VFPV4);
     else
-      ATS.emitFPU(ARM::FK_NEON);
+      ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
     // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
     if (STI.hasV8Ops())
       ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
@@ -648,7 +648,13 @@ void ARMAsmPrinter::emitAttributes() {
                   ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
                   : ARM::FK_VFPV4);
     else if (STI.hasVFP3())
-      ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3);
+      ATS.emitFPU(STI.hasD16()
+                  // +d16
+                  ? (STI.isFPOnlySP()
+                     ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
+                     : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
+                  // -d16
+                  : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
     else if (STI.hasVFP2())
       ATS.emitFPU(ARM::FK_VFPV2);
   }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 61af583fd60..4d12bfb5d60 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -797,12 +797,44 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_VFPV3_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3A,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_VFPV3_D16:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv3B,
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_VFPV3_D16_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
+  case ARM::FK_VFPV3XD:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    break;
+  case ARM::FK_VFPV3XD_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_VFPV4:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv4A,
@@ -842,6 +874,18 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_NEON_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3A,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+                     ARMBuildAttrs::AllowNeon,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_NEON_VFPV4:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv4A,
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index 9aa2d6565d3..29c702304a3 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -51,6 +51,13 @@
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A17-NOFPU
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST
+
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-FP16
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-D16-FP16
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD-FP16
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=+neon,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-NEON-FP16
+
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST
@@ -1091,7 +1098,7 @@
 ; CORTEX-R7:  .eabi_attribute 7, 82
 ; CORTEX-R7:  .eabi_attribute 8, 1
 ; CORTEX-R7:  .eabi_attribute 9, 2
-; CORTEX-R7:  .fpu vfpv3-d16
+; CORTEX-R7:  .fpu vfpv3xd
 ; CORTEX-R7-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-R7:  .eabi_attribute 20, 1
@@ -1205,6 +1212,12 @@
 ; CORTEX-A72-FAST-NOT:  .eabi_attribute 22
 ; CORTEX-A72-FAST:  .eabi_attribute 23, 1
 
+; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16
+; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16
+; GENERIC-FPU-VFPV3XD: .fpu vfpv3xd
+; GENERIC-FPU-VFPV3XD-FP16: .fpu vfpv3xd-fp16
+; GENERIC-FPU-NEON-FP16: .fpu neon-fp16
+
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 6, 14
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 7, 65
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 8, 1
diff --git a/test/MC/ARM/directive-fpu-multiple.s b/test/MC/ARM/directive-fpu-multiple.s
index 66fc2741596..50389a11532 100644
--- a/test/MC/ARM/directive-fpu-multiple.s
+++ b/test/MC/ARM/directive-fpu-multiple.s
@@ -10,7 +10,11 @@
 	.fpu vfp
 	.fpu vfpv2
 	.fpu vfpv3
+	.fpu vfpv3-fp16
 	.fpu vfpv3-d16
+	.fpu vfpv3-d16-fp16
+	.fpu vfpv3xd
+	.fpu vfpv3xd-fp16
 	.fpu vfpv4
 	.fpu vfpv4-d16
 	.fpu fpv4-sp-d16
@@ -18,6 +22,7 @@
 	.fpu fpv5-sp-d16
 	.fpu fp-armv8
 	.fpu neon
+	.fpu neon-fp16
 	.fpu neon-vfpv4
 	.fpu neon-fp-armv8
 	.fpu crypto-neon-fp-armv8