R600: Add denormal handling subtarget features.

author Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 14 Jul 2014 23:40:49 +0000 (23:40 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 14 Jul 2014 23:40:49 +0000 (23:40 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 14 Jul 2014 23:40:49 +0000 (23:40 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 14 Jul 2014 23:40:49 +0000 (23:40 +0000)
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td

index 89992c202ea682372a99776febb88338d05a9fa3..6a23d361eb85edd083cf98818959205e43121cea 100644 (file)
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -42,6 +42,20 @@ def FeatureFP64 : SubtargetFeature<"fp64",
          "true",
          "Enable double precision operations">;
  
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+        "FP64Denormals",
+        "true",
+        "Enable double precision denormal handling",
+        [FeatureFP64]>;
+
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+        "FP32Denormals",
+        "true",
+        "Enable single precision denormal handling">;
+
  def Feature64BitPtr : SubtargetFeature<"64BitPtr",
          "Is64bit",
          "true",
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp

index b2b7bf9af4435f49961795927b4e00d51e547133..aaef1405157e8074676d883861f0cdb2e11a0b40 100644 (file)
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -55,11 +55,20 @@ using namespace llvm;
  // We want to use these instructions, and using fp32 denormals also causes
  // instructions to run at the double precision rate for the device so it's
  // probably best to just report no single precision denormals.
-static uint32_t getFPMode(const MachineFunction &) {
+static uint32_t getFPMode(const MachineFunction &F) {
+  const AMDGPUSubtarget& ST = F.getTarget().getSubtarget<AMDGPUSubtarget>();
+  // TODO: Is there any real use for the flush in only / flush out only modes?
+
+  uint32_t FP32Denormals =
+    ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+  uint32_t FP64Denormals =
+    ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
    return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
           FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
-         FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) |
-         FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
+         FP_DENORM_MODE_SP(FP32Denormals) |
+         FP_DENORM_MODE_DP(FP64Denormals);
  }
  
  static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td

index b86b7818fc1a55d12c507c368908a30f22eb8985..38620705784b4d9e4c764ed7590e91c810a92695 100644 (file)
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -34,6 +34,9 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
  
  }
  
+def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
+def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
+
  def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
  def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
  
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp

index d5203611756ff710181f04469972ed4985c57a8d..e3c2a50ab828bb741d64cbff1be0e3476c3baf06 100644 (file)
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -15,6 +15,7 @@
  #include "AMDGPUSubtarget.h"
  #include "R600InstrInfo.h"
  #include "SIInstrInfo.h"
+#include "llvm/ADT/SmallString.h"
  
  #include "llvm/ADT/SmallString.h"
  
@@ -37,6 +38,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
    TexVTXClauseSize(0),
    Gen(AMDGPUSubtarget::R600),
    FP64(false),
+  FP64Denormals(false),
+  FP32Denormals(false),
    CaymanISA(false),
    EnableIRStructurizer(true),
    EnablePromoteAlloca(false),
@@ -45,14 +48,27 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
    CFALUBug(false),
    LocalMemorySize(0),
    InstrItins(getInstrItineraryForCPU(GPU)) {
+  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
+  // enabled, but some instructions do not respect them and they run at the
+  // double precision rate, so don't enable by default.
+  //
+  // We want to be able to turn these off, but making this a subtarget feature
+  // for SI has the unhelpful behavior that it unsets everything else if you
+  // disable it.
  
-  SmallString<256> FullFS("+promote-alloca,");
+  SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
    FullFS += FS;
  
    ParseSubtargetFeatures(GPU, FullFS);
  
    if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      InstrInfo.reset(new R600InstrInfo(*this));
+
+    // FIXME: I don't think Evergreen has any useful support for
+    // denormals, but should be checked. Should we issue a warning somewhere if
+    // someone tries to enable these?
+    FP32Denormals = false;
+    FP64Denormals = false;
    } else {
      InstrInfo.reset(new SIInstrInfo(*this));
    }
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h

index 68634ea883b16adf1dc1dad451dcda7355db27ff..abe4a2cec49853f76b5848b408b875fde8cff71e 100644 (file)
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -50,6 +50,8 @@ private:
    short TexVTXClauseSize;
    Generation Gen;
    bool FP64;
+  bool FP64Denormals;
+  bool FP32Denormals;
    bool CaymanISA;
    bool EnableIRStructurizer;
    bool EnablePromoteAlloca;
@@ -97,6 +99,14 @@ public:
      return CaymanISA;
    }
  
+  bool hasFP32Denormals() const {
+    return FP32Denormals;
+  }
+
+  bool hasFP64Denormals() const {
+    return FP64Denormals;
+  }
+
    bool hasBFE() const {
      return (getGeneration() >= EVERGREEN);
    }
diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll

index 4488bdb7f44539eca2c09b3500c75788a826c90a..b24a7a246fda8b35371a1ce70ae972e789b7491b 100644 (file)
--- a/test/CodeGen/R600/default-fp-mode.ll
+++ b/test/CodeGen/R600/default-fp-mode.ll
@@ -1,8 +1,27 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
  
-; SI-LABEL: @test_kernel
-; SI: FloatMode: 192
-; SI: IeeeMode: 0
+; FUNC-LABEL: @test_kernel
+
+; DEFAULT: FloatMode: 192
+; DEFAULT: IeeeMode: 0
+
+; FP64-DENORMAL: FloatMode: 192
+; FP64-DENORMAL: IeeeMode: 0
+
+; FP32-DENORMAL: FloatMode: 48
+; FP32-DENORMAL: IeeeMode: 0
+
+; BOTH-DENORMAL: FloatMode: 240
+; BOTH-DENORMAL: IeeeMode: 0
+
+; NO-DENORMAL: FloatMode: 0
+; NO-DENORMAL: IeeeMode: 0
  define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
    store float 0.0, float addrspace(1)* %out0
    store double 0.0, double addrspace(1)* %out1
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 14 Jul 2014 23:40:49 +0000 (23:40 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 14 Jul 2014 23:40:49 +0000 (23:40 +0000)
lib/Target/R600/AMDGPU.td		patch \| blob \| history
lib/Target/R600/AMDGPUAsmPrinter.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUInstructions.td		patch \| blob \| history
lib/Target/R600/AMDGPUSubtarget.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUSubtarget.h		patch \| blob \| history
test/CodeGen/R600/default-fp-mode.ll		patch \| blob \| history