R600/SI: Default to no single precision denormals.

author Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 14 Jul 2014 23:40:43 +0000 (23:40 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 14 Jul 2014 23:40:43 +0000 (23:40 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 14 Jul 2014 23:40:43 +0000 (23:40 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 14 Jul 2014 23:40:43 +0000 (23:40 +0000)
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp

index 8c5fc846ed4c042fe789aef7cd3360ca9343fc45..b2b7bf9af4435f49961795927b4e00d51e547133 100644 (file)
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -47,10 +47,18 @@ using namespace llvm;
  // precision, and leaves single precision to flush all and does not report
  // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
  // CL_FP_DENORM for both.
+//
+// FIXME: It seems some instructions do not support single precision denormals
+// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, sqrt_f32,
+// and sin_f32, cos_f32 on most parts).
+//
+// We want to use these instructions, and using fp32 denormals also causes
+// instructions to run at the double precision rate for the device so it's
+// probably best to just report no single precision denormals.
  static uint32_t getFPMode(const MachineFunction &) {
    return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
           FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
-         FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
+         FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) |
           FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
  }
  
diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll

index 214b2c20369c3294d6f5743e96a683c50ba9de5a..4488bdb7f44539eca2c09b3500c75788a826c90a 100644 (file)
--- a/test/CodeGen/R600/default-fp-mode.ll
+++ b/test/CodeGen/R600/default-fp-mode.ll
@@ -1,7 +1,7 @@
  ; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
  
  ; SI-LABEL: @test_kernel
-; SI: FloatMode: 240
+; SI: FloatMode: 192
  ; SI: IeeeMode: 0
  define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
    store float 0.0, float addrspace(1)* %out0
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 14 Jul 2014 23:40:43 +0000 (23:40 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 14 Jul 2014 23:40:43 +0000 (23:40 +0000)
lib/Target/R600/AMDGPUAsmPrinter.cpp		patch \| blob \| history
test/CodeGen/R600/default-fp-mode.ll		patch \| blob \| history