From de929f8b7d703a84a66c713ab8f849dd04792821 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 24 Jul 2014 06:59:24 +0000 Subject: [PATCH] R600: Match rcp node on pre-SI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213844 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/CaymanInstructions.td | 2 ++ lib/Target/R600/EvergreenInstructions.td | 1 + lib/Target/R600/R600Instructions.td | 7 ++++- test/CodeGen/R600/llvm.AMDGPU.rcp.ll | 40 +++++++----------------- 4 files changed, 20 insertions(+), 30 deletions(-) diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td index 26303452c10..58b5ce24b4a 100644 --- a/lib/Target/R600/CaymanInstructions.td +++ b/lib/Target/R600/CaymanInstructions.td @@ -46,6 +46,8 @@ def SIN_cm : SIN_Common<0x8D>; def COS_cm : COS_Common<0x8E>; } // End isVector = 1 +defm : RsqPat; + def : POW_Common ; defm DIV_cm : DIV_Common; diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 484e52250d1..17edf15ed8f 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -69,6 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; +defm : RsqPat; def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 704507d368e..436b0956f21 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1068,7 +1068,7 @@ class RECIP_CLAMPED_Common inst> : R600_1OP < } class RECIP_IEEE_Common inst> : R600_1OP < - inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] + inst, "RECIP_IEEE", [(set f32:$dst, (AMDGPUrcp f32:$src0))] > { let Itinerary = TransALU; } @@ -1114,6 +1114,7 @@ def FNEG_R600 : FNEG; // Helper patterns for complex intrinsics //===----------------------------------------------------------------------===// +// FIXME: Should be predicated on unsafe fp math. multiclass DIV_Common { def : Pat< (int_AMDGPU_div f32:$src0, f32:$src1), @@ -1124,6 +1125,8 @@ def : Pat< (fdiv f32:$src0, f32:$src1), (MUL_IEEE $src0, (recip_ieee $src1)) >; + +def : RcpPat; } class TGSI_LIT_Z_Common @@ -1180,6 +1183,8 @@ let Predicates = [isR600] in { def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; + defm : RsqPat; + def : FROUNDPat ; def R600_ExportSwz : ExportSwzInst { diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll index 8d5d66e149b..df6c3bb6a2c 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll @@ -1,65 +1,47 @@ ; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s - ; XUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone - declare float @llvm.sqrt.f32(float) nounwind readnone -declare double @llvm.sqrt.f64(double) nounwind readnone ; FUNC-LABEL: @rcp_f32 ; SI: V_RCP_F32_e32 +; EG: RECIP_IEEE define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind { %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone store float %rcp, float addrspace(1)* %out, align 4 ret void } -; FUNC-LABEL: @rcp_f64 -; SI: V_RCP_F64_e32 -define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind { - %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone - store double %rcp, double addrspace(1)* %out, align 8 - ret void -} - +; FIXME: Evergreen only ever does unsafe fp math. ; FUNC-LABEL: @rcp_pat_f32 + ; SI-SAFE: V_RCP_F32_e32 ; XSI-SAFE-SPDENORM-NOT: V_RCP_F32_e32 + +; EG: RECIP_IEEE + define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { %rcp = fdiv float 1.0, %src store float %rcp, float addrspace(1)* %out, align 4 ret void } -; FUNC-LABEL: @rcp_pat_f64 -; SI: V_RCP_F64_e32 -define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { - %rcp = fdiv double 1.0, %src - store double %rcp, double addrspace(1)* %out, align 8 - ret void -} - ; FUNC-LABEL: @rsq_rcp_pat_f32 ; SI-UNSAFE: V_RSQ_F32_e32 ; SI-SAFE: V_SQRT_F32_e32 ; SI-SAFE: V_RCP_F32_e32 + +; EG: RECIPSQRT_IEEE define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone store float %rcp, float addrspace(1)* %out, align 4 ret void } - -; FUNC-LABEL: @rsq_rcp_pat_f64 -; SI-UNSAFE: V_RSQ_F64_e32 -; SI-SAFE-NOT: V_RSQ_F64_e32 -define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { - %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone - %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone - store double %rcp, double addrspace(1)* %out, align 8 - ret void -} -- 2.34.1