From: Matt Arsenault Date: Thu, 24 Jul 2014 17:41:01 +0000 (+0000) Subject: R600: Add FMA instructions for Evergreen X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=855a7e6effbfa8e0e502766cd5191a606e0985a7;p=oota-llvm.git R600: Add FMA instructions for Evergreen git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213882 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 17edf15ed8f..a83567a6aca 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -257,6 +257,12 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, let Predicates = [isEGorCayman] in { +// Should be predicated on FeatureFP64 +// def FMA_64 : R600_3OP < +// 0xA, "FMA_64", +// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))] +// >; + // BFE_UINT - bit_extract, an optimization for mask and shift // Src0 = Input // Src1 = Offset @@ -313,6 +319,7 @@ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>; def : ROTRPattern <BIT_ALIGN_INT_eg>; def MULADD_eg : MULADD_Common<0x14>; def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>; +def FMA_eg : FMA_Common<0x7>; def ASHR_eg : ASHR_Common<0x15>; def LSHR_eg : LSHR_Common<0x16>; def LSHL_eg : LSHL_Common<0x17>; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 436b0956f21..671f923a91c 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -915,6 +915,11 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] >; +class FMA_Common <bits<5> inst> : R600_3OP < + inst, "FMA", + [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))] +>; + class CNDE_Common <bits<5> inst> : R600_3OP < inst, "CNDE", [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))] diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/R600/fma.f64.ll new file mode 100644 index 
00000000000..b4b9450e2f7 --- /dev/null +++ b/test/CodeGen/R600/fma.f64.ll @@ -0,0 +1,46 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare double @llvm.fma.f64(double, double, double) nounwind readnone +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + + +; FUNC-LABEL: @fma_f64 +; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2, double addrspace(1)* %in3) { + %r0 = load double addrspace(1)* %in1 + %r1 = load double addrspace(1)* %in2 + %r2 = load double addrspace(1)* %in3 + %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2) + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fma_v2f64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, + <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) { + %r0 = load <2 x double> addrspace(1)* %in1 + %r1 = load <2 x double> addrspace(1)* %in2 + %r2 = load <2 x double> addrspace(1)* %in3 + %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) + store <2 x double> %r3, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fma_v4f64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1, + <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) { + %r0 = load <4 x double> addrspace(1)* %in1 + %r1 = load <4 x double> addrspace(1)* %in2 + %r2 = load <4 x double> addrspace(1)* %in3 + %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x 
double> %r2) + store <4 x double> %r3, <4 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll index d72ffeceb92..eec8eb7f1c5 100644 --- a/test/CodeGen/R600/fma.ll +++ b/test/CodeGen/R600/fma.ll @@ -1,13 +1,10 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.fma.f32(float, float, float) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone -declare double @llvm.fma.f64(double, double, double) nounwind readnone -declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone -declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone - ; FUNC-LABEL: @fma_f32 ; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1, @@ -47,43 +44,3 @@ define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* store <4 x float> %r3, <4 x float> addrspace(1)* %out ret void } - -; FUNC-LABEL: @fma_f64 -; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} -define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, - double addrspace(1)* %in2, double addrspace(1)* %in3) { - %r0 = load double addrspace(1)* %in1 - %r1 = load double addrspace(1)* %in2 - %r2 = load double addrspace(1)* %in3 - %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2) - store double %r3, double addrspace(1)* %out - ret void -} - -; FUNC-LABEL: @fma_v2f64 -; SI: V_FMA_F64 -; SI: V_FMA_F64 -define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, - 
<2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) { - %r0 = load <2 x double> addrspace(1)* %in1 - %r1 = load <2 x double> addrspace(1)* %in2 - %r2 = load <2 x double> addrspace(1)* %in3 - %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) - store <2 x double> %r3, <2 x double> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: @fma_v4f64 -; SI: V_FMA_F64 -; SI: V_FMA_F64 -; SI: V_FMA_F64 -; SI: V_FMA_F64 -define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1, - <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) { - %r0 = load <4 x double> addrspace(1)* %in1 - %r1 = load <4 x double> addrspace(1)* %in2 - %r2 = load <4 x double> addrspace(1)* %in3 - %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2) - store <4 x double> %r3, <4 x double> addrspace(1)* %out - ret void -}