R600: Use DAG lowering pass to handle fcos/fsin

author Vincent Lejeune <vljn@ovi.com>

Tue, 9 Jul 2013 15:03:11 +0000 (15:03 +0000)

committer Vincent Lejeune <vljn@ovi.com>

Tue, 9 Jul 2013 15:03:11 +0000 (15:03 +0000)
author Vincent Lejeune <vljn@ovi.com>
Tue, 9 Jul 2013 15:03:11 +0000 (15:03 +0000)
committer Vincent Lejeune <vljn@ovi.com>
Tue, 9 Jul 2013 15:03:11 +0000 (15:03 +0000)
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h

index d739a01324249b4d459acefe89962a755c34fdd5..7f4468c15edb3dc923c541de4aac656920a73b20 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -121,6 +121,8 @@ enum {
    // End AMDIL ISD Opcodes
    DWORDADDR,
    FRACT,
+  COS_HW,
+  SIN_HW,
    FMAX,
    SMAX,
    UMAX,
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp

index ce2aa9208f8ff47d390c39b40b7aaed79b8fed8c..4413734b401b40c3c97112398ec53d0e2bed83f8 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -26,7 +26,8 @@
  using namespace llvm;
  
  R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
-    AMDGPUTargetLowering(TM) {
+    AMDGPUTargetLowering(TM),
+    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
    addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
    addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
    addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
@@ -38,6 +39,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
    setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
  
+  setOperationAction(ISD::FCOS, MVT::f32, Custom);
+  setOperationAction(ISD::FSIN, MVT::f32, Custom);
+
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
@@ -473,6 +477,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
    switch (Op.getOpcode()) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+  case ISD::FCOS:
+  case ISD::FSIN: return LowerTrig(Op, DAG);
    case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
    case ISD::SELECT: return LowerSELECT(Op, DAG);
    case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -723,6 +729,37 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
    }
  }
  
+SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+  // On hw >= R700, COS/SIN input must be between -1. and 1.
+  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
+  EVT VT = Op.getValueType();
+  SDValue Arg = Op.getOperand(0);
+  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
+      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
+        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
+          DAG.getConstantFP(0.15915494309, MVT::f32)),
+        DAG.getConstantFP(0.5, MVT::f32)));
+  unsigned TrigNode;
+  switch (Op.getOpcode()) {
+  case ISD::FCOS:
+    TrigNode = AMDGPUISD::COS_HW;
+    break;
+  case ISD::FSIN:
+    TrigNode = AMDGPUISD::SIN_HW;
+    break;
+  default:
+    llvm_unreachable("Wrong trig opcode");
+  }
+  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
+      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
+        DAG.getConstantFP(-0.5, MVT::f32)));
+  if (Gen >= AMDGPUSubtarget::R700)
+    return TrigVal;
+  // On R600 hw, COS/SIN input must be between -Pi and Pi.
+  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
+      DAG.getConstantFP(3.14159265359, MVT::f32));
+}
+
  SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
    return DAG.getNode(
        ISD::SETCC,
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h

index d4ba4c877bb94d1f3e6fcb72ff21f9606dccd903..a033fcba64350321f954387853e5f48296768ce8 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -40,6 +40,7 @@ public:
                                        SmallVectorImpl<SDValue> &InVals) const;
    virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const;
  private:
+  unsigned Gen;
    /// Each OpenCL kernel has nine implicit parameters that are stored in the
    /// first nine dwords of a Vertex Buffer.  These implicit parameters are
    /// lowered to load instructions which retreive the values from the Vertex
@@ -60,6 +61,7 @@ private:
    SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
  
    SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
                                            SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td

index cb887d13430b761ba92d8fd538e1c25f0b7d2bc5..735dcfc02569e7f5e1733441beb98692e8b0b2bc 100644 (file)
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -364,6 +364,14 @@ def DOT4 : SDNode<"AMDGPUISD::DOT4",
    []
  >;
  
+def COS_HW : SDNode<"AMDGPUISD::COS_HW", // built by LowerTrig for ISD::FCOS
+  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
+>;
+
+def SIN_HW : SDNode<"AMDGPUISD::SIN_HW", // built by LowerTrig for ISD::FSIN
+  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
+>;
+
  def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
  
  def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
@@ -1080,14 +1088,14 @@ class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
  }
  
  class SIN_Common <bits<11> inst> : R600_1OP <
-  inst, "SIN", []>{
+  inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
    let Trig = 1;
    let TransOnly = 1;
    let Itinerary = TransALU;
  }
  
  class COS_Common <bits<11> inst> : R600_1OP <
-  inst, "COS", []> {
+  inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
    let Trig = 1;
    let TransOnly = 1;
    let Itinerary = TransALU;
@@ -1228,18 +1236,6 @@ let Predicates = [isR600] in {
  
  }
  
-// Helper pattern for normalizing inputs to triginomic instructions for R700+
-// cards.
-class COS_PAT <InstR600 trig> : Pat<
-  (fcos f32:$src),
-  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
->;
-
-class SIN_PAT <InstR600 trig> : Pat<
-  (fsin f32:$src),
-  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
->;
-
  //===----------------------------------------------------------------------===//
  // R700 Only instructions
  //===----------------------------------------------------------------------===//
@@ -1247,10 +1243,6 @@ class SIN_PAT <InstR600 trig> : Pat<
  let Predicates = [isR700] in {
    def SIN_r700 : SIN_Common<0x6E>;
    def COS_r700 : COS_Common<0x6F>;
-
-  // R700 normalizes inputs to SIN/COS the same as EG
-  def : SIN_PAT <SIN_r700>;
-  def : COS_PAT <COS_r700>;
  }
  
  //===----------------------------------------------------------------------===//
@@ -1276,8 +1268,6 @@ def SIN_eg : SIN_Common<0x8D>;
  def COS_eg : COS_Common<0x8E>;
  
  def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
-def : SIN_PAT <SIN_eg>;
-def : COS_PAT <COS_eg>;
  def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
  
  //===----------------------------------------------------------------------===//
@@ -1726,8 +1716,6 @@ def COS_cm : COS_Common<0x8E>;
  } // End isVector = 1
  
  def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
-def : SIN_PAT <SIN_cm>;
-def : COS_PAT <COS_cm>;
  
  defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
  
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll

index b444fa782bea922d5fced87391a1abccebdffe21..8fb4559f89d9a78cc593c225c25e72ef2a103e7c 100644 (file)
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -1,5 +1,8 @@
  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
  
+;CHECK: MULADD_IEEE *
+;CHECK: FRACT *
+;CHECK: ADD *
  ;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
  
  define void @test() {
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll

index 09cc3d2c52dee30508794c6c2b81fb5ab4ac1fa7..e94c2ba56b85d91cea700d230ac1a1d79edf56ca 100644 (file)
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,5 +1,8 @@
  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
  
+;CHECK: MULADD_IEEE *
+;CHECK: FRACT *
+;CHECK: ADD *
  ;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
  
  define void @test() {
author	Vincent Lejeune <vljn@ovi.com>
	Tue, 9 Jul 2013 15:03:11 +0000 (15:03 +0000)
committer	Vincent Lejeune <vljn@ovi.com>
	Tue, 9 Jul 2013 15:03:11 +0000 (15:03 +0000)
lib/Target/R600/AMDGPUISelLowering.h		patch \| blob \| history
lib/Target/R600/R600ISelLowering.cpp		patch \| blob \| history
lib/Target/R600/R600ISelLowering.h		patch \| blob \| history
lib/Target/R600/R600Instructions.td		patch \| blob \| history
test/CodeGen/R600/llvm.cos.ll		patch \| blob \| history
test/CodeGen/R600/llvm.sin.ll		patch \| blob \| history