lib/Target/R600/AMDGPUISelLowering.cpp

   1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief This is the parent TargetLowering class for hardware code gen
  12 /// targets.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AMDGPUISelLowering.h"
  17 #include "AMDGPU.h"
  18 #include "AMDGPURegisterInfo.h"
  19 #include "AMDGPUSubtarget.h"
  20 #include "AMDILIntrinsicInfo.h"
  21 #include "SIMachineFunctionInfo.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/MachineFunction.h"
  24 #include "llvm/CodeGen/MachineRegisterInfo.h"
  25 #include "llvm/CodeGen/SelectionDAG.h"
  26 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  27
  28 using namespace llvm;
  29
  30 #include "AMDGPUGenCallingConv.inc"
  31
  32 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  33   TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  34
  35   // Initialize target lowering borrowed from AMDIL
  36   InitAMDILLowering();
  37
  38   // We need to custom lower some of the intrinsics
  39   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  40
  41   // Library functions.  These default to Expand, but we have instructions
  42   // for them.
  43   setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
  44   setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
  45   setOperationAction(ISD::FPOW,   MVT::f32, Legal);
  46   setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
  47   setOperationAction(ISD::FABS,   MVT::f32, Legal);
  48   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  49   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
  50
  51   // The hardware supports ROTR, but not ROTL
  52   setOperationAction(ISD::ROTL, MVT::i32, Expand);
  53
  54   // Lower floating point store/load to integer store/load to reduce the number
  55   // of patterns in tablegen.
  56   setOperationAction(ISD::STORE, MVT::f32, Promote);
  57   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
  58
  59   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  60   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
  61
  62   setOperationAction(ISD::LOAD, MVT::f32, Promote);
  63   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
  64
  65   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  66   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
  67
  68   setOperationAction(ISD::MUL, MVT::i64, Expand);
  69
  70   setOperationAction(ISD::UDIV, MVT::i32, Expand);
  71   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  72   setOperationAction(ISD::UREM, MVT::i32, Expand);
  73 }
  74
  75 //===---------------------------------------------------------------------===//
  76 // TargetLowering Callbacks
  77 //===---------------------------------------------------------------------===//
  78
  79 void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
  80                              const SmallVectorImpl<ISD::InputArg> &Ins) const {
  81
  82   State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
  83 }
  84
  85 SDValue AMDGPUTargetLowering::LowerReturn(
  86                                      SDValue Chain,
  87                                      CallingConv::ID CallConv,
  88                                      bool isVarArg,
  89                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
  90                                      const SmallVectorImpl<SDValue> &OutVals,
  91                                      SDLoc DL, SelectionDAG &DAG) const {
  92   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
  93 }
  94
  95 //===---------------------------------------------------------------------===//
  96 // Target specific lowering
  97 //===---------------------------------------------------------------------===//
  98
  99 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
 100     const {
 101   switch (Op.getOpcode()) {
 102   default:
 103     Op.getNode()->dump();
 104     assert(0 && "Custom lowering code for this"
 105         "instruction is not implemented yet!");
 106     break;
 107   // AMDIL DAG lowering
 108   case ISD::SDIV: return LowerSDIV(Op, DAG);
 109   case ISD::SREM: return LowerSREM(Op, DAG);
 110   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
 111   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
 112   // AMDGPU DAG lowering
 113   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
 114   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
 115   }
 116   return Op;
 117 }
 118
 119 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 120     SelectionDAG &DAG) const {
 121   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 122   SDLoc DL(Op);
 123   EVT VT = Op.getValueType();
 124
 125   switch (IntrinsicID) {
 126     default: return Op;
 127     case AMDGPUIntrinsic::AMDIL_abs:
 128       return LowerIntrinsicIABS(Op, DAG);
 129     case AMDGPUIntrinsic::AMDIL_exp:
 130       return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
 131     case AMDGPUIntrinsic::AMDGPU_lrp:
 132       return LowerIntrinsicLRP(Op, DAG);
 133     case AMDGPUIntrinsic::AMDIL_fraction:
 134       return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
 135     case AMDGPUIntrinsic::AMDIL_max:
 136       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
 137                                                   Op.getOperand(2));
 138     case AMDGPUIntrinsic::AMDGPU_imax:
 139       return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
 140                                                   Op.getOperand(2));
 141     case AMDGPUIntrinsic::AMDGPU_umax:
 142       return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
 143                                                   Op.getOperand(2));
 144     case AMDGPUIntrinsic::AMDIL_min:
 145       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
 146                                                   Op.getOperand(2));
 147     case AMDGPUIntrinsic::AMDGPU_imin:
 148       return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
 149                                                   Op.getOperand(2));
 150     case AMDGPUIntrinsic::AMDGPU_umin:
 151       return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
 152                                                   Op.getOperand(2));
 153     case AMDGPUIntrinsic::AMDIL_round_nearest:
 154       return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
 155   }
 156 }
 157
 158 ///IABS(a) = SMAX(sub(0, a), a)
 159 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
 160     SelectionDAG &DAG) const {
 161
 162   SDLoc DL(Op);
 163   EVT VT = Op.getValueType();
 164   SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
 165                                               Op.getOperand(1));
 166
 167   return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
 168 }
 169
 170 /// Linear Interpolation
 171 /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
 172 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
 173     SelectionDAG &DAG) const {
 174   SDLoc DL(Op);
 175   EVT VT = Op.getValueType();
 176   SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
 177                                 DAG.getConstantFP(1.0f, MVT::f32),
 178                                 Op.getOperand(1));
 179   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
 180                                                     Op.getOperand(3));
 181   return DAG.getNode(ISD::FADD, DL, VT,
 182       DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
 183       OneSubAC);
 184 }
 185
 186 /// \brief Generate Min/Max node
 187 SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
 188     SelectionDAG &DAG) const {
 189   SDLoc DL(Op);
 190   EVT VT = Op.getValueType();
 191
 192   SDValue LHS = Op.getOperand(0);
 193   SDValue RHS = Op.getOperand(1);
 194   SDValue True = Op.getOperand(2);
 195   SDValue False = Op.getOperand(3);
 196   SDValue CC = Op.getOperand(4);
 197
 198   if (VT != MVT::f32 ||
 199       !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
 200     return SDValue();
 201   }
 202
 203   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 204   switch (CCOpcode) {
 205   case ISD::SETOEQ:
 206   case ISD::SETONE:
 207   case ISD::SETUNE:
 208   case ISD::SETNE:
 209   case ISD::SETUEQ:
 210   case ISD::SETEQ:
 211   case ISD::SETFALSE:
 212   case ISD::SETFALSE2:
 213   case ISD::SETTRUE:
 214   case ISD::SETTRUE2:
 215   case ISD::SETUO:
 216   case ISD::SETO:
 217     assert(0 && "Operation should already be optimised !");
 218   case ISD::SETULE:
 219   case ISD::SETULT:
 220   case ISD::SETOLE:
 221   case ISD::SETOLT:
 222   case ISD::SETLE:
 223   case ISD::SETLT: {
 224     if (LHS == True)
 225       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
 226     else
 227       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
 228   }
 229   case ISD::SETGT:
 230   case ISD::SETGE:
 231   case ISD::SETUGE:
 232   case ISD::SETOGE:
 233   case ISD::SETUGT:
 234   case ISD::SETOGT: {
 235     if (LHS == True)
 236       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
 237     else
 238       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
 239   }
 240   case ISD::SETCC_INVALID:
 241     assert(0 && "Invalid setcc condcode !");
 242   }
 243   return Op;
 244 }
 245
 246
 247
 248 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
 249     SelectionDAG &DAG) const {
 250   SDLoc DL(Op);
 251   EVT VT = Op.getValueType();
 252
 253   SDValue Num = Op.getOperand(0);
 254   SDValue Den = Op.getOperand(1);
 255
 256   SmallVector<SDValue, 8> Results;
 257
 258   // RCP =  URECIP(Den) = 2^32 / Den + e
 259   // e is rounding error.
 260   SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
 261
 262   // RCP_LO = umulo(RCP, Den) */
 263   SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
 264
 265   // RCP_HI = mulhu (RCP, Den) */
 266   SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
 267
 268   // NEG_RCP_LO = -RCP_LO
 269   SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
 270                                                      RCP_LO);
 271
 272   // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
 273   SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
 274                                            NEG_RCP_LO, RCP_LO,
 275                                            ISD::SETEQ);
 276   // Calculate the rounding error from the URECIP instruction
 277   // E = mulhu(ABS_RCP_LO, RCP)
 278   SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
 279
 280   // RCP_A_E = RCP + E
 281   SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
 282
 283   // RCP_S_E = RCP - E
 284   SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
 285
 286   // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
 287   SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
 288                                      RCP_A_E, RCP_S_E,
 289                                      ISD::SETEQ);
 290   // Quotient = mulhu(Tmp0, Num)
 291   SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
 292
 293   // Num_S_Remainder = Quotient * Den
 294   SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
 295
 296   // Remainder = Num - Num_S_Remainder
 297   SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
 298
 299   // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
 300   SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
 301                                                  DAG.getConstant(-1, VT),
 302                                                  DAG.getConstant(0, VT),
 303                                                  ISD::SETGE);
 304   // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
 305   SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
 306                                                   DAG.getConstant(0, VT),
 307                                                   DAG.getConstant(-1, VT),
 308                                                   DAG.getConstant(0, VT),
 309                                                   ISD::SETGE);
 310   // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
 311   SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
 312                                                Remainder_GE_Zero);
 313
 314   // Calculate Division result:
 315
 316   // Quotient_A_One = Quotient + 1
 317   SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
 318                                                          DAG.getConstant(1, VT));
 319
 320   // Quotient_S_One = Quotient - 1
 321   SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
 322                                                          DAG.getConstant(1, VT));
 323
 324   // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
 325   SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
 326                                      Quotient, Quotient_A_One, ISD::SETEQ);
 327
 328   // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
 329   Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
 330                             Quotient_S_One, Div, ISD::SETEQ);
 331
 332   // Calculate Rem result:
 333
 334   // Remainder_S_Den = Remainder - Den
 335   SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
 336
 337   // Remainder_A_Den = Remainder + Den
 338   SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
 339
 340   // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
 341   SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
 342                                     Remainder, Remainder_S_Den, ISD::SETEQ);
 343
 344   // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
 345   Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
 346                             Remainder_A_Den, Rem, ISD::SETEQ);
 347   SDValue Ops[2];
 348   Ops[0] = Div;
 349   Ops[1] = Rem;
 350   return DAG.getMergeValues(Ops, 2, DL);
 351 }
 352
 353 //===----------------------------------------------------------------------===//
 354 // Helper functions
 355 //===----------------------------------------------------------------------===//
 356
 357 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
 358   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
 359     return CFP->isExactlyValue(1.0);
 360   }
 361   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
 362     return C->isAllOnesValue();
 363   }
 364   return false;
 365 }
 366
 367 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
 368   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
 369     return CFP->getValueAPF().isZero();
 370   }
 371   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
 372     return C->isNullValue();
 373   }
 374   return false;
 375 }
 376
 377 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
 378                                                   const TargetRegisterClass *RC,
 379                                                    unsigned Reg, EVT VT) const {
 380   MachineFunction &MF = DAG.getMachineFunction();
 381   MachineRegisterInfo &MRI = MF.getRegInfo();
 382   unsigned VirtualRegister;
 383   if (!MRI.isLiveIn(Reg)) {
 384     VirtualRegister = MRI.createVirtualRegister(RC);
 385     MRI.addLiveIn(Reg, VirtualRegister);
 386   } else {
 387     VirtualRegister = MRI.getLiveInVirtReg(Reg);
 388   }
 389   return DAG.getRegister(VirtualRegister, VT);
 390 }
 391
 392 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
 393
 394 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
 395   switch (Opcode) {
 396   default: return 0;
 397   // AMDIL DAG nodes
 398   NODE_NAME_CASE(CALL);
 399   NODE_NAME_CASE(UMUL);
 400   NODE_NAME_CASE(DIV_INF);
 401   NODE_NAME_CASE(RET_FLAG);
 402   NODE_NAME_CASE(BRANCH_COND);
 403
 404   // AMDGPU DAG nodes
 405   NODE_NAME_CASE(DWORDADDR)
 406   NODE_NAME_CASE(FRACT)
 407   NODE_NAME_CASE(FMAX)
 408   NODE_NAME_CASE(SMAX)
 409   NODE_NAME_CASE(UMAX)
 410   NODE_NAME_CASE(FMIN)
 411   NODE_NAME_CASE(SMIN)
 412   NODE_NAME_CASE(UMIN)
 413   NODE_NAME_CASE(URECIP)
 414   NODE_NAME_CASE(EXPORT)
 415   NODE_NAME_CASE(CONST_ADDRESS)
 416   NODE_NAME_CASE(REGISTER_LOAD)
 417   NODE_NAME_CASE(REGISTER_STORE)
 418   }
 419 }