lib/Target/R600/AMDGPUISelLowering.cpp

   1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief This is the parent TargetLowering class for hardware code gen
  12 /// targets.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AMDGPUISelLowering.h"
  17 #include "AMDGPU.h"
  18 #include "AMDGPURegisterInfo.h"
  19 #include "AMDGPUSubtarget.h"
  20 #include "AMDILIntrinsicInfo.h"
  21 #include "R600MachineFunctionInfo.h"
  22 #include "SIMachineFunctionInfo.h"
  23 #include "llvm/CodeGen/CallingConvLower.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineRegisterInfo.h"
  26 #include "llvm/CodeGen/SelectionDAG.h"
  27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  28 #include "llvm/IR/DataLayout.h"
  29
  30 using namespace llvm;
  31
  32 #include "AMDGPUGenCallingConv.inc"
  33
  34 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  35   TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  36
  37   // Initialize target lowering borrowed from AMDIL
  38   InitAMDILLowering();
  39
  40   // We need to custom lower some of the intrinsics
  41   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  42
  43   // Library functions.  These default to Expand, but we have instructions
  44   // for them.
  45   setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
  46   setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
  47   setOperationAction(ISD::FPOW,   MVT::f32, Legal);
  48   setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
  49   setOperationAction(ISD::FABS,   MVT::f32, Legal);
  50   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  51   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
  52
  53   // The hardware supports ROTR, but not ROTL
  54   setOperationAction(ISD::ROTL, MVT::i32, Expand);
  55
  56   // Lower floating point store/load to integer store/load to reduce the number
  57   // of patterns in tablegen.
  58   setOperationAction(ISD::STORE, MVT::f32, Promote);
  59   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
  60
  61   setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  62   AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
  63
  64   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  65   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
  66
  67   setOperationAction(ISD::STORE, MVT::f64, Promote);
  68   AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
  69
  70   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  71   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  72   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  73   // XXX: This can be change to Custom, once ExpandVectorStores can
  74   // handle 64-bit stores.
  75   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
  76
  77   setOperationAction(ISD::LOAD, MVT::f32, Promote);
  78   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
  79
  80   setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
  81   AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
  82
  83   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  84   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
  85
  86   setOperationAction(ISD::LOAD, MVT::f64, Promote);
  87   AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
  88
  89   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  90   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  91   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
  92   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
  93
  94   setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
  95   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
  96   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
  97   setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
  98   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
  99   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
 100   setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
 101   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
 102   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
 103   setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
 104   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
 105   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
 106
 107   setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
 108   setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
 109
 110   setOperationAction(ISD::MUL, MVT::i64, Expand);
 111
 112   setOperationAction(ISD::UDIV, MVT::i32, Expand);
 113   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
 114   setOperationAction(ISD::UREM, MVT::i32, Expand);
 115   setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
 116   setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
 117
 118   static const int types[] = {
 119     (int)MVT::v2i32,
 120     (int)MVT::v4i32
 121   };
 122   const size_t NumTypes = array_lengthof(types);
 123
 124   for (unsigned int x  = 0; x < NumTypes; ++x) {
 125     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
 126     //Expand the following operations for the current type by default
 127     setOperationAction(ISD::ADD,  VT, Expand);
 128     setOperationAction(ISD::AND,  VT, Expand);
 129     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
 130     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
 131     setOperationAction(ISD::MUL,  VT, Expand);
 132     setOperationAction(ISD::OR,   VT, Expand);
 133     setOperationAction(ISD::SHL,  VT, Expand);
 134     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
 135     setOperationAction(ISD::SRL,  VT, Expand);
 136     setOperationAction(ISD::SRA,  VT, Expand);
 137     setOperationAction(ISD::SUB,  VT, Expand);
 138     setOperationAction(ISD::UDIV, VT, Expand);
 139     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
 140     setOperationAction(ISD::UREM, VT, Expand);
 141     setOperationAction(ISD::VSELECT, VT, Expand);
 142     setOperationAction(ISD::XOR,  VT, Expand);
 143   }
 144 }
 145
 146 //===----------------------------------------------------------------------===//
 147 // Target Information
 148 //===----------------------------------------------------------------------===//
 149
 150 MVT AMDGPUTargetLowering::getVectorIdxTy() const {
 151   return MVT::i32;
 152 }
 153
 154
 155 //===---------------------------------------------------------------------===//
 156 // Target Properties
 157 //===---------------------------------------------------------------------===//
 158
 159 bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
 160   assert(VT.isFloatingPoint());
 161   return VT == MVT::f32;
 162 }
 163
 164 bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
 165   assert(VT.isFloatingPoint());
 166   return VT == MVT::f32;
 167 }
 168
 169 //===---------------------------------------------------------------------===//
 170 // TargetLowering Callbacks
 171 //===---------------------------------------------------------------------===//
 172
 173 void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
 174                              const SmallVectorImpl<ISD::InputArg> &Ins) const {
 175
 176   State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
 177 }
 178
 179 SDValue AMDGPUTargetLowering::LowerReturn(
 180                                      SDValue Chain,
 181                                      CallingConv::ID CallConv,
 182                                      bool isVarArg,
 183                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
 184                                      const SmallVectorImpl<SDValue> &OutVals,
 185                                      SDLoc DL, SelectionDAG &DAG) const {
 186   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
 187 }
 188
 189 //===---------------------------------------------------------------------===//
 190 // Target specific lowering
 191 //===---------------------------------------------------------------------===//
 192
 193 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
 194     const {
 195   switch (Op.getOpcode()) {
 196   default:
 197     Op.getNode()->dump();
 198     assert(0 && "Custom lowering code for this"
 199         "instruction is not implemented yet!");
 200     break;
 201   // AMDIL DAG lowering
 202   case ISD::SDIV: return LowerSDIV(Op, DAG);
 203   case ISD::SREM: return LowerSREM(Op, DAG);
 204   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
 205   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
 206   // AMDGPU DAG lowering
 207   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
 208   case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
 209   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
 210   case ISD::STORE: return LowerVectorStore(Op, DAG);
 211   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
 212   }
 213   return Op;
 214 }
 215
 216 SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
 217                                                  SDValue Op,
 218                                                  SelectionDAG &DAG) const {
 219
 220   const DataLayout *TD = getTargetMachine().getDataLayout();
 221   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
 222   // XXX: What does the value of G->getOffset() mean?
 223   assert(G->getOffset() == 0 &&
 224          "Do not know what to do with an non-zero offset");
 225
 226   unsigned Offset = MFI->LDSSize;
 227   const GlobalValue *GV = G->getGlobal();
 228   uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
 229
 230   // XXX: Account for alignment?
 231   MFI->LDSSize += Size;
 232
 233   return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
 234 }
 235
 236 void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
 237                                          SmallVectorImpl<SDValue> &Args,
 238                                          unsigned Start,
 239                                          unsigned Count) const {
 240   EVT VT = Op.getValueType();
 241   for (unsigned i = Start, e = Start + Count; i != e; ++i) {
 242     Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
 243                                VT.getVectorElementType(),
 244                                Op, DAG.getConstant(i, MVT::i32)));
 245   }
 246 }
 247
 248 SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
 249                                                   SelectionDAG &DAG) const {
 250   SmallVector<SDValue, 8> Args;
 251   SDValue A = Op.getOperand(0);
 252   SDValue B = Op.getOperand(1);
 253
 254   ExtractVectorElements(A, DAG, Args, 0,
 255                         A.getValueType().getVectorNumElements());
 256   ExtractVectorElements(B, DAG, Args, 0,
 257                         B.getValueType().getVectorNumElements());
 258
 259   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
 260                      &Args[0], Args.size());
 261 }
 262
 263 SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
 264                                                      SelectionDAG &DAG) const {
 265
 266   SmallVector<SDValue, 8> Args;
 267   EVT VT = Op.getValueType();
 268   unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 269   ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
 270                         VT.getVectorNumElements());
 271
 272   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
 273                      &Args[0], Args.size());
 274 }
 275
 276
 277 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 278     SelectionDAG &DAG) const {
 279   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 280   SDLoc DL(Op);
 281   EVT VT = Op.getValueType();
 282
 283   switch (IntrinsicID) {
 284     default: return Op;
 285     case AMDGPUIntrinsic::AMDIL_abs:
 286       return LowerIntrinsicIABS(Op, DAG);
 287     case AMDGPUIntrinsic::AMDIL_exp:
 288       return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
 289     case AMDGPUIntrinsic::AMDGPU_lrp:
 290       return LowerIntrinsicLRP(Op, DAG);
 291     case AMDGPUIntrinsic::AMDIL_fraction:
 292       return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
 293     case AMDGPUIntrinsic::AMDIL_max:
 294       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
 295                                                   Op.getOperand(2));
 296     case AMDGPUIntrinsic::AMDGPU_imax:
 297       return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
 298                                                   Op.getOperand(2));
 299     case AMDGPUIntrinsic::AMDGPU_umax:
 300       return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
 301                                                   Op.getOperand(2));
 302     case AMDGPUIntrinsic::AMDIL_min:
 303       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
 304                                                   Op.getOperand(2));
 305     case AMDGPUIntrinsic::AMDGPU_imin:
 306       return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
 307                                                   Op.getOperand(2));
 308     case AMDGPUIntrinsic::AMDGPU_umin:
 309       return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
 310                                                   Op.getOperand(2));
 311     case AMDGPUIntrinsic::AMDIL_round_nearest:
 312       return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
 313   }
 314 }
 315
 316 ///IABS(a) = SMAX(sub(0, a), a)
 317 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
 318     SelectionDAG &DAG) const {
 319
 320   SDLoc DL(Op);
 321   EVT VT = Op.getValueType();
 322   SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
 323                                               Op.getOperand(1));
 324
 325   return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
 326 }
 327
 328 /// Linear Interpolation
 329 /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
 330 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
 331     SelectionDAG &DAG) const {
 332   SDLoc DL(Op);
 333   EVT VT = Op.getValueType();
 334   SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
 335                                 DAG.getConstantFP(1.0f, MVT::f32),
 336                                 Op.getOperand(1));
 337   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
 338                                                     Op.getOperand(3));
 339   return DAG.getNode(ISD::FADD, DL, VT,
 340       DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
 341       OneSubAC);
 342 }
 343
 344 /// \brief Generate Min/Max node
 345 SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
 346     SelectionDAG &DAG) const {
 347   SDLoc DL(Op);
 348   EVT VT = Op.getValueType();
 349
 350   SDValue LHS = Op.getOperand(0);
 351   SDValue RHS = Op.getOperand(1);
 352   SDValue True = Op.getOperand(2);
 353   SDValue False = Op.getOperand(3);
 354   SDValue CC = Op.getOperand(4);
 355
 356   if (VT != MVT::f32 ||
 357       !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
 358     return SDValue();
 359   }
 360
 361   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 362   switch (CCOpcode) {
 363   case ISD::SETOEQ:
 364   case ISD::SETONE:
 365   case ISD::SETUNE:
 366   case ISD::SETNE:
 367   case ISD::SETUEQ:
 368   case ISD::SETEQ:
 369   case ISD::SETFALSE:
 370   case ISD::SETFALSE2:
 371   case ISD::SETTRUE:
 372   case ISD::SETTRUE2:
 373   case ISD::SETUO:
 374   case ISD::SETO:
 375     assert(0 && "Operation should already be optimised !");
 376   case ISD::SETULE:
 377   case ISD::SETULT:
 378   case ISD::SETOLE:
 379   case ISD::SETOLT:
 380   case ISD::SETLE:
 381   case ISD::SETLT: {
 382     if (LHS == True)
 383       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
 384     else
 385       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
 386   }
 387   case ISD::SETGT:
 388   case ISD::SETGE:
 389   case ISD::SETUGE:
 390   case ISD::SETOGE:
 391   case ISD::SETUGT:
 392   case ISD::SETOGT: {
 393     if (LHS == True)
 394       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
 395     else
 396       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
 397   }
 398   case ISD::SETCC_INVALID:
 399     assert(0 && "Invalid setcc condcode !");
 400   }
 401   return Op;
 402 }
 403
 404
 405
 406 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
 407     SelectionDAG &DAG) const {
 408   SDLoc DL(Op);
 409   EVT VT = Op.getValueType();
 410
 411   SDValue Num = Op.getOperand(0);
 412   SDValue Den = Op.getOperand(1);
 413
 414   SmallVector<SDValue, 8> Results;
 415
 416   // RCP =  URECIP(Den) = 2^32 / Den + e
 417   // e is rounding error.
 418   SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
 419
 420   // RCP_LO = umulo(RCP, Den) */
 421   SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
 422
 423   // RCP_HI = mulhu (RCP, Den) */
 424   SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
 425
 426   // NEG_RCP_LO = -RCP_LO
 427   SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
 428                                                      RCP_LO);
 429
 430   // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
 431   SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
 432                                            NEG_RCP_LO, RCP_LO,
 433                                            ISD::SETEQ);
 434   // Calculate the rounding error from the URECIP instruction
 435   // E = mulhu(ABS_RCP_LO, RCP)
 436   SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
 437
 438   // RCP_A_E = RCP + E
 439   SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
 440
 441   // RCP_S_E = RCP - E
 442   SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
 443
 444   // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
 445   SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
 446                                      RCP_A_E, RCP_S_E,
 447                                      ISD::SETEQ);
 448   // Quotient = mulhu(Tmp0, Num)
 449   SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
 450
 451   // Num_S_Remainder = Quotient * Den
 452   SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
 453
 454   // Remainder = Num - Num_S_Remainder
 455   SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
 456
 457   // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
 458   SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
 459                                                  DAG.getConstant(-1, VT),
 460                                                  DAG.getConstant(0, VT),
 461                                                  ISD::SETGE);
 462   // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
 463   SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
 464                                                   DAG.getConstant(0, VT),
 465                                                   DAG.getConstant(-1, VT),
 466                                                   DAG.getConstant(0, VT),
 467                                                   ISD::SETGE);
 468   // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
 469   SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
 470                                                Remainder_GE_Zero);
 471
 472   // Calculate Division result:
 473
 474   // Quotient_A_One = Quotient + 1
 475   SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
 476                                                          DAG.getConstant(1, VT));
 477
 478   // Quotient_S_One = Quotient - 1
 479   SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
 480                                                          DAG.getConstant(1, VT));
 481
 482   // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
 483   SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
 484                                      Quotient, Quotient_A_One, ISD::SETEQ);
 485
 486   // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
 487   Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
 488                             Quotient_S_One, Div, ISD::SETEQ);
 489
 490   // Calculate Rem result:
 491
 492   // Remainder_S_Den = Remainder - Den
 493   SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
 494
 495   // Remainder_A_Den = Remainder + Den
 496   SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
 497
 498   // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
 499   SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
 500                                     Remainder, Remainder_S_Den, ISD::SETEQ);
 501
 502   // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
 503   Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
 504                             Remainder_A_Den, Rem, ISD::SETEQ);
 505   SDValue Ops[2];
 506   Ops[0] = Div;
 507   Ops[1] = Rem;
 508   return DAG.getMergeValues(Ops, 2, DL);
 509 }
 510
 511 SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op,
 512                                                SelectionDAG &DAG) const {
 513   StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
 514   EVT MemVT = Store->getMemoryVT();
 515   unsigned MemBits = MemVT.getSizeInBits();
 516
 517   // Byte stores are really expensive, so if possible, try to pack
 518   // 32-bit vector truncatating store into an i32 store.
 519   // XXX: We could also handle optimize other vector bitwidths
 520   if (!MemVT.isVector() || MemBits > 32) {
 521     return SDValue();
 522   }
 523
 524   SDLoc DL(Op);
 525   const SDValue &Value = Store->getValue();
 526   EVT VT = Value.getValueType();
 527   const SDValue &Ptr = Store->getBasePtr();
 528   EVT MemEltVT = MemVT.getVectorElementType();
 529   unsigned MemEltBits = MemEltVT.getSizeInBits();
 530   unsigned MemNumElements = MemVT.getVectorNumElements();
 531   EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
 532   SDValue Mask;
 533   switch(MemEltBits) {
 534   case 8:
 535     Mask = DAG.getConstant(0xFF, PackedVT);
 536     break;
 537   case 16:
 538     Mask = DAG.getConstant(0xFFFF, PackedVT);
 539     break;
 540   default:
 541     llvm_unreachable("Cannot lower this vector store");
 542   }
 543   SDValue PackedValue;
 544   for (unsigned i = 0; i < MemNumElements; ++i) {
 545     EVT ElemVT = VT.getVectorElementType();
 546     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
 547                               DAG.getConstant(i, MVT::i32));
 548     Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
 549     Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
 550     SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
 551     Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
 552     if (i == 0) {
 553       PackedValue = Elt;
 554     } else {
 555       PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
 556     }
 557   }
 558   return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
 559                       MachinePointerInfo(Store->getMemOperand()->getValue()),
 560                       Store->isVolatile(),  Store->isNonTemporal(),
 561                       Store->getAlignment());
 562 }
 563
 564 //===----------------------------------------------------------------------===//
 565 // Helper functions
 566 //===----------------------------------------------------------------------===//
 567
 568 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
 569   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
 570     return CFP->isExactlyValue(1.0);
 571   }
 572   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
 573     return C->isAllOnesValue();
 574   }
 575   return false;
 576 }
 577
 578 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
 579   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
 580     return CFP->getValueAPF().isZero();
 581   }
 582   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
 583     return C->isNullValue();
 584   }
 585   return false;
 586 }
 587
 588 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
 589                                                   const TargetRegisterClass *RC,
 590                                                    unsigned Reg, EVT VT) const {
 591   MachineFunction &MF = DAG.getMachineFunction();
 592   MachineRegisterInfo &MRI = MF.getRegInfo();
 593   unsigned VirtualRegister;
 594   if (!MRI.isLiveIn(Reg)) {
 595     VirtualRegister = MRI.createVirtualRegister(RC);
 596     MRI.addLiveIn(Reg, VirtualRegister);
 597   } else {
 598     VirtualRegister = MRI.getLiveInVirtReg(Reg);
 599   }
 600   return DAG.getRegister(VirtualRegister, VT);
 601 }
 602
 603 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
 604
 605 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
 606   switch (Opcode) {
 607   default: return 0;
 608   // AMDIL DAG nodes
 609   NODE_NAME_CASE(CALL);
 610   NODE_NAME_CASE(UMUL);
 611   NODE_NAME_CASE(DIV_INF);
 612   NODE_NAME_CASE(RET_FLAG);
 613   NODE_NAME_CASE(BRANCH_COND);
 614
 615   // AMDGPU DAG nodes
 616   NODE_NAME_CASE(DWORDADDR)
 617   NODE_NAME_CASE(FRACT)
 618   NODE_NAME_CASE(FMAX)
 619   NODE_NAME_CASE(SMAX)
 620   NODE_NAME_CASE(UMAX)
 621   NODE_NAME_CASE(FMIN)
 622   NODE_NAME_CASE(SMIN)
 623   NODE_NAME_CASE(UMIN)
 624   NODE_NAME_CASE(URECIP)
 625   NODE_NAME_CASE(EXPORT)
 626   NODE_NAME_CASE(CONST_ADDRESS)
 627   NODE_NAME_CASE(REGISTER_LOAD)
 628   NODE_NAME_CASE(REGISTER_STORE)
 629   NODE_NAME_CASE(LOAD_CONSTANT)
 630   NODE_NAME_CASE(LOAD_INPUT)
 631   NODE_NAME_CASE(SAMPLE)
 632   NODE_NAME_CASE(SAMPLEB)
 633   NODE_NAME_CASE(SAMPLED)
 634   NODE_NAME_CASE(SAMPLEL)
 635   NODE_NAME_CASE(STORE_MSKOR)
 636   }
 637 }