lib/Target/R600/AMDGPUISelDAGToDAG.cpp

   1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief Defines an instruction selector for the AMDGPU target.
  12 //
  13 //===----------------------------------------------------------------------===//
  14 #include "AMDGPUInstrInfo.h"
  15 #include "AMDGPUISelLowering.h" // For AMDGPUISD
  16 #include "AMDGPURegisterInfo.h"
  17 #include "AMDGPUSubtarget.h"
  18 #include "R600InstrInfo.h"
  19 #include "SIISelLowering.h"
  20 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  21 #include "llvm/CodeGen/PseudoSourceValue.h"
  22 #include "llvm/CodeGen/SelectionDAG.h"
  23 #include "llvm/CodeGen/SelectionDAGISel.h"
  24 #include "llvm/IR/Function.h"
  25
  26 using namespace llvm;
  27
  28 //===----------------------------------------------------------------------===//
  29 // Instruction Selector Implementation
  30 //===----------------------------------------------------------------------===//
  31
  32 namespace {
  33 /// AMDGPU specific code to select AMDGPU machine instructions for
  34 /// SelectionDAG operations.
  35 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  36   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  37   // make the right decision when generating code for different targets.
  38   const AMDGPUSubtarget &Subtarget;
  39 public:
  40   AMDGPUDAGToDAGISel(TargetMachine &TM);
  41   virtual ~AMDGPUDAGToDAGISel();
  42
  43   SDNode *Select(SDNode *N) override;
  44   const char *getPassName() const override;
  45   void PostprocessISelDAG() override;
  46
  47 private:
  48   bool isInlineImmediate(SDNode *N) const;
  49   inline SDValue getSmallIPtrImm(unsigned Imm);
  50   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
  51                    const R600InstrInfo *TII);
  52   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  53   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  54
  55   // Complex pattern selectors
  56   bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  57   bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  58   bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
  59
  60   static bool checkType(const Value *ptr, unsigned int addrspace);
  61   static bool checkPrivateAddress(const MachineMemOperand *Op);
  62
  63   static bool isGlobalStore(const StoreSDNode *N);
  64   static bool isPrivateStore(const StoreSDNode *N);
  65   static bool isLocalStore(const StoreSDNode *N);
  66   static bool isRegionStore(const StoreSDNode *N);
  67
  68   bool isCPLoad(const LoadSDNode *N) const;
  69   bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  70   bool isGlobalLoad(const LoadSDNode *N) const;
  71   bool isParamLoad(const LoadSDNode *N) const;
  72   bool isPrivateLoad(const LoadSDNode *N) const;
  73   bool isLocalLoad(const LoadSDNode *N) const;
  74   bool isRegionLoad(const LoadSDNode *N) const;
  75
  76   /// \returns True if the current basic block being selected is at control
  77   ///          flow depth 0.  Meaning that the current block dominates the
  78   //           exit block.
  79   bool isCFDepth0() const;
  80
  81   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  82   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  83   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
  84                                        SDValue& Offset);
  85   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  86   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  87   bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
  88                         SDValue &ImmOffset) const;
  89
  90   SDNode *SelectADD_SUB_I64(SDNode *N);
  91   SDNode *SelectDIV_SCALE(SDNode *N);
  92
  93   // Include the pieces autogenerated from the target description.
  94 #include "AMDGPUGenDAGISel.inc"
  95 };
  96 }  // end anonymous namespace
  97
  98 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
  99 // DAG, ready for instruction scheduling.
 100 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
 101   return new AMDGPUDAGToDAGISel(TM);
 102 }
 103
 104 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
 105   : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
 106 }
 107
 108 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
 109 }
 110
 111 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
 112   const SITargetLowering *TL
 113       = static_cast<const SITargetLowering *>(getTargetLowering());
 114   return TL->analyzeImmediate(N) == 0;
 115 }
 116
 117 /// \brief Determine the register class for \p OpNo
 118 /// \returns The register class of the virtual register that will be used for
 119 /// the given operand number \OpNo or NULL if the register class cannot be
 120 /// determined.
 121 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
 122                                                           unsigned OpNo) const {
 123   if (!N->isMachineOpcode())
 124     return nullptr;
 125
 126   switch (N->getMachineOpcode()) {
 127   default: {
 128     const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
 129     unsigned OpIdx = Desc.getNumDefs() + OpNo;
 130     if (OpIdx >= Desc.getNumOperands())
 131       return nullptr;
 132     int RegClass = Desc.OpInfo[OpIdx].RegClass;
 133     if (RegClass == -1)
 134       return nullptr;
 135
 136     return TM.getRegisterInfo()->getRegClass(RegClass);
 137   }
 138   case AMDGPU::REG_SEQUENCE: {
 139     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
 140     const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(RCID);
 141
 142     SDValue SubRegOp = N->getOperand(OpNo + 1);
 143     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
 144     return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
 145   }
 146   }
 147 }
 148
 149 SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
 150   return CurDAG->getTargetConstant(Imm, MVT::i32);
 151 }
 152
 153 bool AMDGPUDAGToDAGISel::SelectADDRParam(
 154   SDValue Addr, SDValue& R1, SDValue& R2) {
 155
 156   if (Addr.getOpcode() == ISD::FrameIndex) {
 157     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
 158       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
 159       R2 = CurDAG->getTargetConstant(0, MVT::i32);
 160     } else {
 161       R1 = Addr;
 162       R2 = CurDAG->getTargetConstant(0, MVT::i32);
 163     }
 164   } else if (Addr.getOpcode() == ISD::ADD) {
 165     R1 = Addr.getOperand(0);
 166     R2 = Addr.getOperand(1);
 167   } else {
 168     R1 = Addr;
 169     R2 = CurDAG->getTargetConstant(0, MVT::i32);
 170   }
 171   return true;
 172 }
 173
 174 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
 175   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
 176       Addr.getOpcode() == ISD::TargetGlobalAddress) {
 177     return false;
 178   }
 179   return SelectADDRParam(Addr, R1, R2);
 180 }
 181
 182
 183 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
 184   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
 185       Addr.getOpcode() == ISD::TargetGlobalAddress) {
 186     return false;
 187   }
 188
 189   if (Addr.getOpcode() == ISD::FrameIndex) {
 190     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
 191       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
 192       R2 = CurDAG->getTargetConstant(0, MVT::i64);
 193     } else {
 194       R1 = Addr;
 195       R2 = CurDAG->getTargetConstant(0, MVT::i64);
 196     }
 197   } else if (Addr.getOpcode() == ISD::ADD) {
 198     R1 = Addr.getOperand(0);
 199     R2 = Addr.getOperand(1);
 200   } else {
 201     R1 = Addr;
 202     R2 = CurDAG->getTargetConstant(0, MVT::i64);
 203   }
 204   return true;
 205 }
 206
 207 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 208   unsigned int Opc = N->getOpcode();
 209   if (N->isMachineOpcode()) {
 210     N->setNodeId(-1);
 211     return nullptr;   // Already selected.
 212   }
 213
 214   const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 215   switch (Opc) {
 216   default: break;
 217   // We are selecting i64 ADD here instead of custom lower it during
 218   // DAG legalization, so we can fold some i64 ADDs used for address
 219   // calculation into the LOAD and STORE instructions.
 220   case ISD::ADD:
 221   case ISD::SUB: {
 222     if (N->getValueType(0) != MVT::i64 ||
 223         ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
 224       break;
 225
 226     return SelectADD_SUB_I64(N);
 227   }
 228   case ISD::SCALAR_TO_VECTOR:
 229   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
 230   case ISD::BUILD_VECTOR: {
 231     unsigned RegClassID;
 232     const AMDGPURegisterInfo *TRI =
 233                    static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
 234     const SIRegisterInfo *SIRI =
 235                    static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
 236     EVT VT = N->getValueType(0);
 237     unsigned NumVectorElts = VT.getVectorNumElements();
 238     EVT EltVT = VT.getVectorElementType();
 239     assert(EltVT.bitsEq(MVT::i32));
 240     if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
 241       bool UseVReg = true;
 242       for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
 243                                                     U != E; ++U) {
 244         if (!U->isMachineOpcode()) {
 245           continue;
 246         }
 247         const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
 248         if (!RC) {
 249           continue;
 250         }
 251         if (SIRI->isSGPRClass(RC)) {
 252           UseVReg = false;
 253         }
 254       }
 255       switch(NumVectorElts) {
 256       case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
 257                                      AMDGPU::SReg_32RegClassID;
 258         break;
 259       case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
 260                                      AMDGPU::SReg_64RegClassID;
 261         break;
 262       case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
 263                                      AMDGPU::SReg_128RegClassID;
 264         break;
 265       case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
 266                                      AMDGPU::SReg_256RegClassID;
 267         break;
 268       case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
 269                                       AMDGPU::SReg_512RegClassID;
 270         break;
 271       default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
 272       }
 273     } else {
 274       // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
 275       // that adds a 128 bits reg copy when going through TwoAddressInstructions
 276       // pass. We want to avoid 128 bits copies as much as possible because they
 277       // can't be bundled by our scheduler.
 278       switch(NumVectorElts) {
 279       case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
 280       case 4:
 281         if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
 282           RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
 283         else
 284           RegClassID = AMDGPU::R600_Reg128RegClassID;
 285         break;
 286       default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
 287       }
 288     }
 289
 290     SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
 291
 292     if (NumVectorElts == 1) {
 293       return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
 294                                   N->getOperand(0), RegClass);
 295     }
 296
 297     assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
 298                                   "supported yet");
 299     // 16 = Max Num Vector Elements
 300     // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
 301     // 1 = Vector Register Class
 302     SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
 303
 304     RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
 305     bool IsRegSeq = true;
 306     unsigned NOps = N->getNumOperands();
 307     for (unsigned i = 0; i < NOps; i++) {
 308       // XXX: Why is this here?
 309       if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
 310         IsRegSeq = false;
 311         break;
 312       }
 313       RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
 314       RegSeqArgs[1 + (2 * i) + 1] =
 315               CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
 316     }
 317
 318     if (NOps != NumVectorElts) {
 319       // Fill in the missing undef elements if this was a scalar_to_vector.
 320       assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
 321
 322       MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
 323                                                      SDLoc(N), EltVT);
 324       for (unsigned i = NOps; i < NumVectorElts; ++i) {
 325         RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
 326         RegSeqArgs[1 + (2 * i) + 1] =
 327           CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
 328       }
 329     }
 330
 331     if (!IsRegSeq)
 332       break;
 333     return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
 334                                 RegSeqArgs);
 335   }
 336   case ISD::BUILD_PAIR: {
 337     SDValue RC, SubReg0, SubReg1;
 338     if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
 339       break;
 340     }
 341     if (N->getValueType(0) == MVT::i128) {
 342       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
 343       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
 344       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
 345     } else if (N->getValueType(0) == MVT::i64) {
 346       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
 347       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
 348       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
 349     } else {
 350       llvm_unreachable("Unhandled value type for BUILD_PAIR");
 351     }
 352     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
 353                             N->getOperand(1), SubReg1 };
 354     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
 355                                   SDLoc(N), N->getValueType(0), Ops);
 356   }
 357
 358   case ISD::Constant:
 359   case ISD::ConstantFP: {
 360     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 361     if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
 362         N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
 363       break;
 364
 365     uint64_t Imm;
 366     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
 367       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
 368     else {
 369       ConstantSDNode *C = cast<ConstantSDNode>(N);
 370       Imm = C->getZExtValue();
 371     }
 372
 373     SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
 374                                 CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
 375     SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
 376                                 CurDAG->getConstant(Imm >> 32, MVT::i32));
 377     const SDValue Ops[] = {
 378       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
 379       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
 380       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
 381     };
 382
 383     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
 384                                   N->getValueType(0), Ops);
 385   }
 386
 387   case AMDGPUISD::REGISTER_LOAD: {
 388     if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
 389       break;
 390     SDValue Addr, Offset;
 391
 392     SelectADDRIndirect(N->getOperand(1), Addr, Offset);
 393     const SDValue Ops[] = {
 394       Addr,
 395       Offset,
 396       CurDAG->getTargetConstant(0, MVT::i32),
 397       N->getOperand(0),
 398     };
 399     return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
 400                                   CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
 401                                   Ops);
 402   }
 403   case AMDGPUISD::REGISTER_STORE: {
 404     if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
 405       break;
 406     SDValue Addr, Offset;
 407     SelectADDRIndirect(N->getOperand(2), Addr, Offset);
 408     const SDValue Ops[] = {
 409       N->getOperand(1),
 410       Addr,
 411       Offset,
 412       CurDAG->getTargetConstant(0, MVT::i32),
 413       N->getOperand(0),
 414     };
 415     return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
 416                                         CurDAG->getVTList(MVT::Other),
 417                                         Ops);
 418   }
 419
 420   case AMDGPUISD::BFE_I32:
 421   case AMDGPUISD::BFE_U32: {
 422     if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
 423       break;
 424
 425     // There is a scalar version available, but unlike the vector version which
 426     // has a separate operand for the offset and width, the scalar version packs
 427     // the width and offset into a single operand. Try to move to the scalar
 428     // version if the offsets are constant, so that we can try to keep extended
 429     // loads of kernel arguments in SGPRs.
 430
 431     // TODO: Technically we could try to pattern match scalar bitshifts of
 432     // dynamic values, but it's probably not useful.
 433     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
 434     if (!Offset)
 435       break;
 436
 437     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
 438     if (!Width)
 439       break;
 440
 441     bool Signed = Opc == AMDGPUISD::BFE_I32;
 442
 443     // Transformation function, pack the offset and width of a BFE into
 444     // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
 445     // source, bits [5:0] contain the offset and bits [22:16] the width.
 446
 447     uint32_t OffsetVal = Offset->getZExtValue();
 448     uint32_t WidthVal = Width->getZExtValue();
 449
 450     uint32_t PackedVal = OffsetVal | WidthVal << 16;
 451
 452     SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
 453     return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
 454                                   SDLoc(N),
 455                                   MVT::i32,
 456                                   N->getOperand(0),
 457                                   PackedOffsetWidth);
 458
 459   }
 460   case AMDGPUISD::DIV_SCALE: {
 461     return SelectDIV_SCALE(N);
 462   }
 463   }
 464   return SelectCode(N);
 465 }
 466
 467
 468 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
 469   assert(AS != 0 && "Use checkPrivateAddress instead.");
 470   if (!Ptr)
 471     return false;
 472
 473   return Ptr->getType()->getPointerAddressSpace() == AS;
 474 }
 475
 476 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
 477   if (Op->getPseudoValue())
 478     return true;
 479
 480   if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
 481     return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
 482
 483   return false;
 484 }
 485
 486 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
 487   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
 488 }
 489
 490 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
 491   const Value *MemVal = N->getMemOperand()->getValue();
 492   return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
 493           !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
 494           !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
 495 }
 496
 497 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
 498   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
 499 }
 500
 501 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
 502   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
 503 }
 504
 505 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
 506   const Value *MemVal = N->getMemOperand()->getValue();
 507   if (CbId == -1)
 508     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
 509
 510   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
 511 }
 512
 513 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
 514   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
 515     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 516     if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
 517         N->getMemoryVT().bitsLT(MVT::i32)) {
 518       return true;
 519     }
 520   }
 521   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
 522 }
 523
 524 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
 525   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
 526 }
 527
 528 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
 529   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
 530 }
 531
 532 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
 533   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
 534 }
 535
 536 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
 537   MachineMemOperand *MMO = N->getMemOperand();
 538   if (checkPrivateAddress(N->getMemOperand())) {
 539     if (MMO) {
 540       const PseudoSourceValue *PSV = MMO->getPseudoValue();
 541       if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
 542         return true;
 543       }
 544     }
 545   }
 546   return false;
 547 }
 548
 549 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
 550   if (checkPrivateAddress(N->getMemOperand())) {
 551     // Check to make sure we are not a constant pool load or a constant load
 552     // that is marked as a private load
 553     if (isCPLoad(N) || isConstantLoad(N, -1)) {
 554       return false;
 555     }
 556   }
 557
 558   const Value *MemVal = N->getMemOperand()->getValue();
 559   if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
 560       !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
 561       !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
 562       !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
 563       !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
 564       !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)){
 565     return true;
 566   }
 567   return false;
 568 }
 569
 570 bool AMDGPUDAGToDAGISel::isCFDepth0() const {
 571   // FIXME: Figure out a way to use DominatorTree analysis here.
 572   const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
 573   const Function *Fn = FuncInfo->Fn;
 574   return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
 575 }
 576
 577
 578 const char *AMDGPUDAGToDAGISel::getPassName() const {
 579   return "AMDGPU DAG->DAG Pattern Instruction Selection";
 580 }
 581
 582 #ifdef DEBUGTMP
 583 #undef INT64_C
 584 #endif
 585 #undef DEBUGTMP
 586
 587 //===----------------------------------------------------------------------===//
 588 // Complex Patterns
 589 //===----------------------------------------------------------------------===//
 590
 591 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
 592                                                          SDValue& IntPtr) {
 593   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
 594     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
 595     return true;
 596   }
 597   return false;
 598 }
 599
 600 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
 601     SDValue& BaseReg, SDValue &Offset) {
 602   if (!isa<ConstantSDNode>(Addr)) {
 603     BaseReg = Addr;
 604     Offset = CurDAG->getIntPtrConstant(0, true);
 605     return true;
 606   }
 607   return false;
 608 }
 609
 610 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
 611                                            SDValue &Offset) {
 612   ConstantSDNode *IMMOffset;
 613
 614   if (Addr.getOpcode() == ISD::ADD
 615       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
 616       && isInt<16>(IMMOffset->getZExtValue())) {
 617
 618       Base = Addr.getOperand(0);
 619       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
 620       return true;
 621   // If the pointer address is constant, we can move it to the offset field.
 622   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
 623              && isInt<16>(IMMOffset->getZExtValue())) {
 624     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
 625                                   SDLoc(CurDAG->getEntryNode()),
 626                                   AMDGPU::ZERO, MVT::i32);
 627     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
 628     return true;
 629   }
 630
 631   // Default case, no offset
 632   Base = Addr;
 633   Offset = CurDAG->getTargetConstant(0, MVT::i32);
 634   return true;
 635 }
 636
 637 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
 638                                             SDValue &Offset) {
 639   ConstantSDNode *C;
 640
 641   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
 642     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
 643     Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
 644   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
 645             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
 646     Base = Addr.getOperand(0);
 647     Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
 648   } else {
 649     Base = Addr;
 650     Offset = CurDAG->getTargetConstant(0, MVT::i32);
 651   }
 652
 653   return true;
 654 }
 655
 656 SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
 657   SDLoc DL(N);
 658   SDValue LHS = N->getOperand(0);
 659   SDValue RHS = N->getOperand(1);
 660
 661   bool IsAdd = (N->getOpcode() == ISD::ADD);
 662
 663   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
 664   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
 665
 666   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 667                                        DL, MVT::i32, LHS, Sub0);
 668   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 669                                        DL, MVT::i32, LHS, Sub1);
 670
 671   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 672                                        DL, MVT::i32, RHS, Sub0);
 673   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 674                                        DL, MVT::i32, RHS, Sub1);
 675
 676   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
 677   SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
 678
 679
 680   unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
 681   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
 682
 683   if (!isCFDepth0()) {
 684     Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
 685     CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
 686   }
 687
 688   SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
 689   SDValue Carry(AddLo, 1);
 690   SDNode *AddHi
 691     = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
 692                              SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
 693
 694   SDValue Args[5] = {
 695     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
 696     SDValue(AddLo,0),
 697     Sub0,
 698     SDValue(AddHi,0),
 699     Sub1,
 700   };
 701   return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
 702 }
 703
 704 SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
 705   SDLoc SL(N);
 706   EVT VT = N->getValueType(0);
 707
 708   assert(VT == MVT::f32 || VT == MVT::f64);
 709
 710   unsigned Opc
 711     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
 712
 713   const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
 714
 715   SDValue Ops[] = {
 716     N->getOperand(0),
 717     N->getOperand(1),
 718     N->getOperand(2),
 719     Zero,
 720     Zero,
 721     Zero,
 722     Zero
 723   };
 724
 725   return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
 726 }
 727
 728 static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
 729   return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
 730                                      Ptr), 0);
 731 }
 732
 733 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
 734                                            SDValue &Offset,
 735                                            SDValue &ImmOffset) const {
 736   SDLoc DL(Addr);
 737
 738   if (CurDAG->isBaseWithConstantOffset(Addr)) {
 739     SDValue N0 = Addr.getOperand(0);
 740     SDValue N1 = Addr.getOperand(1);
 741     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
 742
 743     if (isUInt<12>(C1->getZExtValue())) {
 744
 745       if (N0.getOpcode() == ISD::ADD) {
 746         // (add (add N2, N3), C1)
 747         SDValue N2 = N0.getOperand(0);
 748         SDValue N3 = N0.getOperand(1);
 749         Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
 750         Offset = N3;
 751         ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
 752         return true;
 753       }
 754
 755       // (add N0, C1)
 756       Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));;
 757       Offset = N0;
 758       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
 759       return true;
 760     }
 761   }
 762   if (Addr.getOpcode() == ISD::ADD) {
 763     // (add N0, N1)
 764     SDValue N0 = Addr.getOperand(0);
 765     SDValue N1 = Addr.getOperand(1);
 766     Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
 767     Offset = N1;
 768     ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
 769     return true;
 770   }
 771
 772   // default case
 773   Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
 774   Offset = Addr;
 775   ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
 776   return true;
 777 }
 778
 779 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
 780   const AMDGPUTargetLowering& Lowering =
 781     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
 782   bool IsModified = false;
 783   do {
 784     IsModified = false;
 785     // Go over all selected nodes and try to fold them a bit more
 786     for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 787          E = CurDAG->allnodes_end(); I != E; ++I) {
 788
 789       SDNode *Node = I;
 790
 791       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
 792       if (!MachineNode)
 793         continue;
 794
 795       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
 796       if (ResNode != Node) {
 797         ReplaceUses(Node, ResNode);
 798         IsModified = true;
 799       }
 800     }
 801     CurDAG->RemoveDeadNodes();
 802   } while (IsModified);
 803 }