lib/Target/R600/AMDGPUISelDAGToDAG.cpp

//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief Defines an instruction selector for the AMDGPU target.
  12 //
  13 //===----------------------------------------------------------------------===//
  14 #include "AMDGPUInstrInfo.h"
  15 #include "AMDGPUISelLowering.h" // For AMDGPUISD
  16 #include "AMDGPURegisterInfo.h"
  17 #include "R600InstrInfo.h"
  18 #include "SIISelLowering.h"
  19 #include "llvm/ADT/ValueMap.h"
  20 #include "llvm/Analysis/ValueTracking.h"
  21 #include "llvm/CodeGen/MachineRegisterInfo.h"
  22 #include "llvm/CodeGen/PseudoSourceValue.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/Support/Compiler.h"
  26 #include <list>
  27 #include <queue>
  28
  29 using namespace llvm;
  30
  31 //===----------------------------------------------------------------------===//
  32 // Instruction Selector Implementation
  33 //===----------------------------------------------------------------------===//
  34
  35 namespace {
  36 /// AMDGPU specific code to select AMDGPU machine instructions for
  37 /// SelectionDAG operations.
  38 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  39   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  40   // make the right decision when generating code for different targets.
  41   const AMDGPUSubtarget &Subtarget;
  42 public:
  43   AMDGPUDAGToDAGISel(TargetMachine &TM);
  44   virtual ~AMDGPUDAGToDAGISel();
  45
  46   SDNode *Select(SDNode *N);
  47   virtual const char *getPassName() const;
  48   virtual void PostprocessISelDAG();
  49
  50 private:
  51   inline SDValue getSmallIPtrImm(unsigned Imm);
  52   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
  53                    const R600InstrInfo *TII);
  54   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  55   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  56
  57   // Complex pattern selectors
  58   bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  59   bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  60   bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
  61   SDValue SimplifyI24(SDValue &Op);
  62   bool SelectI24(SDValue Addr, SDValue &Op);
  63   bool SelectU24(SDValue Addr, SDValue &Op);
  64
  65   static bool checkType(const Value *ptr, unsigned int addrspace);
  66
  67   static bool isGlobalStore(const StoreSDNode *N);
  68   static bool isPrivateStore(const StoreSDNode *N);
  69   static bool isLocalStore(const StoreSDNode *N);
  70   static bool isRegionStore(const StoreSDNode *N);
  71
  72   bool isCPLoad(const LoadSDNode *N) const;
  73   bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  74   bool isGlobalLoad(const LoadSDNode *N) const;
  75   bool isParamLoad(const LoadSDNode *N) const;
  76   bool isPrivateLoad(const LoadSDNode *N) const;
  77   bool isLocalLoad(const LoadSDNode *N) const;
  78   bool isRegionLoad(const LoadSDNode *N) const;
  79
  80   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  81   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  82   bool SelectGlobalValueVariableOffset(SDValue Addr,
  83       SDValue &BaseReg, SDValue& Offset);
  84   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  85   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  86
  87   // Include the pieces autogenerated from the target description.
  88 #include "AMDGPUGenDAGISel.inc"
  89 };
  90 }  // end anonymous namespace
  91
  92 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
  93 // DAG, ready for instruction scheduling.
  94 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
  95                                        ) {
  96   return new AMDGPUDAGToDAGISel(TM);
  97 }
  98
  99 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
 100   : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
 101 }
 102
 103 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
 104 }
 105
 106 /// \brief Determine the register class for \p OpNo
 107 /// \returns The register class of the virtual register that will be used for
 108 /// the given operand number \OpNo or NULL if the register class cannot be
 109 /// determined.
 110 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
 111                                                           unsigned OpNo) const {
 112   if (!N->isMachineOpcode()) {
 113     return NULL;
 114   }
 115   switch (N->getMachineOpcode()) {
 116   default: {
 117     const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
 118     unsigned OpIdx = Desc.getNumDefs() + OpNo;
 119     if (OpIdx >= Desc.getNumOperands())
 120       return NULL;
 121     int RegClass = Desc.OpInfo[OpIdx].RegClass;
 122     if (RegClass == -1) {
 123       return NULL;
 124     }
 125     return TM.getRegisterInfo()->getRegClass(RegClass);
 126   }
 127   case AMDGPU::REG_SEQUENCE: {
 128     const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(
 129                       cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
 130     unsigned SubRegIdx =
 131             dyn_cast<ConstantSDNode>(N->getOperand(OpNo + 1))->getZExtValue();
 132     return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
 133   }
 134   }
 135 }
 136
 137 SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
 138   return CurDAG->getTargetConstant(Imm, MVT::i32);
 139 }
 140
 141 bool AMDGPUDAGToDAGISel::SelectADDRParam(
 142     SDValue Addr, SDValue& R1, SDValue& R2) {
 143
 144   if (Addr.getOpcode() == ISD::FrameIndex) {
 145     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
 146       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
 147       R2 = CurDAG->getTargetConstant(0, MVT::i32);
 148     } else {
 149       R1 = Addr;
 150       R2 = CurDAG->getTargetConstant(0, MVT::i32);
 151     }
 152   } else if (Addr.getOpcode() == ISD::ADD) {
 153     R1 = Addr.getOperand(0);
 154     R2 = Addr.getOperand(1);
 155   } else {
 156     R1 = Addr;
 157     R2 = CurDAG->getTargetConstant(0, MVT::i32);
 158   }
 159   return true;
 160 }
 161
 162 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
 163   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
 164       Addr.getOpcode() == ISD::TargetGlobalAddress) {
 165     return false;
 166   }
 167   return SelectADDRParam(Addr, R1, R2);
 168 }
 169
 170
 171 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
 172   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
 173       Addr.getOpcode() == ISD::TargetGlobalAddress) {
 174     return false;
 175   }
 176
 177   if (Addr.getOpcode() == ISD::FrameIndex) {
 178     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
 179       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
 180       R2 = CurDAG->getTargetConstant(0, MVT::i64);
 181     } else {
 182       R1 = Addr;
 183       R2 = CurDAG->getTargetConstant(0, MVT::i64);
 184     }
 185   } else if (Addr.getOpcode() == ISD::ADD) {
 186     R1 = Addr.getOperand(0);
 187     R2 = Addr.getOperand(1);
 188   } else {
 189     R1 = Addr;
 190     R2 = CurDAG->getTargetConstant(0, MVT::i64);
 191   }
 192   return true;
 193 }
 194
 195 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 196   const R600InstrInfo *TII =
 197                       static_cast<const R600InstrInfo*>(TM.getInstrInfo());
 198   unsigned int Opc = N->getOpcode();
 199   if (N->isMachineOpcode()) {
 200     return NULL;   // Already selected.
 201   }
 202   switch (Opc) {
 203   default: break;
 204   case AMDGPUISD::CONST_ADDRESS: {
 205     for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
 206                               I != SDNode::use_end(); I = Next) {
 207       Next = llvm::next(I);
 208       if (!I->isMachineOpcode()) {
 209         continue;
 210       }
 211       unsigned Opcode = I->getMachineOpcode();
 212       bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
 213       int SrcIdx = I.getOperandNo();
 214       int SelIdx;
 215       // Unlike MachineInstrs, SDNodes do not have results in their operand
 216       // list, so we need to increment the SrcIdx, since
 217       // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
 218       if (HasDst) {
 219         SrcIdx++;
 220       }
 221
 222       SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
 223       if (SelIdx < 0) {
 224         continue;
 225       }
 226
 227       SDValue CstOffset;
 228       if (N->getValueType(0).isVector() ||
 229           !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
 230         continue;
 231
 232       // Gather constants values
 233       int SrcIndices[] = {
 234         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
 235         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
 236         TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
 237         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
 238         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
 239         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
 240         TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
 241         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
 242         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
 243         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
 244         TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
 245       };
 246       std::vector<unsigned> Consts;
 247       for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
 248         int OtherSrcIdx = SrcIndices[i];
 249         int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
 250         if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
 251           continue;
 252         }
 253         if (HasDst) {
 254           OtherSrcIdx--;
 255           OtherSelIdx--;
 256         }
 257         if (RegisterSDNode *Reg =
 258                          dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
 259           if (Reg->getReg() == AMDGPU::ALU_CONST) {
 260             ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
 261             Consts.push_back(Cst->getZExtValue());
 262           }
 263         }
 264       }
 265
 266       ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
 267       Consts.push_back(Cst->getZExtValue());
 268       if (!TII->fitsConstReadLimitations(Consts))
 269         continue;
 270
 271       // Convert back to SDNode indices
 272       if (HasDst) {
 273         SrcIdx--;
 274         SelIdx--;
 275       }
 276       std::vector<SDValue> Ops;
 277       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
 278         if (i == SrcIdx) {
 279           Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
 280         } else if (i == SelIdx) {
 281           Ops.push_back(CstOffset);
 282         } else {
 283           Ops.push_back(I->getOperand(i));
 284         }
 285       }
 286       CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
 287     }
 288     break;
 289   }
 290   case ISD::BUILD_VECTOR: {
 291     unsigned RegClassID;
 292     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 293     const AMDGPURegisterInfo *TRI =
 294                    static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
 295     const SIRegisterInfo *SIRI =
 296                    static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
 297     EVT VT = N->getValueType(0);
 298     unsigned NumVectorElts = VT.getVectorNumElements();
 299     assert(VT.getVectorElementType().bitsEq(MVT::i32));
 300     if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
 301       bool UseVReg = true;
 302       for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
 303                                                     U != E; ++U) {
 304         if (!U->isMachineOpcode()) {
 305           continue;
 306         }
 307         const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
 308         if (!RC) {
 309           continue;
 310         }
 311         if (SIRI->isSGPRClass(RC)) {
 312           UseVReg = false;
 313         }
 314       }
 315       switch(NumVectorElts) {
 316       case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
 317                                      AMDGPU::SReg_32RegClassID;
 318         break;
 319       case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
 320                                      AMDGPU::SReg_64RegClassID;
 321         break;
 322       case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
 323                                      AMDGPU::SReg_128RegClassID;
 324         break;
 325       case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
 326                                      AMDGPU::SReg_256RegClassID;
 327         break;
 328       case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
 329                                       AMDGPU::SReg_512RegClassID;
 330         break;
 331       }
 332     } else {
 333       // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
 334       // that adds a 128 bits reg copy when going through TwoAddressInstructions
 335       // pass. We want to avoid 128 bits copies as much as possible because they
 336       // can't be bundled by our scheduler.
 337       switch(NumVectorElts) {
 338       case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
 339       case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
 340       default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
 341       }
 342     }
 343
 344     SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
 345
 346     if (NumVectorElts == 1) {
 347       return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
 348                                   VT.getVectorElementType(),
 349                                   N->getOperand(0), RegClass);
 350     }
 351
 352     assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
 353                                   "supported yet");
 354     // 16 = Max Num Vector Elements
 355     // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
 356     // 1 = Vector Register Class
 357     SDValue RegSeqArgs[16 * 2 + 1];
 358
 359     RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
 360     bool IsRegSeq = true;
 361     for (unsigned i = 0; i < N->getNumOperands(); i++) {
 362       // XXX: Why is this here?
 363       if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
 364         IsRegSeq = false;
 365         break;
 366       }
 367       RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
 368       RegSeqArgs[1 + (2 * i) + 1] =
 369               CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
 370     }
 371     if (!IsRegSeq)
 372       break;
 373     return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
 374         RegSeqArgs, 2 * N->getNumOperands() + 1);
 375   }
 376   case ISD::BUILD_PAIR: {
 377     SDValue RC, SubReg0, SubReg1;
 378     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 379     if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
 380       break;
 381     }
 382     if (N->getValueType(0) == MVT::i128) {
 383       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
 384       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
 385       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
 386     } else if (N->getValueType(0) == MVT::i64) {
 387       RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
 388       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
 389       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
 390     } else {
 391       llvm_unreachable("Unhandled value type for BUILD_PAIR");
 392     }
 393     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
 394                             N->getOperand(1), SubReg1 };
 395     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
 396                                   SDLoc(N), N->getValueType(0), Ops);
 397   }
 398
 399   case ISD::ConstantFP:
 400   case ISD::Constant: {
 401     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 402     // XXX: Custom immediate lowering not implemented yet.  Instead we use
 403     // pseudo instructions defined in SIInstructions.td
 404     if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
 405       break;
 406     }
 407
 408     uint64_t ImmValue = 0;
 409     unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
 410
 411     if (N->getOpcode() == ISD::ConstantFP) {
 412       // XXX: 64-bit Immediates not supported yet
 413       assert(N->getValueType(0) != MVT::f64);
 414
 415       ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
 416       APFloat Value = C->getValueAPF();
 417       float FloatValue = Value.convertToFloat();
 418       if (FloatValue == 0.0) {
 419         ImmReg = AMDGPU::ZERO;
 420       } else if (FloatValue == 0.5) {
 421         ImmReg = AMDGPU::HALF;
 422       } else if (FloatValue == 1.0) {
 423         ImmReg = AMDGPU::ONE;
 424       } else {
 425         ImmValue = Value.bitcastToAPInt().getZExtValue();
 426       }
 427     } else {
 428       // XXX: 64-bit Immediates not supported yet
 429       assert(N->getValueType(0) != MVT::i64);
 430
 431       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
 432       if (C->getZExtValue() == 0) {
 433         ImmReg = AMDGPU::ZERO;
 434       } else if (C->getZExtValue() == 1) {
 435         ImmReg = AMDGPU::ONE_INT;
 436       } else {
 437         ImmValue = C->getZExtValue();
 438       }
 439     }
 440
 441     for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
 442                               Use != SDNode::use_end(); Use = Next) {
 443       Next = llvm::next(Use);
 444       std::vector<SDValue> Ops;
 445       for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
 446         Ops.push_back(Use->getOperand(i));
 447       }
 448
 449       if (!Use->isMachineOpcode()) {
 450           if (ImmReg == AMDGPU::ALU_LITERAL_X) {
 451             // We can only use literal constants (e.g. AMDGPU::ZERO,
 452             // AMDGPU::ONE, etc) in machine opcodes.
 453             continue;
 454           }
 455       } else {
 456         if (!TII->isALUInstr(Use->getMachineOpcode()) ||
 457             (TII->get(Use->getMachineOpcode()).TSFlags &
 458             R600_InstFlag::VECTOR)) {
 459           continue;
 460         }
 461
 462         int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
 463                                         AMDGPU::OpName::literal);
 464         if (ImmIdx == -1) {
 465           continue;
 466         }
 467
 468         if (TII->getOperandIdx(Use->getMachineOpcode(),
 469                                AMDGPU::OpName::dst) != -1) {
 470           // subtract one from ImmIdx, because the DST operand is usually index
 471           // 0 for MachineInstrs, but we have no DST in the Ops vector.
 472           ImmIdx--;
 473         }
 474
 475         // Check that we aren't already using an immediate.
 476         // XXX: It's possible for an instruction to have more than one
 477         // immediate operand, but this is not supported yet.
 478         if (ImmReg == AMDGPU::ALU_LITERAL_X) {
 479           ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
 480           assert(C);
 481
 482           if (C->getZExtValue() != 0) {
 483             // This instruction is already using an immediate.
 484             continue;
 485           }
 486
 487           // Set the immediate value
 488           Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
 489         }
 490       }
 491       // Set the immediate register
 492       Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
 493
 494       CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
 495     }
 496     break;
 497   }
 498   }
 499   SDNode *Result = SelectCode(N);
 500
 501   // Fold operands of selected node
 502
 503   const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 504   if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
 505     const R600InstrInfo *TII =
 506         static_cast<const R600InstrInfo*>(TM.getInstrInfo());
 507     if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
 508       bool IsModified = false;
 509       do {
 510         std::vector<SDValue> Ops;
 511         for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
 512             I != E; ++I)
 513           Ops.push_back(*I);
 514         IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
 515         if (IsModified) {
 516           Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
 517         }
 518       } while (IsModified);
 519
 520     }
 521     if (Result && Result->isMachineOpcode() &&
 522         !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
 523         && TII->hasInstrModifiers(Result->getMachineOpcode())) {
 524       // Fold FNEG/FABS
 525       // TODO: Isel can generate multiple MachineInst, we need to recursively
 526       // parse Result
 527       bool IsModified = false;
 528       do {
 529         std::vector<SDValue> Ops;
 530         for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
 531             I != E; ++I)
 532           Ops.push_back(*I);
 533         IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
 534         if (IsModified) {
 535           Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
 536         }
 537       } while (IsModified);
 538
 539       // If node has a single use which is CLAMP_R600, folds it
 540       if (Result->hasOneUse() && Result->isMachineOpcode()) {
 541         SDNode *PotentialClamp = *Result->use_begin();
 542         if (PotentialClamp->isMachineOpcode() &&
 543             PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
 544           unsigned ClampIdx =
 545             TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
 546           std::vector<SDValue> Ops;
 547           unsigned NumOp = Result->getNumOperands();
 548           for (unsigned i = 0; i < NumOp; ++i) {
 549             Ops.push_back(Result->getOperand(i));
 550           }
 551           Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
 552           Result = CurDAG->SelectNodeTo(PotentialClamp,
 553               Result->getMachineOpcode(), PotentialClamp->getVTList(),
 554               Ops.data(), NumOp);
 555         }
 556       }
 557     }
 558   }
 559
 560   return Result;
 561 }
 562
 563 bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
 564                                      SDValue &Abs, const R600InstrInfo *TII) {
 565   switch (Src.getOpcode()) {
 566   case ISD::FNEG:
 567     Src = Src.getOperand(0);
 568     Neg = CurDAG->getTargetConstant(1, MVT::i32);
 569     return true;
 570   case ISD::FABS:
 571     if (!Abs.getNode())
 572       return false;
 573     Src = Src.getOperand(0);
 574     Abs = CurDAG->getTargetConstant(1, MVT::i32);
 575     return true;
 576   case ISD::BITCAST:
 577     Src = Src.getOperand(0);
 578     return true;
 579   default:
 580     return false;
 581   }
 582 }
 583
 584 bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
 585     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
 586   int OperandIdx[] = {
 587     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
 588     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
 589     TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
 590   };
 591   int SelIdx[] = {
 592     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
 593     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
 594     TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
 595   };
 596   int NegIdx[] = {
 597     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
 598     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
 599     TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
 600   };
 601   int AbsIdx[] = {
 602     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
 603     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
 604     -1
 605   };
 606
 607
 608   for (unsigned i = 0; i < 3; i++) {
 609     if (OperandIdx[i] < 0)
 610       return false;
 611     SDValue &Src = Ops[OperandIdx[i] - 1];
 612     SDValue &Sel = Ops[SelIdx[i] - 1];
 613     SDValue &Neg = Ops[NegIdx[i] - 1];
 614     SDValue FakeAbs;
 615     SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
 616     if (FoldOperand(Src, Sel, Neg, Abs, TII))
 617       return true;
 618   }
 619   return false;
 620 }
 621
 622 bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
 623     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
 624   int OperandIdx[] = {
 625     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
 626     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
 627     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
 628     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
 629     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
 630     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
 631     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
 632     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
 633   };
 634   int SelIdx[] = {
 635     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
 636     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
 637     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
 638     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
 639     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
 640     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
 641     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
 642     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
 643   };
 644   int NegIdx[] = {
 645     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
 646     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
 647     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
 648     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
 649     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
 650     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
 651     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
 652     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
 653   };
 654   int AbsIdx[] = {
 655     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
 656     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
 657     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
 658     TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
 659     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
 660     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
 661     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
 662     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
 663   };
 664
 665   for (unsigned i = 0; i < 8; i++) {
 666     if (OperandIdx[i] < 0)
 667       return false;
 668     SDValue &Src = Ops[OperandIdx[i] - 1];
 669     SDValue &Sel = Ops[SelIdx[i] - 1];
 670     SDValue &Neg = Ops[NegIdx[i] - 1];
 671     SDValue &Abs = Ops[AbsIdx[i] - 1];
 672     if (FoldOperand(Src, Sel, Neg, Abs, TII))
 673       return true;
 674   }
 675   return false;
 676 }
 677
 678 bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
 679   if (!ptr) {
 680     return false;
 681   }
 682   Type *ptrType = ptr->getType();
 683   return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
 684 }
 685
 686 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
 687   return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
 688 }
 689
 690 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
 691   return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
 692           && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
 693           && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
 694 }
 695
 696 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
 697   return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
 698 }
 699
 700 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
 701   return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
 702 }
 703
 704 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
 705   if (CbId == -1) {
 706     return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS);
 707   }
 708   return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
 709 }
 710
 711 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
 712   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
 713     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
 714     if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
 715         N->getMemoryVT().bitsLT(MVT::i32)) {
 716       return true;
 717     }
 718   }
 719   return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
 720 }
 721
 722 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
 723   return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
 724 }
 725
 726 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
 727   return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
 728 }
 729
 730 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
 731   return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
 732 }
 733
 734 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
 735   MachineMemOperand *MMO = N->getMemOperand();
 736   if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
 737     if (MMO) {
 738       const Value *V = MMO->getValue();
 739       const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
 740       if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
 741         return true;
 742       }
 743     }
 744   }
 745   return false;
 746 }
 747
 748 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
 749   if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
 750     // Check to make sure we are not a constant pool load or a constant load
 751     // that is marked as a private load
 752     if (isCPLoad(N) || isConstantLoad(N, -1)) {
 753       return false;
 754     }
 755   }
 756   if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
 757       && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
 758       && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
 759       && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
 760       && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
 761       && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
 762     return true;
 763   }
 764   return false;
 765 }
 766
 767 const char *AMDGPUDAGToDAGISel::getPassName() const {
 768   return "AMDGPU DAG->DAG Pattern Instruction Selection";
 769 }
 770
// Macro cleanup: when DEBUGTMP is defined, INT64_C is undefined as well;
// DEBUGTMP itself is then always undefined.
// NOTE(review): the matching definition of DEBUGTMP is not visible in this
// part of the file -- presumably it is set up earlier; confirm before removing.
#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP
 775
 776 //===----------------------------------------------------------------------===//
 777 // Complex Patterns
 778 //===----------------------------------------------------------------------===//
 779
 780 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
 781     SDValue& IntPtr) {
 782   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
 783     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
 784     return true;
 785   }
 786   return false;
 787 }
 788
 789 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
 790     SDValue& BaseReg, SDValue &Offset) {
 791   if (!dyn_cast<ConstantSDNode>(Addr)) {
 792     BaseReg = Addr;
 793     Offset = CurDAG->getIntPtrConstant(0, true);
 794     return true;
 795   }
 796   return false;
 797 }
 798
 799 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
 800                                            SDValue &Offset) {
 801   ConstantSDNode * IMMOffset;
 802
 803   if (Addr.getOpcode() == ISD::ADD
 804       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
 805       && isInt<16>(IMMOffset->getZExtValue())) {
 806
 807       Base = Addr.getOperand(0);
 808       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
 809       return true;
 810   // If the pointer address is constant, we can move it to the offset field.
 811   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
 812              && isInt<16>(IMMOffset->getZExtValue())) {
 813     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
 814                                   SDLoc(CurDAG->getEntryNode()),
 815                                   AMDGPU::ZERO, MVT::i32);
 816     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
 817     return true;
 818   }
 819
 820   // Default case, no offset
 821   Base = Addr;
 822   Offset = CurDAG->getTargetConstant(0, MVT::i32);
 823   return true;
 824 }
 825
 826 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
 827                                             SDValue &Offset) {
 828   ConstantSDNode *C;
 829
 830   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
 831     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
 832     Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
 833   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
 834             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
 835     Base = Addr.getOperand(0);
 836     Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
 837   } else {
 838     Base = Addr;
 839     Offset = CurDAG->getTargetConstant(0, MVT::i32);
 840   }
 841
 842   return true;
 843 }
 844
 845 SDValue AMDGPUDAGToDAGISel::SimplifyI24(SDValue &Op) {
 846   APInt Demanded = APInt(32, 0x00FFFFFF);
 847   APInt KnownZero, KnownOne;
 848   TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true);
 849   const TargetLowering *TLI = getTargetLowering();
 850   if (TLI->SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) {
 851     CurDAG->ReplaceAllUsesWith(Op, TLO.New);
 852     CurDAG->RepositionNode(Op.getNode(), TLO.New.getNode());
 853     return SimplifyI24(TLO.New);
 854   } else {
 855     return  Op;
 856   }
 857 }
 858
 859 bool AMDGPUDAGToDAGISel::SelectI24(SDValue Op, SDValue &I24) {
 860
 861   assert(Op.getValueType() == MVT::i32);
 862
 863   if (CurDAG->ComputeNumSignBits(Op) == 9) {
 864     I24 = SimplifyI24(Op);
 865     return true;
 866   }
 867   return false;
 868 }
 869
 870 bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
 871   APInt KnownZero;
 872   APInt KnownOne;
 873   CurDAG->ComputeMaskedBits(Op, KnownZero, KnownOne);
 874
 875   assert (Op.getValueType() == MVT::i32);
 876
 877   // ANY_EXTEND and EXTLOAD operations can only be done on types smaller than
 878   // i32.  These smaller types are legal to use with the i24 instructions.
 879   if ((KnownZero & APInt(KnownZero.getBitWidth(), 0xFF000000)) == 0xFF000000 ||
 880        Op.getOpcode() == ISD::ANY_EXTEND ||
 881        ISD::isEXTLoad(Op.getNode())) {
 882     U24 = SimplifyI24(Op);
 883     return true;
 884   }
 885   return false;
 886 }
 887
 888 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
 889
 890   if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
 891     return;
 892   }
 893
 894   // Go over all selected nodes and try to fold them a bit more
 895   const AMDGPUTargetLowering& Lowering =
 896     (*(const AMDGPUTargetLowering*)getTargetLowering());
 897   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 898        E = CurDAG->allnodes_end(); I != E; ++I) {
 899
 900     SDNode *Node = I;
 901
 902     MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
 903     if (!MachineNode)
 904       continue;
 905
 906     SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
 907     if (ResNode != Node) {
 908       ReplaceUses(Node, ResNode);
 909     }
 910   }
 911 }