R600/SI: Remove VReg_32 register class

[oota-llvm.git] / lib / Target / R600 / AMDGPUISelDAGToDAG.cpp
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp

index bf1cf8832e72897d2bc9eea818dd289387675ed8..4f2f01902f26a9961571ddb4f970b767fffdeb5b 100644 (file)
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -79,11 +79,6 @@ private:
    bool isLocalLoad(const LoadSDNode *N) const;
    bool isRegionLoad(const LoadSDNode *N) const;
  
-  /// \returns True if the current basic block being selected is at control
-  ///          flow depth 0.  Meaning that the current block dominates the
-  //           exit block.
-  bool isCFDepth0() const;
-
    const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
    bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
    bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
@@ -116,6 +111,12 @@ private:
    bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;
  
+  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
+                            SDValue &Omod) const; // Omod is always set to 0.
+  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
+                                 SDValue &Clamp, // Set to 0, like Omod.
+                                 SDValue &Omod) const;
+
    SDNode *SelectADD_SUB_I64(SDNode *N);
    SDNode *SelectDIV_SCALE(SDNode *N);
  
@@ -285,7 +286,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
          }
        }
        switch(NumVectorElts) {
-      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
+      case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
                                       AMDGPU::SReg_32RegClassID;
          break;
        case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
@@ -492,9 +493,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
    case AMDGPUISD::DIV_SCALE: {
      return SelectDIV_SCALE(N);
    }
+  case ISD::CopyToReg: {
+    const SITargetLowering& Lowering =
+      *static_cast<const SITargetLowering*>(getTargetLowering());
+    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
+    break;
+  }
    case ISD::ADDRSPACECAST:
      return SelectAddrSpaceCast(N);
    }
+
    return SelectCode(N);
  }
  
@@ -610,14 +618,6 @@ bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
    return false;
  }
  
-bool AMDGPUDAGToDAGISel::isCFDepth0() const {
-  // FIXME: Figure out a way to use DominatorTree analysis here.
-  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
-  const Function *Fn = FuncInfo->Fn;
-  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
-}
-
-
  const char *AMDGPUDAGToDAGISel::getPassName() const {
    return "AMDGPU DAG->DAG Pattern Instruction Selection";
  }
@@ -723,11 +723,6 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
    unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
    unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  
-  if (!isCFDepth0()) {
-    Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
-    CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
-  }
-
    SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
    SDValue Carry(AddLo, 1);
    SDNode *AddHi
@@ -754,15 +749,16 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
  
    const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
-
+  const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
    SDValue Ops[] = {
-    N->getOperand(0),
-    N->getOperand(1),
-    N->getOperand(2),
-    Zero,
-    Zero,
-    Zero,
-    Zero
+    Zero,             // src0_modifiers
+    N->getOperand(0), // src0
+    Zero,             // src1_modifiers
+    N->getOperand(1), // src1
+    Zero,             // src2_modifiers
+    N->getOperand(2), // src2
+    False,            // clamp
+    Zero              // omod
    };
  
    return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
@@ -797,6 +793,21 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
      }
    }
  
+  // If we have a constant address, prefer to put the constant into the
+  // offset. This can save moves to load the constant address since multiple
+  // operations can share the zero base address register, and enables merging
+  // into read2 / write2 instructions.
+  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+    if (isUInt<16>(CAddr->getZExtValue())) {
+      SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+                                 SDLoc(Addr), MVT::i32, Zero);
+      Base = SDValue(MovZero, 0);
+      Offset = Addr;
+      return true;
+    }
+  }
+
    // default case
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i16);
@@ -821,6 +832,23 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
      }
    }
  
+  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
+    unsigned DWordOffset1 = DWordOffset0 + 1;
+    assert(4 * DWordOffset0 == CAddr->getZExtValue());
+
+    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
+      SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+      MachineSDNode *MovZero
+        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+                                 SDLoc(Addr), MVT::i32, Zero);
+      Base = SDValue(MovZero, 0);
+      Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
+      Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+      return true;
+    }
+  }
+
    // default case
    Base = Addr;
    Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
@@ -828,11 +856,6 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
    return true;
  }
  
-static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
-  return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
-                                     Ptr), 0);
-}
-
  static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
    return isUInt<12>(Imm->getZExtValue());
  }
@@ -908,9 +931,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
    if (C->getSExtValue()) {
      SDLoc DL(Addr);
-    SRsrc = wrapAddr64Rsrc(CurDAG, DL, Ptr);
+
+    const SITargetLowering& Lowering =
+      *static_cast<const SITargetLowering*>(getTargetLowering());
+
+    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
      return true;
    }
+
    return false;
  }
  
@@ -922,36 +950,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
  }
  
-static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
-                         uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
-
-  SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
-  SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
-  if (RsrcDword1)
-    PtrHi = SDValue(DAG->getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
-                                    DAG->getConstant(RsrcDword1, MVT::i32)), 0);
-
-  SDValue DataLo = DAG->getTargetConstant(
-      RsrcDword2And3 & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
-  SDValue DataHi = DAG->getTargetConstant(RsrcDword2And3 >> 32, MVT::i32);
-
-  const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
-  return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
-                                     MVT::v4i32, Ops), 0);
-}
-
-/// \brief Return a resource descriptor with the 'Add TID' bit enabled
-///        The TID (Thread ID) is multipled by the stride value (bits [61:48]
-///        of the resource descriptor) to create an offset, which is added to the
-///        resource ponter.
-static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
-
-  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
-                  0xffffffff; // Size
-
-  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
-}
-
  bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                              SDValue &VAddr, SDValue &SOffset,
                                              SDValue &ImmOffset) const {
@@ -971,9 +969,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
    Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
                                  ScratchOffsetReg, MVT::i32);
  
-  Rsrc = buildScratchRSRC(CurDAG, DL,
-      CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
-                             MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64));
+  SDValue ScratchPtr =
+    CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+                           MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
+  Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
    SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
        MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
  
@@ -1016,6 +1015,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                             SDValue &GLC, SDValue &SLC,
                                             SDValue &TFE) const {
    SDValue Ptr, VAddr, Offen, Idxen, Addr64;
+  const SIInstrInfo *TII =
+    static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
  
    SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                GLC, SLC, TFE);
@@ -1023,10 +1024,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
        !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
        !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
-    uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
+    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                      APInt::getAllOnesValue(32).getZExtValue(); // Size
      SDLoc DL(Addr);
-    SRsrc = buildRSRC(CurDAG, DL, Ptr, 0, Rsrc);
+
+    const SITargetLowering& Lowering =
+      *static_cast<const SITargetLowering*>(getTargetLowering());
+
+    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
      return true;
    }
    return false;
@@ -1132,6 +1137,23 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
    return SelectVOP3Mods(In, Src, SrcMods);
  }
  
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
+                                              SDValue &SrcMods,
+                                              SDValue &Omod) const {
+  // FIXME: Handle Omod. Until then it is always the target constant 0.
+  Omod = CurDAG->getTargetConstant(0, MVT::i32);
+  // Delegate Src/SrcMods to SelectVOP3Mods and return its result.
+  return SelectVOP3Mods(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
+                                                   SDValue &SrcMods,
+                                                   SDValue &Clamp,
+                                                   SDValue &Omod) const {
+  Clamp = Omod = CurDAG->getTargetConstant(0, MVT::i32); // Both forced to 0.
+  return SelectVOP3Mods(In, Src, SrcMods); // Result of the delegate call.
+}
+
  void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
    const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());