R600: Move CONST_ADDRESS folding into AMDGPUDAGToDAGISel::Select()

author Tom Stellard <thomas.stellard@amd.com>

Tue, 23 Jul 2013 01:48:24 +0000 (01:48 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Tue, 23 Jul 2013 01:48:24 +0000 (01:48 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Tue, 23 Jul 2013 01:48:24 +0000 (01:48 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 23 Jul 2013 01:48:24 +0000 (01:48 +0000)
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp

index 78036a8a145f7052cfe0a7eb18039a0182a3a7c4..e4fb07dea37f990680be7cc248bb28650d9fecc0 100644 (file)
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -50,7 +50,7 @@ public:
  private:
    inline SDValue getSmallIPtrImm(unsigned Imm);
    bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
-                   const R600InstrInfo *TII, std::vector<unsigned> Cst);
+                   const R600InstrInfo *TII);
    bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
    bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  
@@ -158,12 +158,100 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
  }
  
  SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+  const R600InstrInfo *TII =
+                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
    unsigned int Opc = N->getOpcode();
    if (N->isMachineOpcode()) {
      return NULL;   // Already selected.
    }
    switch (Opc) {
    default: break;
+  case AMDGPUISD::CONST_ADDRESS: {
+    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
+                              I != SDNode::use_end(); I = Next) {
+      Next = llvm::next(I);
+      if (!I->isMachineOpcode()) {
+        continue;
+      }
+      unsigned Opcode = I->getMachineOpcode();
+      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+      int SrcIdx = I.getOperandNo();
+      int SelIdx;
+      // Unlike MachineInstrs, SDNodes do not have results in their operand
+      // list, so we need to increment the SrcIdx, since
+      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
+      if (HasDst) {
+        SrcIdx++;
+      }
+
+      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
+      if (SelIdx < 0) {
+        continue;
+      }
+
+      SDValue CstOffset;
+      if (N->getValueType(0).isVector() ||
+          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
+        continue;
+
+      // Gather constants values
+      int SrcIndices[] = {
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+      };
+      std::vector<unsigned> Consts;
+      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
+        int OtherSrcIdx = SrcIndices[i];
+        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
+        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
+          continue;
+        }
+        if (HasDst) {
+          OtherSrcIdx--;
+          OtherSelIdx--;
+        }
+        if (RegisterSDNode *Reg =
+                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
+          if (Reg->getReg() == AMDGPU::ALU_CONST) {
+            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
+            Consts.push_back(Cst->getZExtValue());
+          }
+        }
+      }
+
+      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+      Consts.push_back(Cst->getZExtValue());
+      if (!TII->fitsConstReadLimitations(Consts))
+        continue;
+
+      // Convert back to SDNode indices
+      if (HasDst) {
+        SrcIdx--;
+        SelIdx--;
+      }
+      std::vector<SDValue> Ops;
+      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
+        if (i == SrcIdx) {
+          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
+        } else if (i == SelIdx) {
+          Ops.push_back(CstOffset);
+        } else {
+          Ops.push_back(I->getOperand(i));
+        }
+      }
+      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
+    }
+    break;
+  }
    case ISD::BUILD_VECTOR: {
      const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
      if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
@@ -224,7 +312,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
      if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
        break;
      }
-    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
  
      uint64_t ImmValue = 0;
      unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
@@ -342,7 +429,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
      if (Result && Result->isMachineOpcode() &&
          !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
          && TII->hasInstrModifiers(Result->getMachineOpcode())) {
-      // Fold FNEG/FABS/CONST_ADDRESS
+      // Fold FNEG/FABS
        // TODO: Isel can generate multiple MachineInst, we need to recursively
        // parse Result
        bool IsModified = false;
@@ -382,24 +469,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  }
  
  bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
-                                     SDValue &Abs, const R600InstrInfo *TII,
-                                     std::vector<unsigned> Consts) {
+                                     SDValue &Abs, const R600InstrInfo *TII) {
    switch (Src.getOpcode()) {
-  case AMDGPUISD::CONST_ADDRESS: {
-    SDValue CstOffset;
-    if (Src.getValueType().isVector() ||
-        !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
-      return false;
-
-    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
-    Consts.push_back(Cst->getZExtValue());
-    if (!TII->fitsConstReadLimitations(Consts))
-      return false;
-
-    Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-    Sel = CstOffset;
-    return true;
-    }
    case ISD::FNEG:
      Src = Src.getOperand(0);
      Neg = CurDAG->getTargetConstant(1, MVT::i32);
@@ -441,19 +512,6 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
      -1
    };
  
-  // Gather constants values
-  std::vector<unsigned> Consts;
-  for (unsigned j = 0; j < 3; j++) {
-    int SrcIdx = OperandIdx[j];
-    if (SrcIdx < 0)
-      break;
-    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
-      if (Reg->getReg() == AMDGPU::ALU_CONST) {
-        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
-        Consts.push_back(Cst->getZExtValue());
-      }
-    }
-  }
  
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
@@ -463,7 +521,7 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
-    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+    if (FoldOperand(Src, Sel, Neg, Abs, TII))
        return true;
    }
    return false;
@@ -512,20 +570,6 @@ bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
    };
  
-  // Gather constants values
-  std::vector<unsigned> Consts;
-  for (unsigned j = 0; j < 8; j++) {
-    int SrcIdx = OperandIdx[j];
-    if (SrcIdx < 0)
-      break;
-    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
-      if (Reg->getReg() == AMDGPU::ALU_CONST) {
-        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
-        Consts.push_back(Cst->getZExtValue());
-      }
-    }
-  }
-
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return false;
@@ -533,7 +577,7 @@ bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
      SDValue &Sel = Ops[SelIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
-    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+    if (FoldOperand(Src, Sel, Neg, Abs, TII))
        return true;
    }
    return false;
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp

index dd613d56a60d16653084fc8e81a17d0eba8595b6..a2bc2c3a9fa8fe0061ecd79de0629e3f3ec62d49 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1154,6 +1154,30 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
      return DAG.getMergeValues(MergedValues, 2, DL);
    }
  
+  // For most operations returning SDValue() will result int he node being
+  // expanded by the DAG Legalizer.  This is not the case for ISD::LOAD, so
+  // we need to manually expand loads that may be legal in some address spaces
+  // and illegal in others.  SEXT loads from CONSTANT_BUFFER_0 are supported
+  // for compute shaders, since the data is sign extended when it is uploaded
+  // to the buffer.  Howerver SEXT loads from other addresspaces are not
+  // supported, so we need to expand them here.
+  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
+    EVT MemVT = LoadNode->getMemoryVT();
+    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
+    SDValue ShiftAmount =
+          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
+    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
+                                  LoadNode->getPointerInfo(), MemVT,
+                                  LoadNode->isVolatile(),
+                                  LoadNode->isNonTemporal(),
+                                  LoadNode->getAlignment());
+    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
+
+    SDValue MergedValues[2] = { Sra, Chain };
+    return DAG.getMergeValues(MergedValues, 2, DL);
+  }
+
    if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
      return SDValue();
    }
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp

index 0c059aa3c41dd9eda45f024532997dd15687b623..3bc170f538bf8b4d0caef0b72c4269c3b0e42b5b 100644 (file)
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -186,6 +186,42 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
    }
  }
  
+int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
+  static const unsigned OpTable[] = {
+    AMDGPU::OpName::src0,
+    AMDGPU::OpName::src1,
+    AMDGPU::OpName::src2
+  };
+
+  assert (SrcNum < 3);
+  return getOperandIdx(Opcode, OpTable[SrcNum]);
+}
+
+#define SRC_SEL_ROWS 11
+int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
+  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
+  };
+
+  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
+    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
+      return getOperandIdx(Opcode, SrcSelTable[i][1]);
+    }
+  }
+  return -1;
+}
+#undef SRC_SEL_ROWS
+
  SmallVector<std::pair<MachineOperand *, int64_t>, 3>
  R600InstrInfo::getSrcs(MachineInstr *MI) const {
    SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h

index 1ba4160747be001f23ee563810d1d88d037f9e7d..cdaa2fbefc896cc401e2877e6c2d6b084ce2bcdc 100644 (file)
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -76,6 +76,13 @@ namespace llvm {
  
    bool mustBeLastInClause(unsigned Opcode) const;
  
+  /// \returns The operand index for the given source number.  Legal values
+  /// for SrcNum are 0, 1, and 2.
+  int getSrcIdx(unsigned Opcode, unsigned SrcNum) const;
+  /// \returns The operand Index for the Sel operand given an index to one
+  /// of the instruction's src operands.
+  int getSelIdx(unsigned Opcode, unsigned SrcIdx) const;
+
    /// \returns a pair for each src of an ALU instructions.
    /// The first member of a pair is the register id.
    /// If register is ALU_CONST, second member is SEL.
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll

index b001ad0de0040038753a39874d179141d7eb9a69..501c5567fff9ebbb4b14af4464fddbe95096434f 100644 (file)
--- a/test/CodeGen/R600/bfi_int.ll
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -36,7 +36,7 @@ entry:
  ; SHA-256 Ma function
  ; ((x & z) | (y & (x | z)))
  ; R600-CHECK: @bfi_sha256_ma
-; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]],
+; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
  ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
  ; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
  ; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
author	Tom Stellard <thomas.stellard@amd.com>
	Tue, 23 Jul 2013 01:48:24 +0000 (01:48 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Tue, 23 Jul 2013 01:48:24 +0000 (01:48 +0000)
lib/Target/R600/AMDILISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/R600/R600ISelLowering.cpp		patch \| blob \| history
lib/Target/R600/R600InstrInfo.cpp		patch \| blob \| history
lib/Target/R600/R600InstrInfo.h		patch \| blob \| history
test/CodeGen/R600/bfi_int.ll		patch \| blob \| history