R600: rework handling of the constants

author Tom Stellard <thomas.stellard@amd.com>

Wed, 23 Jan 2013 02:09:06 +0000 (02:09 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Wed, 23 Jan 2013 02:09:06 +0000 (02:09 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 23 Jan 2013 02:09:06 +0000 (02:09 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 23 Jan 2013 02:09:06 +0000 (02:09 +0000)
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h

index c75ec245e0c1303c56e2a5ba06142f9181405f52..1aa607f57ea3a4bd8bc08015320abfff65aefc3c 100644 (file)
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
  // R600 Passes
  FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
  FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
  // R600 Passes
  FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
  FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
  
  // SI Passes
  FunctionPass *createSIAnnotateControlFlowPass();
  
  // SI Passes
  FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp

index 26ac928347d634550ee5e8983b04939bb394f4b4..7b069e7760471aa631f3efa43b75e50d0c8469c5 100644 (file)
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -136,6 +136,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
      addPass(createAMDGPUCFGPreparationPass(*TM));
      addPass(createAMDGPUCFGStructurizerPass(*TM));
      addPass(createR600ExpandSpecialInstrsPass(*TM));
      addPass(createAMDGPUCFGPreparationPass(*TM));
      addPass(createAMDGPUCFGStructurizerPass(*TM));
      addPass(createR600ExpandSpecialInstrsPass(*TM));
+    addPass(createR600LowerConstCopy(*TM));
      addPass(&FinalizeMachineBundlesID);
    } else {
      addPass(createSILowerLiteralConstantsPass(*TM));
      addPass(&FinalizeMachineBundlesID);
    } else {
      addPass(createSILowerLiteralConstantsPass(*TM));
diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h

index 4e577dc23409899ea778ec76c641a11e0f887016..b39fbdbeed910c02f85f08d76b854781d1681c3c 100644 (file)
--- a/lib/Target/R600/AMDIL.h
+++ b/lib/Target/R600/AMDIL.h
@@ -90,14 +90,30 @@ namespace AMDGPUAS {
  enum AddressSpaces {
    PRIVATE_ADDRESS  = 0, ///< Address space for private memory.
    GLOBAL_ADDRESS   = 1, ///< Address space for global memory (RAT0, VTX0).
  enum AddressSpaces {
    PRIVATE_ADDRESS  = 0, ///< Address space for private memory.
    GLOBAL_ADDRESS   = 1, ///< Address space for global memory (RAT0, VTX0).
-  CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
+  CONSTANT_ADDRESS = 2, ///< Address space for constant memory
    LOCAL_ADDRESS    = 3, ///< Address space for local memory.
    REGION_ADDRESS   = 4, ///< Address space for region memory.
    ADDRESS_NONE     = 5, ///< Address space for unknown memory.
    PARAM_D_ADDRESS  = 6, ///< Address space for direct addressible parameter memory (CONST0)
    PARAM_I_ADDRESS  = 7, ///< Address space for indirect addressible parameter memory (VTX1)
    USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
    LOCAL_ADDRESS    = 3, ///< Address space for local memory.
    REGION_ADDRESS   = 4, ///< Address space for region memory.
    ADDRESS_NONE     = 5, ///< Address space for unknown memory.
    PARAM_D_ADDRESS  = 6, ///< Address space for direct addressible parameter memory (CONST0)
    PARAM_I_ADDRESS  = 7, ///< Address space for indirect addressible parameter memory (VTX1)
    USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
-  LAST_ADDRESS     = 9
+  CONSTANT_BUFFER_0 = 9,
+  CONSTANT_BUFFER_1 = 10,
+  CONSTANT_BUFFER_2 = 11,
+  CONSTANT_BUFFER_3 = 12,
+  CONSTANT_BUFFER_4 = 13,
+  CONSTANT_BUFFER_5 = 14,
+  CONSTANT_BUFFER_6 = 15,
+  CONSTANT_BUFFER_7 = 16,
+  CONSTANT_BUFFER_8 = 17,
+  CONSTANT_BUFFER_9 = 18,
+  CONSTANT_BUFFER_10 = 19,
+  CONSTANT_BUFFER_11 = 20,
+  CONSTANT_BUFFER_12 = 21,
+  CONSTANT_BUFFER_13 = 22,
+  CONSTANT_BUFFER_14 = 23,
+  CONSTANT_BUFFER_15 = 24,
+  LAST_ADDRESS     = 25
  };
  
  } // namespace AMDGPUAS
  };
  
  } // namespace AMDGPUAS
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp

index d15ed393c13810a26ef759c726c94a021b808d2b..567b3e26cf1486f33107becf7a5185a9947d92d0 100644 (file)
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -20,6 +20,7 @@
  #include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/CodeGen/SelectionDAGISel.h"
  #include "llvm/Support/Compiler.h"
  #include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/CodeGen/SelectionDAGISel.h"
  #include "llvm/Support/Compiler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
  #include <list>
  #include <queue>
  
  #include <list>
  #include <queue>
  
@@ -45,6 +46,7 @@ public:
  
  private:
    inline SDValue getSmallIPtrImm(unsigned Imm);
  
  private:
    inline SDValue getSmallIPtrImm(unsigned Imm);
+  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  
    // Complex pattern selectors
    bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  
    // Complex pattern selectors
    bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
@@ -67,6 +69,9 @@ private:
    static bool isLocalLoad(const LoadSDNode *N);
    static bool isRegionLoad(const LoadSDNode *N);
  
    static bool isLocalLoad(const LoadSDNode *N);
    static bool isRegionLoad(const LoadSDNode *N);
  
+  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+  bool SelectGlobalValueVariableOffset(SDValue Addr,
+      SDValue &BaseReg, SDValue& Offset);
    bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
    bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
    bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
    bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
    bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
    bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -259,7 +264,65 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
      break;
    }
    }
      break;
    }
    }
-  return SelectCode(N);
+  SDNode *Result = SelectCode(N);
+
+  // Fold operands of selected node
+
+  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    const R600InstrInfo *TII =
+        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+    if (Result && TII->isALUInstr(Result->getMachineOpcode())) {
+      bool IsModified = false;
+      do {
+        std::vector<SDValue> Ops;
+        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+            I != E; ++I)
+          Ops.push_back(*I);
+        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
+        if (IsModified) {
+          Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(),
+              Result->getVTList(), Ops.data(), Ops.size());
+        }
+      } while (IsModified);
+    }
+  }
+
+  return Result;
+}
+
+/// Try to fold one CONST_ADDRESS source operand of an ALU node into the
+/// instruction itself.
+///
+/// \param Opcode machine opcode whose operand layout is queried through \p TII.
+/// \param TII    instruction info used to locate the SRCn / SRCn_SEL operands.
+/// \param Ops    operand list of the node; modified in place on success.
+/// \returns true if exactly one operand was rewritten (the caller is expected
+///          to call again until false is returned), false otherwise.
+bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
+    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+  const int SrcIdx[3] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1),
+    TII->getOperandIdx(Opcode, R600Operands::SRC2)
+  };
+  const int SrcSelIdx[3] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
+    TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
+  };
+
+  for (unsigned Src = 0; Src != 3; ++Src) {
+    // A negative index means this opcode has no such source; stop scanning.
+    if (SrcIdx[Src] < 0)
+      return false;
+
+    // NOTE(review): the "- 1" presumably skips the destination, which the
+    // operand table counts at index 0 but which is not in Ops - confirm.
+    SDValue Candidate = Ops[SrcIdx[Src] - 1];
+    if (Candidate.getOpcode() != AMDGPUISD::CONST_ADDRESS)
+      continue;
+    if (Candidate.getValueType().isVector())
+      continue;
+
+    SDValue CstOffset;
+    if (!SelectGlobalValueConstantOffset(Candidate.getOperand(0), CstOffset))
+      continue;
+
+    // Read the constant through ALU_CONST and record its select value in the
+    // matching SRCn_SEL operand.
+    Ops[SrcIdx[Src] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+    Ops[SrcSelIdx[Src] - 1] = CstOffset;
+    return true;
+  }
+  return false;
  }
  
  bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
  }
  
  bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
@@ -406,6 +469,25 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
  
  ///==== AMDGPU Functions ====///
  
  
  ///==== AMDGPU Functions ====///
  
+/// Match an address that is a compile-time constant.
+///
+/// On success \p IntPtr receives an IntPtr constant holding the address
+/// divided by 4, and true is returned. Any non-constant address is rejected.
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+    SDValue& IntPtr) {
+  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr);
+  if (!Cst)
+    return false;
+
+  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+  return true;
+}
+
+/// Match an address that is NOT a compile-time constant.
+///
+/// On success \p BaseReg receives the address itself and \p Offset an IntPtr
+/// constant of 0. Constant addresses are rejected so that they can be matched
+/// by SelectGlobalValueConstantOffset instead.
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+    SDValue& BaseReg, SDValue &Offset) {
+  if (isa<ConstantSDNode>(Addr))
+    return false;
+
+  BaseReg = Addr;
+  Offset = CurDAG->getIntPtrConstant(0, true);
+  return true;
+}
+
  bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
                                               SDValue& Offset) {
    if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
  bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
                                               SDValue& Offset) {
    if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt

index 790a4aa4db64a527b8628cc6b22d2bd17b3b2520..a8be7ed975cd016a7d7d2174e9739f37b6163c91 100644 (file)
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -36,6 +36,7 @@ add_llvm_target(R600CodeGen
    R600ExpandSpecialInstrs.cpp
    R600InstrInfo.cpp
    R600ISelLowering.cpp
    R600ExpandSpecialInstrs.cpp
    R600InstrInfo.cpp
    R600ISelLowering.cpp
+  R600LowerConstCopy.cpp
    R600MachineFunctionInfo.cpp
    R600RegisterInfo.cpp
    SIAnnotateControlFlow.cpp
    R600MachineFunctionInfo.cpp
    R600RegisterInfo.cpp
    SIAnnotateControlFlow.cpp
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp

index e6c550b5ac4bb4639a93cba8bc07e2d6f8ffde39..e76c6c86757ebd8d709932146cb4f3347abad876 100644 (file)
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -129,4 +129,28 @@ void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
    }
  }
  
    }
  }
  
+/// Print the source-select immediate found at operand \p OpNo.
+///
+/// The two low bits of the immediate pick the channel letter (X, Y, Z or W);
+/// the remaining bits form the select index:
+///  - index >= 512 is printed as "<bank>[<offset>]", where bank is
+///    (index - 512) >> 12 and offset is the low 12 bits of (index - 512);
+///  - 448 <= index < 512 is printed relative to 448;
+///  - 0 <= index < 448 is printed as is;
+///  - a negative index prints nothing at all (not even the channel).
+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
+                                 raw_ostream &O) {
+  const char *Channels = "XYZW";
+  int Packed = MI->getOperand(OpNo).getImm();
+
+  const int Chan = Packed & 3;
+  const int Index = Packed >> 2;
+
+  if (Index < 0)
+    return;
+
+  if (Index >= 512) {
+    const int Rel = Index - 512;
+    const int Bank = Rel >> 12;
+    const int Offset = Rel & 4095;
+    O << Bank << "[" << Offset << "]";
+  } else if (Index >= 448) {
+    O << (Index - 448);
+  } else {
+    O << Index;
+  }
+
+  O << "." << Channels[Chan];
+}
+
  #include "AMDGPUGenAsmWriter.inc"
  #include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h

index 96e0e46f8a6315b6b1033bf504ff1570dd9c68f9..e775c4c82e72a844fbf0737bc99702a87b561e22 100644 (file)
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -45,6 +45,7 @@ private:
    void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
  };
  
  } // End namespace llvm
  };
  
  } // End namespace llvm
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp

index 36deae9c0aba84ef028455c9a63f94d1aed9b616..01df8087d5fa64efa9a740dd15d5cd4b2edf9f7e 100644 (file)
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -63,8 +63,8 @@ private:
    void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                      raw_ostream &OS) const;
    void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
    void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                      raw_ostream &OS) const;
    void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
-  void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
-                  raw_ostream &OS) const;
+  void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
+                    raw_ostream &OS) const;
    void EmitDst(const MCInst &MI, raw_ostream &OS) const;
    void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                      raw_ostream &OS) const;
    void EmitDst(const MCInst &MI, raw_ostream &OS) const;
    void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
                      raw_ostream &OS) const;
@@ -163,7 +163,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
      case AMDGPU::VTX_READ_PARAM_32_eg:
      case AMDGPU::VTX_READ_GLOBAL_8_eg:
      case AMDGPU::VTX_READ_GLOBAL_32_eg:
      case AMDGPU::VTX_READ_PARAM_32_eg:
      case AMDGPU::VTX_READ_GLOBAL_8_eg:
      case AMDGPU::VTX_READ_GLOBAL_32_eg:
-    case AMDGPU::VTX_READ_GLOBAL_128_eg: {
+    case AMDGPU::VTX_READ_GLOBAL_128_eg:
+    case AMDGPU::TEX_VTX_CONSTBUF: {
        uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
        uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
  
        uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
        uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
  
@@ -193,7 +194,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
                                       SmallVectorImpl<MCFixup> &Fixups,
                                       raw_ostream &OS) const {
    const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
                                       SmallVectorImpl<MCFixup> &Fixups,
                                       raw_ostream &OS) const {
    const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
-  unsigned NumOperands = MI.getNumOperands();
  
    // Emit instruction type
    EmitByte(INSTR_ALU, OS);
  
    // Emit instruction type
    EmitByte(INSTR_ALU, OS);
@@ -209,19 +209,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
      InstWord01 |= ISAOpCode << 1;
    }
  
      InstWord01 |= ISAOpCode << 1;
    }
  
-  unsigned SrcIdx = 0;
-  for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
-    if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
-        OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
-      continue;
-    }
-    EmitSrcISA(MI, OpIdx, InstWord01, OS);
-    SrcIdx++;
-  }
+  unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
+      MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
  
  
-  // Emit zeros for unused sources
-  for ( ; SrcIdx < 3; SrcIdx++) {
-    EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
+  EmitByte(SrcNum, OS);
+
+  const unsigned SrcOps[3][2] = {
+      {R600Operands::SRC0, R600Operands::SRC0_SEL},
+      {R600Operands::SRC1, R600Operands::SRC1_SEL},
+      {R600Operands::SRC2, R600Operands::SRC2_SEL}
+  };
+
+  for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
+    unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
+    unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
+    EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
    }
  
    Emit(InstWord01, OS);
    }
  
    Emit(InstWord01, OS);
@@ -292,34 +294,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
  
  }
  
  
  }
  
-void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
-                                   uint64_t &Value, raw_ostream &OS) const {
-  const MCOperand &MO = MI.getOperand(OpIdx);
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
+                                   unsigned SelOpIdx, raw_ostream &OS) const {
+  const MCOperand &RegMO = MI.getOperand(RegOpIdx);
+  const MCOperand &SelMO = MI.getOperand(SelOpIdx);
+
    union {
      float f;
      uint32_t i;
    } InlineConstant;
    InlineConstant.i = 0;
    union {
      float f;
      uint32_t i;
    } InlineConstant;
    InlineConstant.i = 0;
-  // Emit the source select (2 bytes).  For GPRs, this is the register index.
-  // For other potential instruction operands, (e.g. constant registers) the
-  // value of the source select is defined in the r600isa docs.
-  if (MO.isReg()) {
-    unsigned Reg = MO.getReg();
-    if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
-      EmitByte(1, OS);
-    } else {
-      EmitByte(0, OS);
-    }
+  // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
+  // and select is 0 (GPR index is encoded in the instr encoding). For constants
+  // type is 1 and select is the original const select passed from the driver.
+  unsigned Reg = RegMO.getReg();
+  if (Reg == AMDGPU::ALU_CONST) {
+    EmitByte(1, OS);
+    uint32_t Sel = SelMO.getImm();
+    Emit(Sel, OS);
+  } else {
+    EmitByte(0, OS);
+    Emit((uint32_t)0, OS);
+  }
  
  
-    if (Reg == AMDGPU::ALU_LITERAL_X) {
-      unsigned ImmOpIndex = MI.getNumOperands() - 1;
-      MCOperand ImmOp = MI.getOperand(ImmOpIndex);
-      if (ImmOp.isFPImm()) {
-        InlineConstant.f = ImmOp.getFPImm();
-      } else {
-        assert(ImmOp.isImm());
-        InlineConstant.i = ImmOp.getImm();
-      }
+  if (Reg == AMDGPU::ALU_LITERAL_X) {
+    unsigned ImmOpIndex = MI.getNumOperands() - 1;
+    MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+    if (ImmOp.isFPImm()) {
+      InlineConstant.f = ImmOp.getFPImm();
+    } else {
+      assert(ImmOp.isImm());
+      InlineConstant.i = ImmOp.getImm();
      }
    }
  
      }
    }
  
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h

index 7dea8e44ea2b2fdbfecf635c2cb4412618f43ec2..e19eea38e492a0c154d05c899dae33a9f6364d0f 100644 (file)
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -62,18 +62,33 @@ namespace R600Operands {
      SRC0_NEG,
      SRC0_REL,
      SRC0_ABS,
      SRC0_NEG,
      SRC0_REL,
      SRC0_ABS,
+    SRC0_SEL,
      SRC1,
      SRC1_NEG,
      SRC1_REL,
      SRC1_ABS,
      SRC1,
      SRC1_NEG,
      SRC1_REL,
      SRC1_ABS,
+    SRC1_SEL,
      SRC2,
      SRC2_NEG,
      SRC2_REL,
      SRC2,
      SRC2_NEG,
      SRC2_REL,
+    SRC2_SEL,
      LAST,
      PRED_SEL,
      IMM,
      COUNT
   };
      LAST,
      PRED_SEL,
      IMM,
      COUNT
   };
+
+  /// Operand-position lookup table for ALU instructions.
+  ///
+  /// Columns are indexed by the operand enumerators above; the vertical
+  /// letters in the header comment abbreviate each enumerator's name, read
+  /// top to bottom. Each entry is the index of that operand in the
+  /// instruction's operand list, or -1 when that instruction form does not
+  /// have the operand. Rows are selected by the number of source operands
+  /// minus one (row 0: one source, row 1: two sources, row 2: three sources).
+  const static int ALUOpTable[3][R600Operands::COUNT] = {
+//            W        C     S  S  S  S     S  S  S  S     S  S  S
+//            R  O  D  L  S  R  R  R  R  S  R  R  R  R  S  R  R  R  L  P
+//   D  U     I  M  R  A  R  C  C  C  C  R  C  C  C  C  R  C  C  C  A  R  I
+//   S  E  U  T  O  E  M  C  0  0  0  0  C  1  1  1  1  C  2  2  2  S  E  M
+//   T  M  P  E  D  L  P  0  N  R  A  S  1  N  R  A  S  2  N  R  S  T  D  M
+    {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
+    {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
+    {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+  };
+
  }
  
  #endif // R600DEFINES_H_
  }
  
  #endif // R600DEFINES_H_
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp

index f0eece39ea5f956dcd068d660723d72b3b48c460..69ca3f58304a65d5d34cf5258df9010eab34bc8c 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -74,7 +74,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::STORE, MVT::i32, Custom);
    setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  
    setOperationAction(ISD::STORE, MVT::i32, Custom);
    setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  
+  setOperationAction(ISD::LOAD, MVT::i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
    setTargetDAGCombine(ISD::FP_ROUND);
    setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  
    setSchedulingPreference(Sched::VLIW);
  }
  
    setSchedulingPreference(Sched::VLIW);
  }
@@ -115,15 +118,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
      break;
    }
  
      break;
    }
  
-  case AMDGPU::R600_LOAD_CONST: {
-    int64_t RegIndex = MI->getOperand(1).getImm();
-    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
-                .addOperand(MI->getOperand(0))
-                .addReg(ConstantReg);
-    break;
-  }
-
    case AMDGPU::MASK_WRITE: {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    case AMDGPU::MASK_WRITE: {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
@@ -364,6 +358,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
    case ISD::SELECT: return LowerSELECT(Op, DAG);
    case ISD::SETCC: return LowerSETCC(Op, DAG);
    case ISD::STORE: return LowerSTORE(Op, DAG);
    case ISD::SELECT: return LowerSELECT(Op, DAG);
    case ISD::SETCC: return LowerSETCC(Op, DAG);
    case ISD::STORE: return LowerSTORE(Op, DAG);
+  case ISD::LOAD: return LowerLOAD(Op, DAG);
    case ISD::FPOW: return LowerFPOW(Op, DAG);
    case ISD::INTRINSIC_VOID: {
      SDValue Chain = Op.getOperand(0);
    case ISD::FPOW: return LowerFPOW(Op, DAG);
    case ISD::INTRINSIC_VOID: {
      SDValue Chain = Op.getOperand(0);
@@ -527,6 +522,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
    switch (N->getOpcode()) {
    default: return;
    case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    switch (N->getOpcode()) {
    default: return;
    case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+    return;
+  case ISD::LOAD: {
+    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+    Results.push_back(SDValue(Node, 0));
+    Results.push_back(SDValue(Node, 1));
+    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
+    // function
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+    return;
+  }
    }
  }
  
    }
  }
  
@@ -832,6 +837,94 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    return SDValue();
  }
  
    return SDValue();
  }
  
+/// Translate a CONSTANT_BUFFER_n address space into the base select value of
+/// that buffer.
+///
+/// \returns 512 + (n << 12) for CONSTANT_BUFFER_n with n in [0, 15], or -1 if
+///          \p AddressSpace is not one of the constant-buffer address spaces.
+static int
+ConstantAddressBlock(unsigned AddressSpace) {
+  // The CONSTANT_BUFFER_* enumerators are numbered consecutively, so the bank
+  // number is simply the distance from CONSTANT_BUFFER_0.
+  const unsigned FirstBuffer = AMDGPUAS::CONSTANT_BUFFER_0;
+  const unsigned LastBuffer = AMDGPUAS::CONSTANT_BUFFER_15;
+  if (AddressSpace < FirstBuffer || AddressSpace > LastBuffer)
+    return -1;
+
+  const int KCBank = static_cast<int>(AddressSpace - FirstBuffer);
+  return 512 + 4096 * KCBank;
+}
+
+/// Custom-lower a load from one of the CONSTANT_BUFFER_* address spaces.
+///
+/// Loads from any other address space are not handled here and an empty
+/// SDValue is returned. For a constant-buffer load the returned node merges
+/// the loaded value with the incoming chain, where the value is:
+///  - a v4i32 BUILD_VECTOR of four i32 CONST_ADDRESS nodes (one per channel)
+///    when the load's source value is an IR Constant, or
+///  - a single v4i32 CONST_ADDRESS node on (Ptr >> 4) otherwise.
+/// If the requested type is not a vector, element 0 is extracted as i32.
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+
+  const int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+  if (ConstantBlock < 0)
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Chain = Op.getOperand(0);
+  SDValue Ptr = Op.getOperand(1);
+
+  SDValue Result;
+  // ConstantExpr derives from Constant, so a single test covers both. The
+  // _or_null form keeps a load without a source value on the generic path
+  // instead of tripping the null-pointer assertion inside dyn_cast<>.
+  if (dyn_cast_or_null<Constant>(LoadNode->getSrcValue())) {
+    SDValue Slots[4];
+    for (unsigned i = 0; i < 4; i++) {
+      // The select value wanted for channel "chan" is
+      //   ((512 + (kc_bank << 12) + const_index) << 2) + chan
+      // with ConstantBlock == 512 + (kc_bank << 12) and, assuming Ptr is a
+      // multiple of 16, const_index == Ptr / 16. Adding
+      // ConstantBlock * 16 + chan * 4 to the byte address here gives exactly
+      // that value once the ISel step divides the address by 4.
+      SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+          DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+      Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+    }
+    Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
+  } else {
+    // A non-constant pointer cannot be folded; keep it as one v4i32
+    // CONST_ADDRESS node addressed by Ptr >> 4.
+    Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
+        DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
+        );
+  }
+
+  if (!VT.isVector()) {
+    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+        DAG.getConstant(0, MVT::i32));
+  }
+
+  SDValue MergedValues[2] = {
+      Result,
+      Chain
+  };
+  return DAG.getMergeValues(MergedValues, 2, DL);
+}
  
  SDValue R600TargetLowering::LowerFPOW(SDValue Op,
      SelectionDAG &DAG) const {
  
  SDValue R600TargetLowering::LowerFPOW(SDValue Op,
      SelectionDAG &DAG) const {
@@ -904,6 +997,17 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
        }
        break;
      }
        }
        break;
      }
+  // Extract_vec (Build_vector) generated by custom lowering
+  // also needs to be customly combined
+  case ISD::EXTRACT_VECTOR_ELT: {
+    SDValue Arg = N->getOperand(0);
+    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
+      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+        unsigned Element = Const->getZExtValue();
+        return Arg->getOperand(Element);
+      }
+    }
+  }
    }
    return SDValue();
  }
    }
    return SDValue();
  }
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h

index 2b954dab5582d3d4bdabd85f6dfad4fae29ba0ae..c141d50210e792660dda28d7801652c9c53f0cf3 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -63,6 +63,7 @@ private:
    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
    
    bool isZero(SDValue Op) const;
  };
    
    bool isZero(SDValue Op) const;
  };
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp

index 06b78d09cc7cc7dcafa041ee5a50df1d8634a230..1adb1422dcba9a218958cb5d19e363b3fecd2f1b 100644 (file)
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -486,13 +486,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
       .addReg(Src0Reg)  // $src0
       .addImm(0)        // $src0_neg
       .addImm(0)        // $src0_rel
       .addReg(Src0Reg)  // $src0
       .addImm(0)        // $src0_neg
       .addImm(0)        // $src0_rel
-     .addImm(0);       // $src0_abs
+     .addImm(0)        // $src0_abs
+     .addImm(-1);       // $src0_sel
  
    if (Src1Reg) {
      MIB.addReg(Src1Reg) // $src1
         .addImm(0)       // $src1_neg
         .addImm(0)       // $src1_rel
  
    if (Src1Reg) {
      MIB.addReg(Src1Reg) // $src1
         .addImm(0)       // $src1_neg
         .addImm(0)       // $src1_rel
-       .addImm(0);       // $src1_abs
+       .addImm(0)       // $src1_abs
+       .addImm(-1);      // $src1_sel
    }
  
    //XXX: The r600g finalizer expects this to be 1, once we've moved the
    }
  
    //XXX: The r600g finalizer expects this to be 1, once we've moved the
@@ -521,16 +523,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
  
  int R600InstrInfo::getOperandIdx(unsigned Opcode,
                                   R600Operands::Ops Op) const {
  
  int R600InstrInfo::getOperandIdx(unsigned Opcode,
                                   R600Operands::Ops Op) const {
-  const static int OpTable[3][R600Operands::COUNT] = {
-//            W        C     S  S  S     S  S  S     S  S
-//            R  O  D  L  S  R  R  R  S  R  R  R  S  R  R  L  P
-//   D  U     I  M  R  A  R  C  C  C  C  C  C  C  R  C  C  A  R  I
-//   S  E  U  T  O  E  M  C  0  0  0  C  1  1  1  C  2  2  S  E  M
-//   T  M  P  E  D  L  P  0  N  R  A  1  N  R  A  2  N  R  T  D  M
-    {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
-    {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
-    {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
-  };
    unsigned TargetFlags = get(Opcode).TSFlags;
    unsigned OpTableIdx;
  
    unsigned TargetFlags = get(Opcode).TSFlags;
    unsigned OpTableIdx;
  
@@ -556,7 +548,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
      OpTableIdx = 2;
    }
  
      OpTableIdx = 2;
    }
  
-  return OpTable[OpTableIdx][Op];
+  return R600Operands::ALUOpTable[OpTableIdx][Op];
  }
  
  void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
  }
  
  void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td

index d4fa3d682627a13694a91c4972752a2ffeb9a8d8..a6c39108c7272ecdb0f983b95a7ba51b9501e1fc 100644 (file)
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -70,6 +70,11 @@ class InstFlag<string PM = "printOperand", int Default = 0>
    let PrintMethod = PM;
  }
  
    let PrintMethod = PM;
  }
  
+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers 
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
+  let PrintMethod = "printSel";
+}
+
  def LITERAL : InstFlag<"printLiteral">;
  
  def WRITE : InstFlag <"printWrite", 1>;
  def LITERAL : InstFlag<"printLiteral">;
  
  def WRITE : InstFlag <"printWrite", 1>;
@@ -89,6 +94,8 @@ def LAST : InstFlag<"printLast", 1>;
  def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
  def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
  def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
  def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
  def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
  def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
  
  class R600ALU_Word0 {
    field bits<32> Word0;
  
  class R600ALU_Word0 {
    field bits<32> Word0;
@@ -263,11 +270,11 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
      InstR600 <0,
                (outs R600_Reg32:$dst),
                (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
      InstR600 <0,
                (outs R600_Reg32:$dst),
                (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
-                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
+                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                     LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
                !strconcat(opName,
                     "$clamp $dst$write$dst_rel$omod, "
                     LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
                !strconcat(opName,
                     "$clamp $dst$write$dst_rel$omod, "
-                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+                   "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
                     "$literal $pred_sel$last"),
                pattern,
                itin>,
                     "$literal $pred_sel$last"),
                pattern,
                itin>,
@@ -303,13 +310,13 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
            (outs R600_Reg32:$dst),
            (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                 OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
            (outs R600_Reg32:$dst),
            (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                 OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
-               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
-               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
+               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
                 LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
            !strconcat(opName,
                  "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                 LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
            !strconcat(opName,
                  "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
-                "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
-                "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+                "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
+                "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
                  "$literal $pred_sel$last"),
            pattern,
            itin>,
                  "$literal $pred_sel$last"),
            pattern,
            itin>,
@@ -340,14 +347,14 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
    InstR600 <0,
            (outs R600_Reg32:$dst),
            (ins REL:$dst_rel, CLAMP:$clamp,
    InstR600 <0,
            (outs R600_Reg32:$dst),
            (ins REL:$dst_rel, CLAMP:$clamp,
-               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
-               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
-               R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
+               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+               R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                 LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
            !strconcat(opName, "$clamp $dst$dst_rel, "
                 LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
            !strconcat(opName, "$clamp $dst$dst_rel, "
-                             "$src0_neg$src0$src0_rel, "
-                             "$src1_neg$src1$src1_rel, "
-                             "$src2_neg$src2$src2_rel, "
+                             "$src0_neg$src0$src0_sel$src0_rel, "
+                             "$src1_neg$src1$src1_sel$src1_rel, "
+                             "$src2_neg$src2$src2_sel$src2_rel, "
                               "$literal $pred_sel$last"),
            pattern,
            itin>,
                               "$literal $pred_sel$last"),
            pattern,
            itin>,
@@ -482,7 +489,7 @@ def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
    >;
  
  def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
    >;
  
  def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
-  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>,
+  SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
    [SDNPMayLoad]
  >;
  
    [SDNPMayLoad]
  >;
  
@@ -1538,12 +1545,6 @@ def MASK_WRITE : AMDGPUShaderInst <
  
  } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
  
  
  } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
  
-def R600_LOAD_CONST : AMDGPUShaderInst <
-  (outs R600_Reg32:$dst),
-  (ins i32imm:$src0),
-  "R600_LOAD_CONST $dst, $src0",
-  [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
->;
  
  def RESERVE_REG : AMDGPUShaderInst <
    (outs),
  
  def RESERVE_REG : AMDGPUShaderInst <
    (outs),
@@ -1551,7 +1552,6 @@ def RESERVE_REG : AMDGPUShaderInst <
    "RESERVE_REG $src",
    [(int_AMDGPU_reserve_reg imm:$src)]
  >;
    "RESERVE_REG $src",
    [(int_AMDGPU_reserve_reg imm:$src)]
  >;
-
  def TXD: AMDGPUShaderInst <
    (outs R600_Reg128:$dst),
    (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
  def TXD: AMDGPUShaderInst <
    (outs R600_Reg128:$dst),
    (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
@@ -1581,6 +1581,78 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
        "RETURN", [(IL_retflag)]>;
  }
  
        "RETURN", [(IL_retflag)]>;
  }
  
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+// CONST_COPY is the pseudo instruction selected for a CONST_ADDRESS node whose
+// address matches ADDRGA_CONST_OFFSET. It carries the matched offset as an
+// immediate; R600LowerConstCopy later replaces it with a MOV reading ALU_CONST.
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in {
+def CONST_COPY : Instruction {
+  let OutOperandList = (outs R600_Reg32:$dst);
+  let InOperandList = (ins i32imm:$src);
+  let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+  let AsmString = "CONST_COPY";
+  let neverHasSideEffects = 1;
+  let isAsCheapAsAMove = 1;
+  let Itinerary = NullALU;
+}
+} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+// Instruction (printed as "VTX_READ_eg") selected for a CONST_ADDRESS node
+// whose address matches ADDRGA_VAR_OFFSET; it writes an R600_Reg128 result.
+// Only the first two 32-bit words are encoded here, see the notes below.
+def TEX_VTX_CONSTBUF :
+  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
+      [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
+  VTX_WORD1_GPR, VTX_WORD0 {
+
+  let VC_INST = 0;
+  let FETCH_TYPE = 2;
+  let FETCH_WHOLE_QUAD = 0;
+  let BUFFER_ID = 0;
+  let SRC_REL = 0;
+  let SRC_SEL_X = 0;
+  let DST_REL = 0;
+  let USE_CONST_FIELDS = 0;
+  let NUM_FORMAT_ALL = 2;
+  let FORMAT_COMP_ALL = 1;
+  let SRF_MODE_ALL = 1;
+  let MEGA_FETCH_COUNT = 16;
+  let DST_SEL_X        = 0;
+  let DST_SEL_Y        = 1;
+  let DST_SEL_Z        = 2;
+  let DST_SEL_W        = 3;
+  let DATA_FORMAT      = 35;
+
+  let Inst{31-0} = Word0;
+  let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2>  ENDIAN_SWAP = 0;
+// bits<1>  CONST_BUF_NO_STRIDE = 0;
+// bits<1>  MEGA_FETCH = 0;
+// bits<1>  ALT_CONST = 0;
+// bits<2>  BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82}    = CONST_BUF_NO_STRIDE;
+// Inst{83}    = MEGA_FETCH;
+// Inst{84}    = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-87} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
  //===--------------------------------------------------------------------===//
  // Instructions support
  //===--------------------------------------------------------------------===//
  //===--------------------------------------------------------------------===//
  // Instructions support
  //===--------------------------------------------------------------------===//
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp

new file mode 100644 (file)

index 0000000..70a2b13
--- /dev/null
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -0,0 +1,74 @@
+//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass handles the ConstCopy pseudo MachineInstrs that remain after ISel.
+/// ISel folds each constant buffer read into scalar ALU instructions. However,
+/// it cannot fold them into vector instructions such as DOT4 or Cube, so ISel
+/// emits ConstCopy instead. This pass (executed after the R600
+/// ExpandSpecialInstrs pass) will try to fold them if possible, or replace
+/// them by MOV otherwise.
+/// TODO: Implement the folding part, using a copy propagation algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/GlobalValue.h"
+
+namespace llvm {
+
+/// Machine-function pass that rewrites every CONST_COPY pseudo instruction
+/// into a MOV whose source register is ALU_CONST.
+class R600LowerConstCopy : public MachineFunctionPass {
+private:
+  /// Pass identifier, passed to the MachineFunctionPass constructor.
+  static char ID;
+  /// R600 instruction info used to build the replacement MOV.
+  const R600InstrInfo *TII;
+public:
+  R600LowerConstCopy(TargetMachine &tm);
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  /// Human-readable pass name. The previous string ("R600 Eliminate Symbolic
+  /// Operand") did not describe what this pass does.
+  const char *getPassName() const { return "R600 Lower Const Copy"; }
+};
+
+// Definition of the pass identifier declared inside the class.
+char R600LowerConstCopy::ID = 0;
+
+/// Registers the pass under ID and caches the target's instruction info,
+/// viewed as R600InstrInfo, for use while lowering.
+R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm)
+    : MachineFunctionPass(ID),
+      TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {}
+
+/// Replaces each CONST_COPY in \p MF with a MOV that reads ALU_CONST, copying
+/// the CONST_COPY immediate into operand 9 of the new MOV.
+///
+/// \returns true if at least one instruction was rewritten, false otherwise.
+bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+                                                      I != E;) {
+      MachineInstr &MI = *I;
+      // Step past MI first so that erasing it below leaves I valid; I also
+      // serves as the insertion point for the replacement instruction.
+      I = llvm::next(I);
+      if (MI.getOpcode() != AMDGPU::CONST_COPY)
+        continue;
+      MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
+          MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
+      // Operand 9 is $src0_sel in the R600_1OP operand list ($dst followed by
+      // eight inputs precede it); this assumes MOV is an R600_1OP -- TODO
+      // confirm against the MOV definition.
+      NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
+      MI.eraseFromParent();
+      Changed = true;
+    }
+  }
+  // The function was modified whenever a CONST_COPY was replaced, so report
+  // that instead of unconditionally returning false.
+  return Changed;
+}
+
+/// Factory declared in AMDGPU.h: returns a newly allocated R600LowerConstCopy
+/// pass constructed for \p tm.
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
+  return new R600LowerConstCopy(tm);
+}
+
+}
+
+
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp

index a39f83dbaca8f05b72353c44fbeb2a9fdeec368f..0441e4a306eb1ab2cd2b8249389836dfa54cde18 100644 (file)
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -38,16 +38,12 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
    Reserved.set(AMDGPU::NEG_ONE);
    Reserved.set(AMDGPU::PV_X);
    Reserved.set(AMDGPU::ALU_LITERAL_X);
    Reserved.set(AMDGPU::NEG_ONE);
    Reserved.set(AMDGPU::PV_X);
    Reserved.set(AMDGPU::ALU_LITERAL_X);
+  Reserved.set(AMDGPU::ALU_CONST);
    Reserved.set(AMDGPU::PREDICATE_BIT);
    Reserved.set(AMDGPU::PRED_SEL_OFF);
    Reserved.set(AMDGPU::PRED_SEL_ZERO);
    Reserved.set(AMDGPU::PRED_SEL_ONE);
  
    Reserved.set(AMDGPU::PREDICATE_BIT);
    Reserved.set(AMDGPU::PRED_SEL_OFF);
    Reserved.set(AMDGPU::PRED_SEL_ZERO);
    Reserved.set(AMDGPU::PRED_SEL_ONE);
  
-  for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
-                        E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
-    Reserved.set(*I);
-  }
-
    for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
                                      E = MFI->ReservedRegs.end(); I != E; ++I) {
      Reserved.set(*I);
    for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
                                      E = MFI->ReservedRegs.end(); I != E; ++I) {
      Reserved.set(*I);
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td

index d3d6d25d2920f768ffeb0692cd1ab8cdd029c268..993fefc2ab3bb1d5b1e87e690a51aa49886312ff 100644 (file)
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -27,10 +27,6 @@ foreach Index = 0-127 in {
    foreach Chan = [ "X", "Y", "Z", "W" ] in {
      // 32-bit Temporary Registers
      def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
    foreach Chan = [ "X", "Y", "Z", "W" ] in {
      // 32-bit Temporary Registers
      def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
-
-    // 32-bit Constant Registers (There are more than 128, this the number
-    // that is currently supported.
-    def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
    }
    // 128-bit Temporary Registers
    def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
    }
    // 128-bit Temporary Registers
    def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
@@ -64,13 +60,11 @@ def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
  
  def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
                            (add (sequence "ArrayBase%u", 448, 464))>;
  
  def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
                            (add (sequence "ArrayBase%u", 448, 464))>;
-
-def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
-                          (add (interleave
-                                  (interleave (sequence "C%u_X", 0, 127),
-                                              (sequence "C%u_Z", 0, 127)),
-                                  (interleave (sequence "C%u_Y", 0, 127),
-                                              (sequence "C%u_W", 0, 127))))>;
+// special registers for ALU src operands
+// const buffer reference, SRCx_SEL contains index
+def ALU_CONST : R600Reg<"CBuf", 0>;
+// interpolation param reference, SRCx_SEL contains index
+def ALU_PARAM : R600Reg<"Param", 0>;
  
  def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
                                     (add (sequence "T%u_X", 0, 127))>;
  
  def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
                                     (add (sequence "T%u_X", 0, 127))>;
@@ -85,15 +79,15 @@ def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
                                     (add (sequence "T%u_W", 0, 127))>;
  
  def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
                                     (add (sequence "T%u_W", 0, 127))>;
  
  def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
-                          (add (interleave
-                                 (interleave R600_TReg32_X, R600_TReg32_Z),
-                                 (interleave R600_TReg32_Y, R600_TReg32_W)))>;
+                                   (interleave R600_TReg32_X, R600_TReg32_Y,
+                                               R600_TReg32_Z, R600_TReg32_W)>;
  
  def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
      R600_TReg32,
  
  def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
      R600_TReg32,
-    R600_CReg32,
      R600_ArrayBase,
      R600_ArrayBase,
-    ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
+    ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
+    ALU_CONST, ALU_PARAM
+    )>;
  
  def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
      PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
  
  def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
      PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 23 Jan 2013 02:09:06 +0000 (02:09 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Wed, 23 Jan 2013 02:09:06 +0000 (02:09 +0000)
lib/Target/R600/AMDGPU.h		patch \| blob \| history
lib/Target/R600/AMDGPUTargetMachine.cpp		patch \| blob \| history
lib/Target/R600/AMDIL.h		patch \| blob \| history
lib/Target/R600/AMDILISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/R600/CMakeLists.txt		patch \| blob \| history
lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp		patch \| blob \| history
lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h		patch \| blob \| history
lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp		patch \| blob \| history
lib/Target/R600/R600Defines.h		patch \| blob \| history
lib/Target/R600/R600ISelLowering.cpp		patch \| blob \| history
lib/Target/R600/R600ISelLowering.h		patch \| blob \| history
lib/Target/R600/R600InstrInfo.cpp		patch \| blob \| history
lib/Target/R600/R600Instructions.td		patch \| blob \| history
lib/Target/R600/R600LowerConstCopy.cpp	[new file with mode: 0644]	patch \| blob
lib/Target/R600/R600RegisterInfo.cpp		patch \| blob \| history
lib/Target/R600/R600RegisterInfo.td		patch \| blob \| history