R600: Support for indirect addressing v4

author Tom Stellard <thomas.stellard@amd.com>

Wed, 6 Feb 2013 17:32:29 +0000 (17:32 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Wed, 6 Feb 2013 17:32:29 +0000 (17:32 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 6 Feb 2013 17:32:29 +0000 (17:32 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 6 Feb 2013 17:32:29 +0000 (17:32 +0000)
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h

index 1aa607f57ea3a4bd8bc08015320abfff65aefc3c..bac01a3f427c94b3e5994d6fad56eb382be05b35 100644 (file)
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -36,6 +36,7 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
  // Passes common to R600 and SI
  Pass *createAMDGPUStructurizeCFGPass();
  FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
  
  } // End namespace llvm
  
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp

new file mode 100644 (file)

index 0000000..815d6f7
--- /dev/null
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -0,0 +1,122 @@
+//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+    int LAO, unsigned TransAl)
+  : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+
+unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
+
+  // XXX: Hardcoding to 1 for now.
+  //
+  // I think the StackWidth should stored as metadata associated with the
+  // MachineFunction.  This metadata can either be added by a frontend, or
+  // calculated by a R600 specific LLVM IR pass.
+  //
+  // The StackWidth determines how stack objects are laid out in memory.
+  // For a vector stack variable, like: int4 stack[2], the data will be stored
+  // in the following ways depending on the StackWidth.
+  //
+  // StackWidth = 1:
+  //
+  // T0.X = stack[0].x
+  // T1.X = stack[0].y
+  // T2.X = stack[0].z
+  // T3.X = stack[0].w
+  // T4.X = stack[1].x
+  // T5.X = stack[1].y
+  // T6.X = stack[1].z
+  // T7.X = stack[1].w
+  //
+  // StackWidth = 2:
+  //
+  // T0.X = stack[0].x
+  // T0.Y = stack[0].y
+  // T1.X = stack[0].z
+  // T1.Y = stack[0].w
+  // T2.X = stack[1].x
+  // T2.Y = stack[1].y
+  // T3.X = stack[1].z
+  // T3.Y = stack[1].w
+  // 
+  // StackWidth = 4:
+  // T0.X = stack[0].x
+  // T0.Y = stack[0].y
+  // T0.Z = stack[0].z
+  // T0.W = stack[0].w
+  // T1.X = stack[1].x
+  // T1.Y = stack[1].y
+  // T1.Z = stack[1].z
+  // T1.W = stack[1].w
+  return 1;
+}
+
+/// \returns The number of registers allocated for \p FI.
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                         int FI) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned Offset = 0;
+  int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
+
+  for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
+    const AllocaInst *Alloca = MFI->getObjectAllocation(i);
+    unsigned ArrayElements;
+    const Type *AllocaType = Alloca->getAllocatedType();
+    const Type *ElementType;
+
+    if (AllocaType->isArrayTy()) {
+      ArrayElements = AllocaType->getArrayNumElements();
+      ElementType = AllocaType->getArrayElementType();
+    } else {
+      ArrayElements = 1;
+      ElementType = AllocaType;
+    }
+
+    unsigned VectorElements;
+    if (ElementType->isVectorTy()) {
+      VectorElements = ElementType->getVectorNumElements();
+    } else {
+      VectorElements = 1;
+    }
+
+    Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
+  }
+  return Offset;
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+  NumEntries = 0;
+  return 0;
+}
+void
+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
+                                  MachineBasicBlock &MBB) const {
+}
+
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
+  return false;
+}
diff --git a/lib/Target/R600/AMDGPUFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h

new file mode 100644 (file)

index 0000000..cf5742e
--- /dev/null
+++ b/lib/Target/R600/AMDGPUFrameLowering.h
@@ -0,0 +1,44 @@
+//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface to describe a layout of a stack frame on a AMDIL target
+/// machine.
+//
+//===----------------------------------------------------------------------===//
+#ifndef AMDILFRAME_LOWERING_H
+#define AMDILFRAME_LOWERING_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+/// \brief Information about the stack frame layout on the AMDGPU targets.
+///
+/// It holds the direction of the stack growth, the known stack alignment on
+/// entry to each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+class AMDGPUFrameLowering : public TargetFrameLowering {
+public:
+  AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+                      unsigned TransAl = 1);
+  virtual ~AMDGPUFrameLowering();
+
+  /// \returns The number of 32-bit sub-registers that are used when storing
+  /// values to the stack.
+  virtual unsigned getStackWidth(const MachineFunction &MF) const;
+  virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+  virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+  virtual void emitPrologue(MachineFunction &MF) const;
+  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+  virtual bool hasFP(const MachineFunction &MF) const;
+};
+} // namespace llvm
+#endif // AMDILFRAME_LOWERING_H
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index f3a047a135c3b2ecb01abd3b091fef183dadb3e0..d0d23d692a390c50f3497e5db1f18f20a728bdf4 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -412,5 +412,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
    NODE_NAME_CASE(URECIP)
    NODE_NAME_CASE(EXPORT)
    NODE_NAME_CASE(CONST_ADDRESS)
+  NODE_NAME_CASE(REGISTER_LOAD)
+  NODE_NAME_CASE(REGISTER_STORE)
    }
  }
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h

index 0584d39682421b501a79748f9fb61db5462d747d..927ed09b4637dbb6ac38e4004610bc47f8f8d8e5 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -122,6 +122,8 @@ enum {
    URECIP,
    EXPORT,
    CONST_ADDRESS,
+  REGISTER_LOAD,
+  REGISTER_STORE,
    LAST_AMDGPU_ISD_NUMBER
  };
  
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp

new file mode 100644 (file)

index 0000000..56aaf23
--- /dev/null
+++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
@@ -0,0 +1,326 @@
+//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Instructions can use indirect addressing to index the register file as if it
+/// were memory.  This pass lowers RegisterLoad and RegisterStore instructions
+/// to either a COPY or a MOV that uses indirect addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const AMDGPUInstrInfo *TII;
+
+  bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
+
+public:
+  AMDGPUIndirectAddressingPass(TargetMachine &tm) :
+    MachineFunctionPass(ID),
+    TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
+    { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const { return "R600 Handle indirect addressing"; }
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUIndirectAddressingPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
+  return new AMDGPUIndirectAddressingPass(tm);
+}
+
+bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  int IndirectBegin = TII->getIndirectIndexBegin(MF);
+  int IndirectEnd = TII->getIndirectIndexEnd(MF);
+
+  if (IndirectBegin == -1) {
+    // No indirect addressing, we can skip this pass
+    assert(IndirectEnd == -1);
+    return false;
+  }
+
+  // The map keeps track of the indirect address that is represented by
+  // each virtual register. The key is the register and the value is the
+  // indirect address it uses.
+  std::map<unsigned, unsigned> RegisterAddressMap;
+
+  // First pass - Lower all of the RegisterStore instructions and track which
+  // registers are live.
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                      BB != BB_E; ++BB) {
+    // This map keeps track of the current live indirect registers.
+    // The key is the address and the value is the register
+    std::map<unsigned, unsigned> LiveAddressRegisterMap;
+    MachineBasicBlock &MBB = *BB;
+
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+                               I != MBB.end(); I = Next) {
+      Next = llvm::next(I);
+      MachineInstr &MI = *I;
+
+      if (!TII->isRegisterStore(MI)) {
+        continue;
+      }
+
+      // Lower RegisterStore
+
+      unsigned RegIndex = MI.getOperand(2).getImm();
+      unsigned Channel = MI.getOperand(3).getImm();
+      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+      const TargetRegisterClass *IndirectStoreRegClass =
+                   TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
+
+      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+        // Direct register access.
+        unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+
+        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
+                .addOperand(MI.getOperand(0));
+
+        RegisterAddressMap[DstReg] = Address;
+        LiveAddressRegisterMap[Address] = DstReg;
+      } else {
+        // Indirect register access.
+        MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
+                                           MI.getOperand(0).getReg(), // Value
+                                           Address,
+                                           MI.getOperand(1).getReg()); // Offset
+        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+          unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+          MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
+          RegisterAddressMap[DstReg] = Addr;
+          LiveAddressRegisterMap[Addr] = DstReg;
+        }
+      }
+      MI.eraseFromParent();
+    }
+
+    // Update the live-ins of the succesor blocks
+    for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
+                                          SuccEnd = MBB.succ_end();
+                                          SuccEnd != Succ; ++Succ) {
+      std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
+      for (Key = LiveAddressRegisterMap.begin(),
+           KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
+        (*Succ)->addLiveIn(Key->second);
+      }
+    }
+  }
+
+  // Second pass - Lower the RegisterLoad instructions
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                      BB != BB_E; ++BB) {
+    // Key is the address and the value is the register
+    std::map<unsigned, unsigned> LiveAddressRegisterMap;
+    MachineBasicBlock &MBB = *BB;
+
+    MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
+    while (LI != MBB.livein_end()) {
+      std::vector<unsigned> PhiRegisters;
+
+      // Make sure this live in is used for indirect addressing
+      if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
+        ++LI;
+        continue;
+      }
+
+      unsigned Address = RegisterAddressMap[*LI];
+      LiveAddressRegisterMap[Address] = *LI;
+      PhiRegisters.push_back(*LI);
+
+      // Check if there are other live in registers which map to the same
+      // indirect address.
+      for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
+                                              LE = MBB.livein_end();
+                                              LJ != LE; ++LJ) {
+        unsigned Reg = *LJ;
+        if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
+          continue;
+        }
+
+        if (RegisterAddressMap[Reg] == Address) {
+          if (!regHasExplicitDef(MRI, Reg)) {
+            continue;
+          }
+          PhiRegisters.push_back(Reg);
+        }
+      }
+
+      if (PhiRegisters.size() == 1) {
+        // We don't need to insert a Phi instruction, so we can just add the
+        // registers to the live list for the block.
+        LiveAddressRegisterMap[Address] = *LI;
+        MBB.removeLiveIn(*LI);
+      } else {
+        // We need to insert a PHI, because we have the same address being
+        // written in multiple predecessor blocks.
+        const TargetRegisterClass *PhiDstClass =
+                   TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
+        unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
+        MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
+                                          MBB.findDebugLoc(MBB.begin()),
+                                          TII->get(AMDGPU::PHI), PhiDstReg);
+
+        for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
+                                                   RE = PhiRegisters.end();
+                                                   RI != RE; ++RI) {
+          unsigned Reg = *RI;
+          MachineInstr *DefInst = MRI.getVRegDef(Reg);
+          assert(DefInst);
+          MachineBasicBlock *RegBlock = DefInst->getParent();
+          Phi.addReg(Reg);
+          Phi.addMBB(RegBlock);
+          MBB.removeLiveIn(Reg);
+        }
+        RegisterAddressMap[PhiDstReg] = Address;
+        LiveAddressRegisterMap[Address] = PhiDstReg;
+      }
+      LI = MBB.livein_begin();
+    }
+
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+                               I != MBB.end(); I = Next) {
+      Next = llvm::next(I);
+      MachineInstr &MI = *I;
+
+      if (!TII->isRegisterLoad(MI)) {
+        if (MI.getOpcode() == AMDGPU::PHI) {
+          continue;
+        }
+        // Check for indirect register defs
+        for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
+                                 OpIdx < NumOperands; ++OpIdx) {
+          MachineOperand &MO = MI.getOperand(OpIdx);
+          if (MO.isReg() && MO.isDef() &&
+              RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
+            unsigned Reg = MO.getReg();
+            unsigned LiveAddress = RegisterAddressMap[Reg];
+            // Chain the live-ins
+            if (LiveAddressRegisterMap.find(LiveAddress) !=
+                                                     RegisterAddressMap.end()) {
+              MI.addOperand(MachineOperand::CreateReg(
+                                  LiveAddressRegisterMap[LiveAddress],
+                                  false, // isDef
+                                  true,  // isImp
+                                  true));  // isKill
+            }
+            LiveAddressRegisterMap[LiveAddress] = Reg;
+          }
+        }
+        continue;
+      }
+
+      const TargetRegisterClass *SuperIndirectRegClass =
+                                                TII->getSuperIndirectRegClass();
+      const TargetRegisterClass *IndirectLoadRegClass =
+                                             TII->getIndirectAddrLoadRegClass();
+      unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
+
+      unsigned RegIndex = MI.getOperand(2).getImm();
+      unsigned Channel = MI.getOperand(3).getImm();
+      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+
+      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+        // Direct register access
+        unsigned Reg = LiveAddressRegisterMap[Address];
+        unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
+
+        if (regHasExplicitDef(MRI, Reg)) {
+          // If the register we are reading from has an explicit def, then that
+          // means it was written via a direct register access (i.e. COPY
+          // or other instruction that doesn't use indirect addressing).  In
+          // this case we know where the value has been stored, so we can just
+          // issue a copy.
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+                  MI.getOperand(0).getReg())
+                  .addReg(Reg);
+        } else {
+          // If the register we are reading has an implicit def, then that
+          // means it was written by an indirect register access (i.e. An
+          // instruction that uses indirect addressing. 
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+                   MI.getOperand(0).getReg())
+                   .addReg(AddrReg);
+        }
+      } else {
+        // Indirect register access
+
+        // Note on REQ_SEQUENCE instructons: You can't actually use the register
+        // it defines unless  you have an instruction that takes the defined
+        // register class as an operand.
+
+        MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
+                                               TII->get(AMDGPU::REG_SEQUENCE),
+                                               IndirectReg);
+        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+          if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
+            continue;
+          }
+          unsigned Reg = LiveAddressRegisterMap[Addr];
+
+          // We only need to use REG_SEQUENCE for explicit defs, since the
+          // register coalescer won't do anything with the implicit defs.
+          MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+          if (!DefInstr->getOperand(0).isReg() ||
+              DefInstr->getOperand(0).getReg() != Reg) {
+            continue;
+          }
+
+          // Insert a REQ_SEQUENCE instruction to force the register allocator
+          // to allocate the virtual register to the correct physical register.
+          Sequence.addReg(LiveAddressRegisterMap[Addr]);
+          Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
+        }
+        MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
+                                           MI.getOperand(0).getReg(), // Value
+                                           Address,
+                                           MI.getOperand(1).getReg()); // Offset
+
+
+
+        Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
+
+      }
+      MI.eraseFromParent();
+    }
+  }
+  return false;
+}
+
+bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
+                                                  unsigned Reg) const {
+  MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+  return DefInstr && DefInstr->getOperand(0).isReg() &&
+         DefInstr->getOperand(0).getReg() == Reg;
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp

index e42a46d839b41836c29edf730a88ef8e3209c5c7..640707d7ca41afd0758af9b1da4f36c5dd79f817 100644 (file)
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -234,7 +234,16 @@ AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
    // TODO: Implement this function
    return true;
  }
- 
+
+bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
+  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
+}
+
+bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
+  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
+}
+
+
  void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
      DebugLoc DL) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h

index cb97af98317e45f7b05e8eb382867688053161da..3909e4e105ee88d5cb19b2a29e5a7eab2fc61b2a 100644 (file)
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -40,9 +40,10 @@ class MachineInstrBuilder;
  class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
  private:
    const AMDGPURegisterInfo RI;
-  TargetMachine &TM;
    bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
                            MachineBasicBlock &MBB) const;
+protected:
+  TargetMachine &TM;
  public:
    explicit AMDGPUInstrInfo(TargetMachine &tm);
  
@@ -130,12 +131,66 @@ public:
    bool isAExtLoadInst(llvm::MachineInstr *MI) const;
    bool isStoreInst(llvm::MachineInstr *MI) const;
    bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+  bool isRegisterStore(const MachineInstr &MI) const;
+  bool isRegisterLoad(const MachineInstr &MI) const;
+
+//===---------------------------------------------------------------------===//
+// Pure virtual funtions to be implemented by sub-classes.
+//===---------------------------------------------------------------------===//
  
    virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                         int64_t Imm) const = 0;
    virtual unsigned getIEQOpcode() const = 0;
    virtual bool isMov(unsigned opcode) const = 0;
  
+  /// \returns the smallest register index that will be accessed by an indirect
+  /// read or write or -1 if indirect addressing is not used by this program.
+  virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
+
+  /// \returns the largest register index that will be accessed by an indirect
+  /// read or write or -1 if indirect addressing is not used by this program.
+  virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
+
+  /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
+  ///        \p Channel
+  ///
+  /// We model indirect addressing using a virtual address space that can be
+  /// accesed with loads and stores.  The "Indirect Address" is the memory
+  /// address in this virtual address space that maps to the given \p RegIndex
+  /// and \p Channel.
+  virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+                                            unsigned Channel) const = 0;
+
+  /// \returns The register class to be used for storing values to an
+  /// "Indirect Address" .
+  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+                                                  unsigned SourceReg) const = 0;
+
+  /// \returns The register class to be used for loading values from
+  /// an "Indirect Address" .
+  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+
+  /// \brief Build instruction(s) for an indirect register write.
+  ///
+  /// \returns The instruction that performs the indirect register write
+  virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned ValueReg, unsigned Address,
+                                    unsigned OffsetReg) const = 0;
+
+  /// \brief Build instruction(s) for an indirect register read.
+  ///
+  /// \returns The instruction that performs the indirect register read
+  virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned ValueReg, unsigned Address,
+                                    unsigned OffsetReg) const = 0;
+
+  /// \returns the register class whose sub registers are the set of all
+  /// possible registers that can be used for indirect addressing.
+  virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
+
+
    /// \brief Convert the AMDIL MachineInstr to a supported ISA
    /// MachineInstr
    virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
@@ -145,4 +200,7 @@ public:
  
  } // End llvm namespace
  
+#define AMDGPU_FLAG_REGISTER_LOAD  (UINT64_C(1) << 63)
+#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
+
  #endif // AMDGPUINSTRINFO_H
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td

index 96368e8541cc67267efb3baf3f3c563b1dd6f63c..b66ae879dc2065c01add7f04b6e8144c9c73f143 100644 (file)
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -72,3 +72,11 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
  def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
  
  def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
+
+def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
+                          SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+                          [SDNPHasChain, SDNPMayLoad]>;
+
+def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
+                           SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+                           [SDNPHasChain, SDNPMayStore]>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td

index e634d20b61452b5b5c9ab91e97163900d6fa83d1..3dee004270dd6ac6244ec63bc921c645e099fd59 100644 (file)
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -13,8 +13,8 @@
  //===----------------------------------------------------------------------===//
  
  class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
-  field bits<16> AMDILOp = 0;
-  field bits<3> Gen = 0;
+  field bit isRegisterLoad = 0;
+  field bit isRegisterStore = 0;
  
    let Namespace = "AMDGPU";
    let OutOperandList = outs;
@@ -22,8 +22,9 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio
    let AsmString = asm;
    let Pattern = pattern;
    let Itinerary = NullALU;
-  let TSFlags{42-40} = Gen;
-  let TSFlags{63-48} = AMDILOp;
+
+  let TSFlags{63} = isRegisterLoad;
+  let TSFlags{62} = isRegisterStore;
  }
  
  class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
@@ -101,7 +102,9 @@ def FP_ONE : PatLeaf <
    [{return N->isExactlyValue(1.0);}]
  >;
  
-let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1  in {
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+let usesCustomInserter = 1  in {
  
  class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
    (outs rc:$dst),
@@ -131,7 +134,31 @@ def SHADER_TYPE : AMDGPUShaderInst <
    [(int_AMDGPU_shader_type imm:$type)]
  >;
  
-} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
+} // usesCustomInserter = 1
+
+multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
+                    ComplexPattern addrPat> {
+  def RegisterLoad : AMDGPUShaderInst <
+    (outs dstClass:$dst),
+    (ins addrClass:$addr, i32imm:$chan),
+    "RegisterLoad $dst, $addr",
+    [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
+                                                    (i32 timm:$chan)))]
+  > {
+    let isRegisterLoad = 1;
+  }
+
+  def RegisterStore : AMDGPUShaderInst <
+    (outs),
+    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
+    "RegisterStore $val, $addr",
+    [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
+  > {
+    let isRegisterStore = 1;
+  }
+}
+
+} // End isCodeGenOnly = 1, isPseudo = 1
  
  /* Generic helper patterns for intrinsics */
  /* -------------------------------------- */
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp

index b33290523dfb974eeed1f38b8f6d9d733c377f24..7878d6000d7ccbaec3ff64ad3c99565f24ae0945 100644 (file)
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -48,5 +48,28 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
    return 0;
  }
  
+unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
+
+  switch(IndirectIndex) {
+  case 0: return AMDGPU::indirect_0;
+  case 1: return AMDGPU::indirect_1;
+  case 2: return AMDGPU::indirect_2;
+  case 3: return AMDGPU::indirect_3;
+  case 4: return AMDGPU::indirect_4;
+  case 5: return AMDGPU::indirect_5;
+  case 6: return AMDGPU::indirect_6;
+  case 7: return AMDGPU::indirect_7;
+  case 8: return AMDGPU::indirect_8;
+  case 9: return AMDGPU::indirect_9;
+  case 10: return AMDGPU::indirect_10;
+  case 11: return AMDGPU::indirect_11;
+  case 12: return AMDGPU::indirect_12;
+  case 13: return AMDGPU::indirect_13;
+  case 14: return AMDGPU::indirect_14;
+  case 15: return AMDGPU::indirect_15;
+  default: llvm_unreachable("indirect index out of range");
+  }
+}
+
  #define GET_REGINFO_TARGET_DESC
  #include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h

index 4a4beade02cf7a6c23fde6bf8bc879c5756a2f4c..1fc88e7455b93cd763bb8db2b9f2d89f65eb71ee 100644 (file)
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -57,6 +57,8 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
                             RegScavenger *RS) const;
    unsigned getFrameRegister(const MachineFunction &MF) const;
  
+  unsigned getIndirectSubReg(unsigned IndirectIndex) const;
+
  };
  
  } // End namespace llvm
diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td

index 8181e023aa3d826b01060ffc4167358b4b393967..0b4482cc51abb16572aae732ab0e828b9ef7f85d 100644 (file)
--- a/lib/Target/R600/AMDGPURegisterInfo.td
+++ b/lib/Target/R600/AMDGPURegisterInfo.td
@@ -16,6 +16,14 @@ let Namespace = "AMDGPU" in {
    def sel_y : SubRegIndex;
    def sel_z : SubRegIndex;
    def sel_w : SubRegIndex;
+
+
+foreach Index = 0-15 in {
+  def indirect_#Index : SubRegIndex;
+}
+
+def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
+
  }
  
  include "R600RegisterInfo.td"
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp

index 2185be3c20373f09bcee30eba792672a291903b6..821e86476f3f40eac403aa7f48d03ac928358816 100644 (file)
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -102,6 +102,12 @@ AMDGPUPassConfig::addPreISel() {
  bool AMDGPUPassConfig::addInstSelector() {
    addPass(createAMDGPUPeepholeOpt(*TM));
    addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+
+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    // This callbacks this pass uses are not implemented yet on SI.
+    addPass(createAMDGPUIndirectAddressingPass(*TM));
+  }
    return false;
  }
  
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h

index 91f9a83a29c5bc4d9831fc4cbcb0432191efd01e..2afe7873a90cbb146d01409bf47c9b9386bc1e94 100644 (file)
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -15,9 +15,9 @@
  #ifndef AMDGPU_TARGET_MACHINE_H
  #define AMDGPU_TARGET_MACHINE_H
  
+#include "AMDGPUFrameLowering.h"
  #include "AMDGPUInstrInfo.h"
  #include "AMDGPUSubtarget.h"
-#include "AMDILFrameLowering.h"
  #include "AMDILIntrinsicInfo.h"
  #include "R600ISelLowering.h"
  #include "llvm/ADT/OwningPtr.h"
diff --git a/lib/Target/R600/AMDILFrameLowering.cpp b/lib/Target/R600/AMDILFrameLowering.cpp

deleted file mode 100644 (file)

index 9ad495a..0000000
--- a/lib/Target/R600/AMDILFrameLowering.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface to describe a layout of a stack frame on a AMDGPU target
-/// machine.
-//
-//===----------------------------------------------------------------------===//
-#include "AMDILFrameLowering.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-
-using namespace llvm;
-AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
-    int LAO, unsigned TransAl)
-  : TargetFrameLowering(D, StackAl, LAO, TransAl) {
-}
-
-AMDGPUFrameLowering::~AMDGPUFrameLowering() {
-}
-
-int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
-                                         int FI) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return MFI->getObjectOffset(FI);
-}
-
-const TargetFrameLowering::SpillSlot *
-AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
-  NumEntries = 0;
-  return 0;
-}
-void
-AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
-}
-void
-AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
-}
-bool
-AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
-  return false;
-}
diff --git a/lib/Target/R600/AMDILFrameLowering.h b/lib/Target/R600/AMDILFrameLowering.h

deleted file mode 100644 (file)

index 51337c3..0000000
--- a/lib/Target/R600/AMDILFrameLowering.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface to describe a layout of a stack frame on a AMDIL target
-/// machine.
-//
-//===----------------------------------------------------------------------===//
-#ifndef AMDILFRAME_LOWERING_H
-#define AMDILFRAME_LOWERING_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
-
-/// \brief Information about the stack frame layout on the AMDGPU targets.
-///
-/// It holds the direction of the stack growth, the known stack alignment on
-/// entry to each function, and the offset to the locals area.
-/// See TargetFrameInfo for more comments.
-class AMDGPUFrameLowering : public TargetFrameLowering {
-public:
-  AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
-                      unsigned TransAl = 1);
-  virtual ~AMDGPUFrameLowering();
-  virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-  virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
-  virtual void emitPrologue(MachineFunction &MF) const;
-  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-  virtual bool hasFP(const MachineFunction &MF) const;
-};
-} // namespace llvm
-#endif // AMDILFRAME_LOWERING_H
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp

index 84223f62e17c52fa39a7a5575df868a02a53da80..26994090643e94ff9afd6abcc1855926666b98a8 100644 (file)
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -75,6 +75,7 @@ private:
    bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
    bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
    bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  
    // Include the pieces autogenerated from the target description.
  #include "AMDGPUGenDAGISel.inc"
@@ -161,16 +162,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
    }
    switch (Opc) {
    default: break;
-  case ISD::FrameIndex: {
-    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
-      unsigned int FI = FIN->getIndex();
-      EVT OpVT = N->getValueType(0);
-      unsigned int NewOpc = AMDGPU::COPY;
-      SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
-      return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
-    }
-    break;
-  }
    case ISD::ConstantFP:
    case ISD::Constant: {
      const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
@@ -613,3 +604,22 @@ bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
  
    return true;
  }
+
+bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  ConstantSDNode *C;
+
+  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+  } else {
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  }
+
+  return true;
+}
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt

index a8be7ed975cd016a7d7d2174e9739f37b6163c91..4f74b0477b3cef7da46168338e6196d9511286d1 100644 (file)
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen
    AMDILDevice.cpp
    AMDILDeviceInfo.cpp
    AMDILEvergreenDevice.cpp
-  AMDILFrameLowering.cpp
    AMDILIntrinsicInfo.cpp
    AMDILISelDAGToDAG.cpp
    AMDILISelLowering.cpp
@@ -25,6 +24,8 @@ add_llvm_target(R600CodeGen
    AMDILPeepholeOptimizer.cpp
    AMDILSIDevice.cpp
    AMDGPUAsmPrinter.cpp
+  AMDGPUFrameLowering.cpp
+  AMDGPUIndirectAddressing.cpp
    AMDGPUMCInstLower.cpp
    AMDGPUSubtarget.cpp
    AMDGPUStructurizeCFG.cpp
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp

index e76c6c86757ebd8d709932146cb4f3347abad876..fb17ab7ddf480b3b9e907352587b3a81d37e1351 100644 (file)
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -105,10 +105,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
  
  void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
                                   raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.getImm() != 0) {
-    O << " + " << Op.getImm();
-  }
+  printIfSet(MI, OpNo, O, "+");
  }
  
  void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h

index e19eea38e492a0c154d05c899dae33a9f6364d0f..16cfcf59eb3d2391dcac8a60a232b1833777059a 100644 (file)
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -49,6 +49,9 @@ namespace R600_InstFlag {
  #define HW_REG_MASK 0x1ff
  #define HW_CHAN_SHIFT 9
  
+#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
+#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
+
  namespace R600Operands {
    enum Ops {
      DST,
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp

index 110dcc18876a9379a19cc300c47c6fecc3c963d8..85187f8fc516597a55f7b76385ca6fb1e28284c4 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -16,6 +16,7 @@
  #include "R600Defines.h"
  #include "R600InstrInfo.h"
  #include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/SelectionDAG.h"
@@ -71,11 +72,23 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::SELECT, MVT::i32, Custom);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
  
+  // Legalize loads and stores to the private address space.
+  setOperationAction(ISD::LOAD, MVT::i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
+  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
+  setOperationAction(ISD::STORE, MVT::i8, Custom);
    setOperationAction(ISD::STORE, MVT::i32, Custom);
+  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
    setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  
    setOperationAction(ISD::LOAD, MVT::i32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
    setTargetDAGCombine(ISD::FP_ROUND);
    setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  
@@ -350,6 +363,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
    case ISD::STORE: return LowerSTORE(Op, DAG);
    case ISD::LOAD: return LowerLOAD(Op, DAG);
    case ISD::FPOW: return LowerFPOW(Op, DAG);
+  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
    case ISD::INTRINSIC_VOID: {
      SDValue Chain = Op.getOperand(0);
      unsigned IntrinsicID =
@@ -485,6 +499,10 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
      DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
      return;
    }
+  case ISD::STORE:
+    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
+    Results.push_back(SDValue(Node, 0));
+    return;
    }
  }
  
@@ -552,6 +570,20 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                       false, false, false, 0);
  }
  
+SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const AMDGPUFrameLowering *TFL =
+   static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+  assert(FIN);
+
+  unsigned FrameIndex = FIN->getIndex();
+  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
+}
+
  SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
    DebugLoc DL = Op.getDebugLoc();
    EVT VT = Op.getValueType();
@@ -766,6 +798,61 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
    return Cond;
  }
  
+/// LLVM generates byte-addresed pointers.  For indirect addressing, we need to
+/// convert these pointers to a register index.  Each register holds
+/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
+/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
+/// for indirect addressing.
+SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
+                                               unsigned StackWidth,
+                                               SelectionDAG &DAG) const {
+  unsigned SRLPad;
+  switch(StackWidth) {
+  case 1:
+    SRLPad = 2;
+    break;
+  case 2:
+    SRLPad = 3;
+    break;
+  case 4:
+    SRLPad = 4;
+    break;
+  default: llvm_unreachable("Invalid stack width");
+  }
+
+  return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
+                     DAG.getConstant(SRLPad, MVT::i32));
+}
+
+void R600TargetLowering::getStackAddress(unsigned StackWidth,
+                                         unsigned ElemIdx,
+                                         unsigned &Channel,
+                                         unsigned &PtrIncr) const {
+  switch (StackWidth) {
+  default:
+  case 1:
+    Channel = 0;
+    if (ElemIdx > 0) {
+      PtrIncr = 1;
+    } else {
+      PtrIncr = 0;
+    }
+    break;
+  case 2:
+    Channel = ElemIdx % 2;
+    if (ElemIdx == 2) {
+      PtrIncr = 1;
+    } else {
+      PtrIncr = 0;
+    }
+    break;
+  case 4:
+    Channel = ElemIdx;
+    PtrIncr = 0;
+    break;
+  }
+}
+
  SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    DebugLoc DL = Op.getDebugLoc();
    StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
@@ -787,7 +874,52 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
      }
      return Chain;
    }
-  return SDValue();
+
+  EVT ValueVT = Value.getValueType();
+
+  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+    return SDValue();
+  }
+
+  // Lowering for indirect addressing
+
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+                                         getTargetMachine().getFrameLowering());
+  unsigned StackWidth = TFL->getStackWidth(MF);
+
+  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+  if (ValueVT.isVector()) {
+    unsigned NumElemVT = ValueVT.getVectorNumElements();
+    EVT ElemVT = ValueVT.getVectorElementType();
+    SDValue Stores[4];
+
+    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+                                      "vector width in load");
+
+    for (unsigned i = 0; i < NumElemVT; ++i) {
+      unsigned Channel, PtrIncr;
+      getStackAddress(StackWidth, i, Channel, PtrIncr);
+      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+                        DAG.getConstant(PtrIncr, MVT::i32));
+      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+                                 Value, DAG.getConstant(i, MVT::i32));
+
+      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+                              Chain, Elem, Ptr,
+                              DAG.getTargetConstant(Channel, MVT::i32));
+    }
+     Chain =  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
+   } else {
+    if (ValueVT == MVT::i8) {
+      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+    }
+    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+    DAG.getTargetConstant(0, MVT::i32)); // Channel 
+  }
+
+  return Chain;
  }
  
  // return (512 + (kc_bank << 12)
@@ -876,7 +1008,53 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
      return DAG.getMergeValues(MergedValues, 2, DL);
    }
  
-  return SDValue();
+  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+    return SDValue();
+  }
+
+  // Lowering for indirect addressing
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+                                         getTargetMachine().getFrameLowering());
+  unsigned StackWidth = TFL->getStackWidth(MF);
+
+  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+  if (VT.isVector()) {
+    unsigned NumElemVT = VT.getVectorNumElements();
+    EVT ElemVT = VT.getVectorElementType();
+    SDValue Loads[4];
+
+    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+                                      "vector width in load");
+
+    for (unsigned i = 0; i < NumElemVT; ++i) {
+      unsigned Channel, PtrIncr;
+      getStackAddress(StackWidth, i, Channel, PtrIncr);
+      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+                        DAG.getConstant(PtrIncr, MVT::i32));
+      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+                             Chain, Ptr,
+                             DAG.getTargetConstant(Channel, MVT::i32),
+                             Op.getOperand(2));
+    }
+    for (unsigned i = NumElemVT; i < 4; ++i) {
+      Loads[i] = DAG.getUNDEF(ElemVT);
+    }
+    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
+  } else {
+    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+                              Chain, Ptr,
+                              DAG.getTargetConstant(0, MVT::i32), // Channel
+                              Op.getOperand(2));
+  }
+
+  SDValue Ops[2];
+  Ops[0] = LoweredLoad;
+  Ops[1] = Chain;
+
+  return DAG.getMergeValues(Ops, 2, DL);
  }
  
  SDValue R600TargetLowering::LowerFPOW(SDValue Op,
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h

index c141d50210e792660dda28d7801652c9c53f0cf3..afa38971551245b204bc8ac5bed96b31a14c79ae 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -64,7 +64,12 @@ private:
    SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-  
+  SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
+                                          SelectionDAG &DAG) const;
+  void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
+                       unsigned &Channel, unsigned &PtrIncr) const;
    bool isZero(SDValue Op) const;
  };
  
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp

index 85859eb6202db82e5e9b452dc374447ebbf44f79..7e3f00572930be3737d52c9bfda932f36e01330d 100644 (file)
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -16,8 +16,11 @@
  #include "AMDGPUSubtarget.h"
  #include "AMDGPUTargetMachine.h"
  #include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
  #include "R600RegisterInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
  
  #define GET_INSTRINFO_CTOR
  #include "AMDGPUGenDFAPacketizer.inc"
@@ -465,6 +468,124 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
    return 2;
  }
  
+int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  int Offset = 0;
+
+  if (MFI->getNumObjects() == 0) {
+    return -1;
+  }
+
+  if (MRI.livein_empty()) {
+    return 0;
+  }
+
+  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+                                            LE = MRI.livein_end();
+                                            LI != LE; ++LI) {
+    Offset = std::max(Offset,
+                      GET_REG_INDEX(RI.getEncodingValue(LI->first)));
+  }
+
+  return Offset + 1;
+}
+
+int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+  int Offset = 0;
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Variable sized objects are not supported
+  assert(!MFI->hasVarSizedObjects());
+
+  if (MFI->getNumObjects() == 0) {
+    return -1;
+  }
+
+  Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+  return getIndirectIndexBegin(MF) + Offset;
+}
+
+std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+                                             const MachineFunction &MF) const {
+  const AMDGPUFrameLowering *TFL =
+                 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
+  std::vector<unsigned> Regs;
+
+  unsigned StackWidth = TFL->getStackWidth(MF);
+  int End = getIndirectIndexEnd(MF);
+
+  if (End == -1) {
+    return Regs;
+  }
+
+  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
+    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
+    Regs.push_back(SuperReg);
+    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
+      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+      Regs.push_back(Reg);
+    }
+  }
+  return Regs;
+}
+
+unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
+                                                 unsigned Channel) const {
+  // XXX: Remove when we support a stack width > 2
+  assert(Channel == 0);
+  return RegIndex;
+}
+
+const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
+                                                     unsigned SourceReg) const {
+  return &AMDGPU::R600_TReg32RegClass;
+}
+
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
+  return &AMDGPU::TRegMemRegClass;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned ValueReg, unsigned Address,
+                                       unsigned OffsetReg) const {
+  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+                                               AMDGPU::AR_X, OffsetReg);
+  setImmOperand(MOVA, R600Operands::WRITE, 0);
+
+  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+                                      AddrReg, ValueReg)
+                                      .addReg(AMDGPU::AR_X, RegState::Implicit);
+  setImmOperand(Mov, R600Operands::DST_REL, 1);
+  return Mov;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned ValueReg, unsigned Address,
+                                       unsigned OffsetReg) const {
+  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+                                                       AMDGPU::AR_X,
+                                                       OffsetReg);
+  setImmOperand(MOVA, R600Operands::WRITE, 0);
+  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+                                      ValueReg,
+                                      AddrReg)
+                                      .addReg(AMDGPU::AR_X, RegState::Implicit);
+  setImmOperand(Mov, R600Operands::SRC0_REL, 1);
+
+  return Mov;
+}
+
+const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
+  return &AMDGPU::IndirectRegRegClass;
+}
+
+
  MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                    MachineBasicBlock::iterator I,
                                                    unsigned Opcode,
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h

index 11685af5b027e0a70524a6d31162bd539f6f7841..efe721c00cf13e19889fa37ade446c7ae6268a14 100644 (file)
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -113,6 +113,38 @@ namespace llvm {
    virtual int getInstrLatency(const InstrItineraryData *ItinData,
                                SDNode *Node) const { return 1;}
  
+  /// \returns a list of all the registers that may be accesed using indirect
+  /// addressing.
+  std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
+
+  virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+  virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+
+  virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+                                            unsigned Channel) const;
+
+  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+                                                      unsigned SourceReg) const;
+
+  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+  virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned ValueReg, unsigned Address,
+                                  unsigned OffsetReg) const;
+
+  virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned ValueReg, unsigned Address,
+                                  unsigned OffsetReg) const;
+
+  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+
+
+  ///buildDefaultInstruction - This function returns a MachineInstr with
+  /// all the instruction modifiers initialized to their default values.
    /// You can use this function to avoid manually specifying each instruction
    /// modifier operand when building a new instruction.
    ///
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td

index f935313fe9b3fd0e1c2ae43081be66e97a3720af..afb30ec0d4c4ebd06132eb4ab0464aaa9a34d41c 100644 (file)
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -91,11 +91,16 @@ def UP : InstFlag <"printUpdatePred">;
  // default to 0.
  def LAST : InstFlag<"printLast", 1>;
  
+def FRAMEri : Operand<iPTR> {
+  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
  def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
  def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
  def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
  def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
  def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
  
  class R600ALU_Word0 {
    field bits<32> Word0;
@@ -1220,6 +1225,10 @@ let Predicates = [isEGorCayman] in {
    defm DOT4_eg : DOT4_Common<0xBE>;
    defm CUBE_eg : CUBE_Common<0xC0>;
  
+let hasSideEffects = 1 in {
+  def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
    def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
  
    def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
@@ -1470,6 +1479,12 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
  
  }
  
+//===----------------------------------------------------------------------===//
+// Regist loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
  let Predicates = [isCayman] in {
  
  let isVector = 1 in { 
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h

index ad7b4da45ae81be5b69f893202e37b95dbd5a1c2..4b901f4bbc34859c479228bde392dbf3fb3b8038 100644 (file)
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -13,6 +13,7 @@
  #ifndef R600MACHINEFUNCTIONINFO_H
  #define R600MACHINEFUNCTIONINFO_H
  
+#include "llvm/ADT/BitVector.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include <vector>
@@ -24,6 +25,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
  public:
    R600MachineFunctionInfo(const MachineFunction &MF);
    SmallVector<unsigned, 4> LiveOuts;
+  std::vector<unsigned> IndirectRegs;
    SDNode *Outputs[16];
  };
  
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp

index d46b3a354357220ab09adcfb3cf3d12fc24dc692..cd3fc4a45ef800bdbd1672aa17fe60818cb1d276 100644 (file)
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -15,6 +15,7 @@
  #include "R600RegisterInfo.h"
  #include "AMDGPUTargetMachine.h"
  #include "R600Defines.h"
+#include "R600InstrInfo.h"
  #include "R600MachineFunctionInfo.h"
  
  using namespace llvm;
@@ -43,6 +44,18 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
    Reserved.set(AMDGPU::PRED_SEL_ZERO);
    Reserved.set(AMDGPU::PRED_SEL_ONE);
  
+  for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
+                        E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
+    Reserved.set(*I);
+  }
+
+  const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
+  std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
+  for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
+                                       E = IndirectRegs.end();
+                                       I != E; ++I) {
+    Reserved.set(*I);
+  }
    return Reserved;
  }
  
@@ -77,3 +90,4 @@ unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
      case 3: return AMDGPU::sel_w;
    }
  }
+
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td

index 993fefc2ab3bb1d5b1e87e690a51aa49886312ff..9a8b85950cd837e84cefbb72bf219a409516ba06 100644 (file)
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -27,6 +27,12 @@ foreach Index = 0-127 in {
    foreach Chan = [ "X", "Y", "Z", "W" ] in {
      // 32-bit Temporary Registers
      def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
+
+    // Indirect addressing offset registers
+    def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
+                                              Index, Chan>;
+    def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
+                                                Chan>;
    }
    // 128-bit Temporary Registers
    def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
@@ -57,6 +63,7 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
  def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
  def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
  def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def AR_X : R600Reg<"AR.x", 0>;
  
  def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
                            (add (sequence "ArrayBase%u", 448, 464))>;
@@ -66,6 +73,13 @@ def ALU_CONST : R600Reg<"CBuf", 0>;
  // interpolation param reference, SRCx_SEL contains index
  def ALU_PARAM : R600Reg<"Param", 0>;
  
+let isAllocatable = 0 in {
+
+// XXX: Only use the X channel, until we support wider stack widths
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+
+} // End isAllocatable = 0
+
  def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
                                     (add (sequence "T%u_X", 0, 127))>;
  
@@ -85,6 +99,7 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
  def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
      R600_TReg32,
      R600_ArrayBase,
+    R600_Addr,
      ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
      ALU_CONST, ALU_PARAM
      )>;
@@ -99,3 +114,34 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
                                  (add (sequence "T%u_XYZW", 0, 127))> {
    let CopyCost = -1;
  }
+
+//===----------------------------------------------------------------------===//
+// Register classes for indirect addressing
+//===----------------------------------------------------------------------===//
+
+// Super register for all the Indirect Registers.  This register class is used
+// by the REG_SEQUENCE instruction to specify the registers to use for direct
+// reads / writes which may be written / read by an indirect address.
+class IndirectSuper<string n, list<Register> subregs> :
+    RegisterWithSubRegs<n, subregs> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices =
+ [indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6,
+ indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12,
+ indirect_13,indirect_14,indirect_15];
+}
+
+def IndirectSuperReg : IndirectSuper<"Indirect",
+  [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
+   TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
+   TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
+>;
+
+def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
+
+// This register class defines the registers that are the storage units for
+// the "Indirect Addressing" pseudo memory space.
+// XXX: Only use the X channel, until we support wider stack widths
+def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
+  (add (sequence "TRegMem%u_X", 0, 16))
+>;
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp

index c6ad4d531dc82fa40b5cbae3f8a6b8e4075c3829..b40337d5d24362e4ea543b8fb456bfb4d937050d 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -87,3 +87,51 @@ bool
  SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
    return RC != &AMDGPU::EXECRegRegClass;
  }
+
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+                                                 unsigned Channel) const {
+  assert(Channel == 0);
+  return RegIndex;
+}
+
+
+int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+  llvm_unreachable("Unimplemented");
+}
+
+int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+  llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
+                                                     unsigned SourceReg) const {
+  llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
+  llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
+                                   MachineBasicBlock *MBB,
+                                   MachineBasicBlock::iterator I,
+                                   unsigned ValueReg,
+                                   unsigned Address, unsigned OffsetReg) const {
+  llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectRead(
+                                   MachineBasicBlock *MBB,
+                                   MachineBasicBlock::iterator I,
+                                   unsigned ValueReg,
+                                   unsigned Address, unsigned OffsetReg) const {
+  llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
+  llvm_unreachable("Unimplemented");
+}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h

index 783cd9f4cb4a51cd21dad16ab8c7b45d77d12706..e4de4b85f26d003aef1a72242cb9b97352d15e8e 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -48,6 +48,32 @@ public:
    virtual bool isMov(unsigned Opcode) const;
  
    virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+  virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+  virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+  virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+                                            unsigned Channel) const;
+
+  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+                                                      unsigned SourceReg) const;
+
+  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+  virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+                                                 MachineBasicBlock::iterator I,
+                                                 unsigned ValueReg,
+                                                 unsigned Address,
+                                                 unsigned OffsetReg) const;
+
+  virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+                                                MachineBasicBlock::iterator I,
+                                                unsigned ValueReg,
+                                                unsigned Address,
+                                                unsigned OffsetReg) const;
+
+  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
    };
  
  } // End namespace llvm
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 6 Feb 2013 17:32:29 +0000 (17:32 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Wed, 6 Feb 2013 17:32:29 +0000 (17:32 +0000)
lib/Target/R600/AMDGPU.h		patch \| blob \| history
lib/Target/R600/AMDGPUFrameLowering.cpp	[new file with mode: 0644]	patch \| blob
lib/Target/R600/AMDGPUFrameLowering.h	[new file with mode: 0644]	patch \| blob
lib/Target/R600/AMDGPUISelLowering.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUISelLowering.h		patch \| blob \| history
lib/Target/R600/AMDGPUIndirectAddressing.cpp	[new file with mode: 0644]	patch \| blob
lib/Target/R600/AMDGPUInstrInfo.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUInstrInfo.h		patch \| blob \| history
lib/Target/R600/AMDGPUInstrInfo.td		patch \| blob \| history
lib/Target/R600/AMDGPUInstructions.td		patch \| blob \| history
lib/Target/R600/AMDGPURegisterInfo.cpp		patch \| blob \| history
lib/Target/R600/AMDGPURegisterInfo.h		patch \| blob \| history
lib/Target/R600/AMDGPURegisterInfo.td		patch \| blob \| history
lib/Target/R600/AMDGPUTargetMachine.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUTargetMachine.h		patch \| blob \| history
lib/Target/R600/AMDILFrameLowering.cpp	[deleted file]	patch \| blob \| history
lib/Target/R600/AMDILFrameLowering.h	[deleted file]	patch \| blob \| history
lib/Target/R600/AMDILISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/R600/CMakeLists.txt		patch \| blob \| history
lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp		patch \| blob \| history
lib/Target/R600/R600Defines.h		patch \| blob \| history
lib/Target/R600/R600ISelLowering.cpp		patch \| blob \| history
lib/Target/R600/R600ISelLowering.h		patch \| blob \| history
lib/Target/R600/R600InstrInfo.cpp		patch \| blob \| history
lib/Target/R600/R600InstrInfo.h		patch \| blob \| history
lib/Target/R600/R600Instructions.td		patch \| blob \| history
lib/Target/R600/R600MachineFunctionInfo.h		patch \| blob \| history
lib/Target/R600/R600RegisterInfo.cpp		patch \| blob \| history
lib/Target/R600/R600RegisterInfo.td		patch \| blob \| history
lib/Target/R600/SIInstrInfo.cpp		patch \| blob \| history
lib/Target/R600/SIInstrInfo.h		patch \| blob \| history