R600: Simplify handling of private address space
authorTom Stellard <thomas.stellard@amd.com>
Tue, 22 Oct 2013 18:19:10 +0000 (18:19 +0000)
committerTom Stellard <thomas.stellard@amd.com>
Tue, 22 Oct 2013 18:19:10 +0000 (18:19 +0000)
The AMDGPUIndirectAddressing pass was previously responsible for
lowering private loads and stores to indirect addressing instructions.
However, this pass was buggy and way too complicated.  The only
advantage it had over the new simplified code was that it saved one
instruction per direct write to private memory.  This optimization
likely has a minimal impact on performance, and we may be able
to duplicate it using some other transformation.

For the private address space, we now:
1. Lower private loads/stores to Register(Load|Store) instructions
2. Reserve part of the register file as 'private memory'
3. After regalloc lower the Register(Load|Store) instructions to
   MOV instructions that use indirect addressing.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193179 91177308-0d34-0410-b5e6-96231b3b80d8

14 files changed:
lib/Target/R600/AMDGPU.h
lib/Target/R600/AMDGPUIndirectAddressing.cpp [deleted file]
lib/Target/R600/AMDGPUInstrInfo.cpp
lib/Target/R600/AMDGPUInstrInfo.h
lib/Target/R600/AMDGPUTargetMachine.cpp
lib/Target/R600/CMakeLists.txt
lib/Target/R600/R600InstrInfo.cpp
lib/Target/R600/R600InstrInfo.h
lib/Target/R600/R600Packetizer.cpp
lib/Target/R600/R600RegisterInfo.cpp
lib/Target/R600/R600RegisterInfo.td
lib/Target/R600/SIInstrInfo.cpp
lib/Target/R600/SIInstrInfo.h
test/CodeGen/R600/indirect-addressing.ll

index feec1c539c525d0b201f2131856b4c82756c0617..025b28e32bfe29aed95286f4403f1d129a5f437f 100644 (file)
@@ -45,7 +45,6 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
 // Passes common to R600 and SI
 Pass *createAMDGPUStructurizeCFGPass();
 FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
 FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
 
 /// \brief Creates an AMDGPU-specific Target Transformation Info pass.
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
deleted file mode 100644 (file)
index f31eed0..0000000
+++ /dev/null
@@ -1,345 +0,0 @@
-//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Instructions can use indirect addressing to index the register file as if it
-/// were memory.  This pass lowers RegisterLoad and RegisterStore instructions
-/// to either a COPY or a MOV that uses indirect addressing.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
-
-private:
-  static char ID;
-  const AMDGPUInstrInfo *TII;
-
-  bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
-
-public:
-  AMDGPUIndirectAddressingPass(TargetMachine &tm) :
-    MachineFunctionPass(ID),
-    TII(0)
-    { }
-
-  virtual bool runOnMachineFunction(MachineFunction &MF);
-
-  const char *getPassName() const { return "R600 Handle indirect addressing"; }
-
-};
-
-} // End anonymous namespace
-
-char AMDGPUIndirectAddressingPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
-  return new AMDGPUIndirectAddressingPass(tm);
-}
-
-bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  TII = static_cast<const AMDGPUInstrInfo*>(MF.getTarget().getInstrInfo());
-
-  int IndirectBegin = TII->getIndirectIndexBegin(MF);
-  int IndirectEnd = TII->getIndirectIndexEnd(MF);
-
-  if (IndirectBegin == -1) {
-    // No indirect addressing, we can skip this pass
-    assert(IndirectEnd == -1);
-    return false;
-  }
-
-  // The map keeps track of the indirect address that is represented by
-  // each virtual register. The key is the register and the value is the
-  // indirect address it uses.
-  std::map<unsigned, unsigned> RegisterAddressMap;
-
-  // First pass - Lower all of the RegisterStore instructions and track which
-  // registers are live.
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                      BB != BB_E; ++BB) {
-    // This map keeps track of the current live indirect registers.
-    // The key is the address and the value is the register
-    std::map<unsigned, unsigned> LiveAddressRegisterMap;
-    MachineBasicBlock &MBB = *BB;
-
-    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-                               I != MBB.end(); I = Next) {
-      Next = llvm::next(I);
-      MachineInstr &MI = *I;
-
-      if (!TII->isRegisterStore(MI)) {
-        continue;
-      }
-
-      // Lower RegisterStore
-
-      unsigned RegIndex = MI.getOperand(2).getImm();
-      unsigned Channel = MI.getOperand(3).getImm();
-      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
-      const TargetRegisterClass *IndirectStoreRegClass =
-                   TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
-
-      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
-        // Direct register access.
-        unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
-
-        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
-                .addOperand(MI.getOperand(0));
-
-        RegisterAddressMap[DstReg] = Address;
-        LiveAddressRegisterMap[Address] = DstReg;
-      } else {
-        // Indirect register access.
-        MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
-                                           MI.getOperand(0).getReg(), // Value
-                                           Address,
-                                           MI.getOperand(1).getReg()); // Offset
-        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
-          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
-          unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
-          MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
-          RegisterAddressMap[DstReg] = Addr;
-          LiveAddressRegisterMap[Addr] = DstReg;
-        }
-      }
-      MI.eraseFromParent();
-    }
-
-    // Update the live-ins of the succesor blocks
-    for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
-                                          SuccEnd = MBB.succ_end();
-                                          SuccEnd != Succ; ++Succ) {
-      std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
-      for (Key = LiveAddressRegisterMap.begin(),
-           KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
-        (*Succ)->addLiveIn(Key->second);
-      }
-    }
-  }
-
-  // Second pass - Lower the RegisterLoad instructions
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                      BB != BB_E; ++BB) {
-    // Key is the address and the value is the register
-    std::map<unsigned, unsigned> LiveAddressRegisterMap;
-    MachineBasicBlock &MBB = *BB;
-
-    MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
-    while (LI != MBB.livein_end()) {
-      std::vector<unsigned> PhiRegisters;
-
-      // Make sure this live in is used for indirect addressing
-      if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
-        ++LI;
-        continue;
-      }
-
-      unsigned Address = RegisterAddressMap[*LI];
-      LiveAddressRegisterMap[Address] = *LI;
-      PhiRegisters.push_back(*LI);
-
-      // Check if there are other live in registers which map to the same
-      // indirect address.
-      for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
-                                              LE = MBB.livein_end();
-                                              LJ != LE; ++LJ) {
-        unsigned Reg = *LJ;
-        if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
-          continue;
-        }
-
-        if (RegisterAddressMap[Reg] == Address) {
-          PhiRegisters.push_back(Reg);
-        }
-      }
-
-      if (PhiRegisters.size() == 1) {
-        // We don't need to insert a Phi instruction, so we can just add the
-        // registers to the live list for the block.
-        LiveAddressRegisterMap[Address] = *LI;
-        MBB.removeLiveIn(*LI);
-      } else {
-        // We need to insert a PHI, because we have the same address being
-        // written in multiple predecessor blocks.
-        const TargetRegisterClass *PhiDstClass =
-                   TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
-        unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
-        MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
-                                          MBB.findDebugLoc(MBB.begin()),
-                                          TII->get(AMDGPU::PHI), PhiDstReg);
-
-        for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
-                                                   RE = PhiRegisters.end();
-                                                   RI != RE; ++RI) {
-          unsigned Reg = *RI;
-          MachineInstr *DefInst = MRI.getVRegDef(Reg);
-          assert(DefInst);
-          MachineBasicBlock *RegBlock = DefInst->getParent();
-          Phi.addReg(Reg);
-          Phi.addMBB(RegBlock);
-          MBB.removeLiveIn(Reg);
-        }
-        RegisterAddressMap[PhiDstReg] = Address;
-        LiveAddressRegisterMap[Address] = PhiDstReg;
-      }
-      LI = MBB.livein_begin();
-    }
-
-    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-                               I != MBB.end(); I = Next) {
-      Next = llvm::next(I);
-      MachineInstr &MI = *I;
-
-      if (!TII->isRegisterLoad(MI)) {
-        if (MI.getOpcode() == AMDGPU::PHI) {
-          continue;
-        }
-        // Check for indirect register defs
-        for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
-                                 OpIdx < NumOperands; ++OpIdx) {
-          MachineOperand &MO = MI.getOperand(OpIdx);
-          if (MO.isReg() && MO.isDef() &&
-              RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
-            unsigned Reg = MO.getReg();
-            unsigned LiveAddress = RegisterAddressMap[Reg];
-            // Chain the live-ins
-            if (LiveAddressRegisterMap.find(LiveAddress) !=
-                LiveAddressRegisterMap.end()) {
-              MI.addOperand(MachineOperand::CreateReg(
-                                  LiveAddressRegisterMap[LiveAddress],
-                                  false, // isDef
-                                  true,  // isImp
-                                  true));  // isKill
-            }
-            LiveAddressRegisterMap[LiveAddress] = Reg;
-          }
-        }
-        continue;
-      }
-
-      const TargetRegisterClass *SuperIndirectRegClass =
-                                                TII->getSuperIndirectRegClass();
-      const TargetRegisterClass *IndirectLoadRegClass =
-                                             TII->getIndirectAddrLoadRegClass();
-      unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
-
-      unsigned RegIndex = MI.getOperand(2).getImm();
-      unsigned Channel = MI.getOperand(3).getImm();
-      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
-
-      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
-        // Direct register access
-        unsigned Reg = LiveAddressRegisterMap[Address];
-        unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
-
-        if (regHasExplicitDef(MRI, Reg)) {
-          // If the register we are reading from has an explicit def, then that
-          // means it was written via a direct register access (i.e. COPY
-          // or other instruction that doesn't use indirect addressing).  In
-          // this case we know where the value has been stored, so we can just
-          // issue a copy.
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
-                  MI.getOperand(0).getReg())
-                  .addReg(Reg);
-        } else {
-          // If the register we are reading has an implicit def, then that
-          // means it was written by an indirect register access (i.e. An
-          // instruction that uses indirect addressing. 
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
-                   MI.getOperand(0).getReg())
-                   .addReg(AddrReg)
-                   .addReg(Reg, RegState::Implicit);
-        }
-      } else {
-        // Indirect register access
-
-        // Note on REQ_SEQUENCE instructions: You can't actually use the register
-        // it defines unless  you have an instruction that takes the defined
-        // register class as an operand.
-
-        MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
-                                               TII->get(AMDGPU::REG_SEQUENCE),
-                                               IndirectReg);
-        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
-          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
-          if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
-            continue;
-          }
-          unsigned Reg = LiveAddressRegisterMap[Addr];
-
-          // We only need to use REG_SEQUENCE for explicit defs, since the
-          // register coalescer won't do anything with the implicit defs.
-          if (!regHasExplicitDef(MRI, Reg)) {
-            continue;
-          }
-
-          // Insert a REQ_SEQUENCE instruction to force the register allocator
-          // to allocate the virtual register to the correct physical register.
-          Sequence.addReg(LiveAddressRegisterMap[Addr]);
-          Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
-        }
-        MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
-                                           MI.getOperand(0).getReg(), // Value
-                                           Address,
-                                           MI.getOperand(1).getReg()); // Offset
-
-
-
-        Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
-        Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
-
-      }
-      MI.eraseFromParent();
-    }
-  }
-  return false;
-}
-
-bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
-                                                  unsigned Reg) const {
-  MachineInstr *DefInstr = MRI.getVRegDef(Reg);
-
-  if (!DefInstr) {
-    return false;
-  }
-
-  if (DefInstr->getOpcode() == AMDGPU::PHI) {
-    bool Explicit = false;
-    for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
-                                          E = DefInstr->operands_end();
-                                          I != E; ++I) {
-      const MachineOperand &MO = *I;
-      if (!MO.isReg() || MO.isDef()) {
-        continue;
-      }
-
-      Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
-    }
-    return Explicit;
-  }
-
-  return DefInstr->getOperand(0).isReg() &&
-         DefInstr->getOperand(0).getReg() == Reg;
-}
index bb7f97ff11dac43ea3f7ee37f2af6c80af34ab4b..434c91a523156082bd4600d36ca75f8b0a031d57 100644 (file)
@@ -118,6 +118,46 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   assert(!"Not Implemented");
 }
 
+/// \brief Expand the RegisterLoad / RegisterStore pseudo instructions.
+///
+/// Operand 0 is the value register, operand 1 the offset register, and
+/// operands 2 and 3 are the register index and channel handed to
+/// calculateIndirectAddress().  An offset register of INDIRECT_BASE_ADDR
+/// means a direct access, emitted as a MOV to / from the register at that
+/// address in getIndirectAddrRegClass(); any other offset register goes
+/// through buildIndirectRead() / buildIndirectWrite().
+///
+/// \returns true if \p MI was expanded and erased, false if it was left alone.
+bool AMDGPUInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+  const bool IsLoad = isRegisterLoad(*MI);
+  if (!IsLoad && !isRegisterStore(*MI))
+    return false;
+
+  MachineBasicBlock *MBB = MI->getParent();
+  unsigned ValueReg = MI->getOperand(0).getReg();
+  unsigned OffsetReg = MI->getOperand(1).getReg();
+  unsigned Address = calculateIndirectAddress(MI->getOperand(2).getImm(),
+                                              MI->getOperand(3).getImm());
+
+  if (OffsetReg != AMDGPU::INDIRECT_BASE_ADDR) {
+    // Indirect access: the effective address depends on OffsetReg.
+    if (IsLoad)
+      buildIndirectRead(MBB, MI, ValueReg, Address, OffsetReg);
+    else
+      buildIndirectWrite(MBB, MI, ValueReg, Address, OffsetReg);
+  } else {
+    // Direct access: the address is known, so a plain MOV is enough.
+    unsigned MemReg = getIndirectAddrRegClass()->getRegister(Address);
+    if (IsLoad)
+      buildMovInstr(MBB, MI, ValueReg, MemReg);
+    else
+      buildMovInstr(MBB, MI, MemReg, ValueReg);
+  }
+
+  MBB->erase(MI);
+  return true;
+}
+
 MachineInstr *
 AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr *MI,
index c83e57de2e1e5471258891913121bc7fa22eff8d..dc65d4e75f760c7becdb82b0e6910dbee1229627 100644 (file)
@@ -87,6 +87,8 @@ public:
                             unsigned DestReg, int FrameIndex,
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const;
+  virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
 
 protected:
   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
@@ -160,14 +162,9 @@ public:
   virtual unsigned calculateIndirectAddress(unsigned RegIndex,
                                             unsigned Channel) const = 0;
 
-  /// \returns The register class to be used for storing values to an
-  /// "Indirect Address" .
-  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
-                                                  unsigned SourceReg) const = 0;
-
-  /// \returns The register class to be used for loading values from
-  /// an "Indirect Address" .
-  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+  /// \returns The register class to be used for loading and storing values
+  /// from an "Indirect Address" .
+  virtual const TargetRegisterClass *getIndirectAddrRegClass() const = 0;
 
   /// \brief Build instruction(s) for an indirect register write.
   ///
@@ -185,19 +182,21 @@ public:
                                     unsigned ValueReg, unsigned Address,
                                     unsigned OffsetReg) const = 0;
 
-  /// \returns the register class whose sub registers are the set of all
-  /// possible registers that can be used for indirect addressing.
-  virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
-
 
   /// \brief Convert the AMDIL MachineInstr to a supported ISA
   /// MachineInstr
   virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
     DebugLoc DL) const;
 
+  /// \brief Build a MOV instruction.
+  virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+                                      MachineBasicBlock::iterator I,
+                                      unsigned DstReg, unsigned SrcReg) const = 0;
+
   /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
   /// equivalent opcode that writes \p Channels Channels.
   int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
+
 };
 
 namespace AMDGPU {
index cd7a616cd3214fb2b54278cb01a2e908d0fd75ea..9722e7dddf06928c4a3a2ffa0444195fdeabd15e 100644 (file)
@@ -139,12 +139,6 @@ AMDGPUPassConfig::addPreISel() {
 
 bool AMDGPUPassConfig::addInstSelector() {
   addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
-
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    // This callbacks this pass uses are not implemented yet on SI.
-    addPass(createAMDGPUIndirectAddressingPass(*TM));
-  }
   return false;
 }
 
index 7bdfa7e3b1f1585b37236fab416bb0e38f6ffd3b..9f8f6a83e45940e8d912d4e41bec6d5dd911e57f 100644 (file)
@@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen
   AMDILISelLowering.cpp
   AMDGPUAsmPrinter.cpp
   AMDGPUFrameLowering.cpp
-  AMDGPUIndirectAddressing.cpp
   AMDGPUISelDAGToDAG.cpp
   AMDGPUMCInstLower.cpp
   AMDGPUMachineFunction.cpp
index 005f6424d0721cf6b7411261a878880ad8a30b38..a11d54a9f7d9230e68919b99a74c0ac30a8d1207 100644 (file)
@@ -210,6 +210,14 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
   }
 }
 
+bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
+  return MI->readsRegister(AMDGPU::AR_X); // any use operand naming AR_X
+}
+
+bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
+  return MI->definesRegister(AMDGPU::AR_X); // any def operand naming AR_X
+}
+
 bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
   if (!isALUInstr(MI->getOpcode())) {
     return false;
@@ -1086,13 +1094,8 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
   return RegIndex;
 }
 
-const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
-                                                     unsigned SourceReg) const {
-  return &AMDGPU::R600_TReg32RegClass;
-}
-
-const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
-  return &AMDGPU::TRegMemRegClass;
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
+  return &AMDGPU::R600_TReg32_XRegClass;
 }
 
 MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@@ -1131,10 +1134,6 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
   return Mov;
 }
 
-const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
-  return &AMDGPU::IndirectRegRegClass;
-}
-
 unsigned R600InstrInfo::getMaxAlusPerClause() const {
   return 115;
 }
@@ -1272,6 +1271,12 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
   return MovImm;
 }
 
+MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned DstReg, unsigned SrcReg) const {
+  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
+}
+
 int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
   return getOperandIdx(MI.getOpcode(), Op);
 }
index 2e36f05aa2fff75dcef0bc494ce079405595de11..d7438ef2771e5c4bb0cc20e2213699dd4dd3de43 100644 (file)
@@ -82,6 +82,8 @@ namespace llvm {
   bool usesTextureCache(const MachineInstr *MI) const;
 
   bool mustBeLastInClause(unsigned Opcode) const;
+  bool usesAddressRegister(MachineInstr *MI) const;
+  bool definesAddressRegister(MachineInstr *MI) const;
   bool readsLDSSrcReg(const MachineInstr *MI) const;
 
   /// \returns The operand index for the given source number.  Legal values
@@ -203,10 +205,7 @@ namespace llvm {
   virtual unsigned calculateIndirectAddress(unsigned RegIndex,
                                             unsigned Channel) const;
 
-  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
-                                                      unsigned SourceReg) const;
-
-  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+  virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
 
   virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
@@ -218,8 +217,6 @@ namespace llvm {
                                   unsigned ValueReg, unsigned Address,
                                   unsigned OffsetReg) const;
 
-  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
-
   unsigned getMaxAlusPerClause() const;
 
   ///buildDefaultInstruction - This function returns a MachineInstr with
@@ -246,6 +243,10 @@ namespace llvm {
                                   unsigned DstReg,
                                   uint64_t Imm) const;
 
+  MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+                              MachineBasicBlock::iterator I,
+                              unsigned DstReg, unsigned SrcReg) const;
+
   /// \brief Get the index of Op in the MachineInstr.
   ///
   /// \returns -1 if the Instruction does not contain the specified \p Op.
index 03d8d8767ed3371a28b685a06483476cfa588d7d..cd9b6eae6ed610a8246f593ea17831706099403f 100644 (file)
@@ -206,6 +206,14 @@ public:
         return false;
       }
     }
+
+    bool ARDef = TII->definesAddressRegister(MII) ||
+                 TII->definesAddressRegister(MIJ);
+    bool ARUse = TII->usesAddressRegister(MII) ||
+                 TII->usesAddressRegister(MIJ);
+    if (ARDef && ARUse)
+      return false;
+
     return true;
   }
 
index 4dc63fe57ea913f3911bab8f674a1b548363f16b..dd8f3ef9814dc32439b1e62db302b81c0f9a1395 100644 (file)
@@ -41,18 +41,13 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(AMDGPU::PRED_SEL_OFF);
   Reserved.set(AMDGPU::PRED_SEL_ZERO);
   Reserved.set(AMDGPU::PRED_SEL_ONE);
+  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
 
   for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
                         E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
     Reserved.set(*I);
   }
 
-  for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
-                                     E = AMDGPU::TRegMemRegClass.end();
-                                     I !=  E; ++I) {
-    Reserved.set(*I);
-  }
-
   const R600InstrInfo *RII =
     static_cast<const R600InstrInfo*>(TM.getInstrInfo());
   std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
index 6fec43c4d7aa47b071196009bbbab36920f8bdca..68bcd207b42c7936915d517460223fa12e0d57a5 100644 (file)
@@ -39,8 +39,6 @@ foreach Index = 0-127 in {
     // Indirect addressing offset registers
     def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
                                               Index, Chan>;
-    def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
-                                                Chan>;
   }
   // 128-bit Temporary Registers
   def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
@@ -210,33 +208,3 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
 
 def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
                                 (add (sequence "T%u_XY", 0, 63))>;
-
-//===----------------------------------------------------------------------===//
-// Register classes for indirect addressing
-//===----------------------------------------------------------------------===//
-
-// Super register for all the Indirect Registers.  This register class is used
-// by the REG_SEQUENCE instruction to specify the registers to use for direct
-// reads / writes which may be written / read by an indirect address.
-class IndirectSuper<string n, list<Register> subregs> :
-    RegisterWithSubRegs<n, subregs> {
-  let Namespace = "AMDGPU";
-  let SubRegIndices =
- [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
-  sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
-}
-
-def IndirectSuperReg : IndirectSuper<"Indirect",
-  [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
-   TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
-   TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
->;
-
-def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
-
-// This register class defines the registers that are the storage units for
-// the "Indirect Addressing" pseudo memory space.
-// XXX: Only use the X channel, until we support wider stack widths
-def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
-  (add (sequence "TRegMem%u_X", 0, 16))
->;
index d3ad06ba9b0162bc8319f378b67df0c0522f6de9..8b35d89088336258d3813994277981cb6965aba6 100644 (file)
@@ -197,6 +197,13 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
   return MI;
 }
 
+// Not implemented for SI yet; uses llvm_unreachable like the other SI stubs.
+MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+                                     MachineBasicBlock::iterator I,
+                                     unsigned DstReg, unsigned SrcReg) const {
+  llvm_unreachable("Not Implemented");
+}
+
 bool SIInstrInfo::isMov(unsigned Opcode) const {
   switch(Opcode) {
   default: return false;
@@ -346,12 +353,7 @@ int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
   llvm_unreachable("Unimplemented");
 }
 
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
-                                                     unsigned SourceReg) const {
-  llvm_unreachable("Unimplemented");
-}
-
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
   llvm_unreachable("Unimplemented");
 }
 
@@ -370,7 +372,3 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
                                    unsigned Address, unsigned OffsetReg) const {
   llvm_unreachable("Unimplemented");
 }
-
-const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
-  llvm_unreachable("Unimplemented");
-}
index 906befdb0b5d8e4a5894c5335c3794a5ffc70673..72bb25e369f673cc14b754ca96eb7218467e2f0f 100644 (file)
@@ -41,6 +41,9 @@ public:
                                            bool NewMI=false) const;
 
   virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
+  MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+                              MachineBasicBlock::iterator I,
+                              unsigned DstReg, unsigned SrcReg) const;
   virtual bool isMov(unsigned Opcode) const;
 
   virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
@@ -62,10 +65,7 @@ public:
   virtual unsigned calculateIndirectAddress(unsigned RegIndex,
                                             unsigned Channel) const;
 
-  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
-                                                      unsigned SourceReg) const;
-
-  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+  virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
 
   virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
                                                  MachineBasicBlock::iterator I,
@@ -78,8 +78,6 @@ public:
                                                 unsigned ValueReg,
                                                 unsigned Address,
                                                 unsigned OffsetReg) const;
-
-  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
   };
 
 namespace AMDGPU {
index bd72cd96e2e5c2fada0e445c00de6d8173fda167..1ef6c358921385b13376f1a9fcdd724c4918ea77 100644 (file)
@@ -63,3 +63,42 @@ entry:
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
+
+; Test direct access of a private array inside a loop.  The private array
+; loads and stores should be lowered to copies, so there shouldn't be any
+; MOVA instructions.
+
+; CHECK: @direct_loop
+; CHECK-NOT: MOVA_INT
+
+define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %prv_array_const = alloca [2 x i32]
+  %prv_array = alloca [2 x i32]
+  %a = load i32 addrspace(1)* %in
+  %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %b = load i32 addrspace(1)* %b_src_ptr
+  %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
+  store i32 %a, i32* %a_dst_ptr
+  %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
+  store i32 %b, i32* %b_dst_ptr
+  br label %for.body
+
+for.body:
+  %inc = phi i32 [0, %entry], [%count, %for.body]
+  %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
+  %x = load i32* %x_ptr
+  %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
+  %y = load i32* %y_ptr
+  %xy = add i32 %x, %y
+  store i32 %xy, i32* %y_ptr
+  %count = add i32 %inc, 1
+  %done = icmp eq i32 %count, 4095
+  br i1 %done, label %for.end, label %for.body
+
+for.end:
+  %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
+  %value = load i32* %value_ptr
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}