R600/SI: Use external symbols for scratch buffer
authorTom Stellard <thomas.stellard@amd.com>
Tue, 20 Jan 2015 17:49:47 +0000 (17:49 +0000)
committerTom Stellard <thomas.stellard@amd.com>
Tue, 20 Jan 2015 17:49:47 +0000 (17:49 +0000)
We were passing the scratch buffer address to the shaders via user sgprs,
but now we use external symbols and have the driver patch the shader
using reloc information.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226586 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/R600/AMDGPU.h
lib/Target/R600/AMDGPUISelDAGToDAG.cpp
lib/Target/R600/AMDGPUMCInstLower.cpp
lib/Target/R600/SIDefines.h
lib/Target/R600/SIInstrInfo.cpp
lib/Target/R600/SIInstructions.td
lib/Target/R600/SIPrepareScratchRegs.cpp
lib/Target/R600/SIRegisterInfo.cpp
lib/Target/R600/SIRegisterInfo.h

index fcf9eca80e96ff94bdab8d21894851eb364c3244..c6600550126eb55111dd46c2a34ebfdeceb32de1 100644 (file)
@@ -77,7 +77,11 @@ extern Target TheGCNTarget;
 
 namespace AMDGPU {
 enum TargetIndex {
-  TI_CONSTDATA_START
+  TI_CONSTDATA_START,
+  TI_SCRATCH_RSRC_DWORD0,
+  TI_SCRATCH_RSRC_DWORD1,
+  TI_SCRATCH_RSRC_DWORD2,
+  TI_SCRATCH_RSRC_DWORD3
 };
 }
 
index 28b4183d277e30c689ab0e35bfdd5126eb052fd3..e0e81680cc087ee03e0b8eccf00f6df801e05d95 100644 (file)
@@ -962,16 +962,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
   const SITargetLowering& Lowering =
     *static_cast<const SITargetLowering*>(getTargetLowering());
 
-  unsigned ScratchPtrReg =
-      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
   unsigned ScratchOffsetReg =
       TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
   Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
                                 ScratchOffsetReg, MVT::i32);
+  SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
+  SDValue ScratchRsrcDword0 =
+      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
 
-  SDValue ScratchPtr =
-    CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
-                           MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
+  SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
+  SDValue ScratchRsrcDword1 =
+      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
+
+  const SDValue RsrcOps[] = {
+      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+      ScratchRsrcDword0,
+      CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+      ScratchRsrcDword1,
+      CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+  };
+  SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+                                              MVT::v2i32, RsrcOps), 0);
   Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
   SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
       MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
index 5d870d5e6613c78797efc48d210210376d0debce..03aa32d05ef0bc27a486d0c382978e5771a1b312 100644 (file)
@@ -80,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
       MCOp = MCOperand::CreateExpr(Expr);
       break;
     }
+    case MachineOperand::MO_ExternalSymbol: {
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
+      const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+      MCOp = MCOperand::CreateExpr(Expr);
+      break;
+    }
     }
     OutMI.addOperand(MCOp);
   }
index 73a9c73d8e7b0fbf3b8f681c7a193fb1d1269f03..1c74dda5362c7b08d44871e890fea20cb0779e8f 100644 (file)
@@ -163,4 +163,5 @@ namespace SIOutMods {
 #define R_00B860_COMPUTE_TMPRING_SIZE                                   0x00B860
 #define   S_00B860_WAVESIZE(x)                                        (((x) & 0x1FFF) << 12)
 
+
 #endif
index 8c2f324005aae5e97273e976b8f4546318908779..ccf90ddfae040620c6249b38d1609a2505b9d3a5 100644 (file)
@@ -482,7 +482,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
             .addFrameIndex(FrameIndex)
             // Place-holder registers, these will be filled in by
             // SIPrepareScratchRegs.
-            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
             .addReg(AMDGPU::SGPR0, RegState::Undef);
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
@@ -528,7 +528,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
             .addFrameIndex(FrameIndex)
             // Place-holder registers, these will be filled in by
             // SIPrepareScratchRegs.
-            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
             .addReg(AMDGPU::SGPR0, RegState::Undef);
 
   } else {
index c09bed7840fcabd92f60e9085122614cc85080b3..bd680233e6586aad40315b447d4cde0a765bdfc8 100644 (file)
@@ -1951,14 +1951,14 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
   let UseNamedOperandTable = 1 in {
     def _SAVE : InstSI <
       (outs),
-      (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+      (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
            SReg_32:$scratch_offset),
       "", []
     >;
 
     def _RESTORE : InstSI <
       (outs sgpr_class:$dst),
-      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+      (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
       "", []
     >;
   } // End UseNamedOperandTable = 1
@@ -1974,14 +1974,14 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
   let UseNamedOperandTable = 1 in {
     def _SAVE : InstSI <
       (outs),
-      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
            SReg_32:$scratch_offset),
       "", []
     >;
 
     def _RESTORE : InstSI <
       (outs vgpr_class:$dst),
-      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+      (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
       "", []
     >;
   } // End UseNamedOperandTable = 1
index f0e7edec6b488f72f369154251090ea5c5410bd0..b7934bd9e9194c7442230dc8f4d2c2894965badd 100644 (file)
@@ -29,6 +29,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 
+#include "llvm/Support/Debug.h"
 using namespace llvm;
 
 namespace {
@@ -84,28 +85,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
   if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
     Entry->addLiveIn(ScratchOffsetPreloadReg);
 
-  // Load the scratch pointer
-  unsigned ScratchPtrReg =
-      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
-  int ScratchPtrFI = -1;
-
-  if (ScratchPtrReg != AMDGPU::NoRegister) {
-    // Found an SGPR to use.
-    MRI.setPhysRegUsed(ScratchPtrReg);
-    BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
-            .addReg(ScratchPtrPreloadReg);
-  } else {
-    // No SGPR is available, we must spill.
-    ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
-    BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
-            .addReg(ScratchPtrPreloadReg)
-            .addFrameIndex(ScratchPtrFI);
-  }
-
   // Load the scratch offset.
   unsigned ScratchOffsetReg =
       TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
-  int ScratchOffsetFI = ~0;
+  int ScratchOffsetFI = -1;
 
   if (ScratchOffsetReg != AMDGPU::NoRegister) {
     // Found an SGPR to use
@@ -125,22 +108,26 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
   // add them to all the SI_SPILL_V* instructions.
 
   RegScavenger RS;
-  bool UseRegScavenger =
-      (ScratchPtrReg == AMDGPU::NoRegister ||
-      ScratchOffsetReg == AMDGPU::NoRegister);
+  unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
+  RS.addScavengingFrameIndex(ScratchRsrcFI);
+
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; ++BI) {
 
     MachineBasicBlock &MBB = *BI;
-    if (UseRegScavenger)
-      RS.enterBasicBlock(&MBB);
+    // Add the scratch offset reg as a live-in so that the register scavenger
+    // doesn't re-use it.
+    if (!MBB.isLiveIn(ScratchOffsetReg))
+      MBB.addLiveIn(ScratchOffsetReg);
+    RS.enterBasicBlock(&MBB);
 
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
       MachineInstr &MI = *I;
+      RS.forward(I);
       DebugLoc DL = MI.getDebugLoc();
       switch(MI.getOpcode()) {
-        default: break;;
+        default: break;
         case AMDGPU::SI_SPILL_V512_SAVE:
         case AMDGPU::SI_SPILL_V256_SAVE:
         case AMDGPU::SI_SPILL_V128_SAVE:
@@ -153,18 +140,35 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
         case AMDGPU::SI_SPILL_V256_RESTORE:
         case AMDGPU::SI_SPILL_V512_RESTORE:
 
-          // Scratch Pointer
-          if (ScratchPtrReg == AMDGPU::NoRegister) {
-            ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
-            BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
-                    ScratchPtrReg)
-                    .addFrameIndex(ScratchPtrFI)
-                    .addReg(AMDGPU::NoRegister)
-                    .addReg(AMDGPU::NoRegister);
-          } else if (!MBB.isLiveIn(ScratchPtrReg)) {
-            MBB.addLiveIn(ScratchPtrReg);
-          }
+          // Scratch resource
+          unsigned ScratchRsrcReg =
+              RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
+
+          uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+                          0xffffffff; // Size
+
+          unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+          unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+          unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+          unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
+                  .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
+                  .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
+                  .addImm(Rsrc & 0xffffffff)
+                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
+                  .addImm(Rsrc >> 32)
+                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
+          // Scratch Offset
           if (ScratchOffsetReg == AMDGPU::NoRegister) {
             ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
             BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
@@ -176,20 +180,26 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
             MBB.addLiveIn(ScratchOffsetReg);
           }
 
-          if (ScratchPtrReg == AMDGPU::NoRegister ||
+          if (ScratchRsrcReg == AMDGPU::NoRegister ||
               ScratchOffsetReg == AMDGPU::NoRegister) {
             LLVMContext &Ctx = MF.getFunction()->getContext();
             Ctx.emitError("ran out of SGPRs for spilling VGPRs");
-            ScratchPtrReg = AMDGPU::SGPR0;
+            ScratchRsrcReg = AMDGPU::SGPR0;
             ScratchOffsetReg = AMDGPU::SGPR0;
           }
-          MI.getOperand(2).setReg(ScratchPtrReg);
+          MI.getOperand(2).setReg(ScratchRsrcReg);
+          MI.getOperand(2).setIsKill(true);
+          MI.getOperand(2).setIsUndef(false);
           MI.getOperand(3).setReg(ScratchOffsetReg);
+          MI.getOperand(3).setIsUndef(false);
+          MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
+          MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
+          MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
+          MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
 
+          MI.dump();
           break;
       }
-      if (UseRegScavenger)
-        RS.forward();
     }
   }
   return true;
index 59ff8a27d0574f17caa56f42083035a116c3f1d5..321d25fce73078dc40a24cee8d3c9ccb6ca1248e 100644 (file)
@@ -98,7 +98,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
 void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                            unsigned LoadStoreOp,
                                            unsigned Value,
-                                           unsigned ScratchPtr,
+                                           unsigned ScratchRsrcReg,
                                            unsigned ScratchOffset,
                                            int64_t Offset,
                                            RegScavenger *RS) const {
@@ -113,34 +113,11 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
   bool RanOutOfSGPRs = false;
   unsigned SOffset = ScratchOffset;
 
-  unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
-  if (RsrcReg == AMDGPU::NoRegister) {
-    RanOutOfSGPRs = true;
-    RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
-  }
-
   unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
   unsigned Size = NumSubRegs * 4;
 
-  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
-                  0xffffffff; // Size
-
-  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
-          getSubReg(RsrcReg, AMDGPU::sub0_sub1))
-          .addReg(ScratchPtr)
-          .addReg(RsrcReg, RegState::ImplicitDefine);
-
-  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
-          getSubReg(RsrcReg, AMDGPU::sub2))
-          .addImm(Rsrc & 0xffffffff)
-          .addReg(RsrcReg, RegState::ImplicitDefine);
-
-  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
-          getSubReg(RsrcReg, AMDGPU::sub3))
-          .addImm(Rsrc >> 32)
-          .addReg(RsrcReg, RegState::ImplicitDefine);
-
   if (!isUInt<12>(Offset + Size)) {
+    dbgs() << "Offset scavenge\n";
     SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
     if (SOffset == AMDGPU::NoRegister) {
       RanOutOfSGPRs = true;
@@ -163,7 +140,7 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
 
     BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
             .addReg(SubReg, getDefRegState(IsLoad))
-            .addReg(RsrcReg, getKillRegState(IsKill))
+            .addReg(ScratchRsrcReg, getKillRegState(IsKill))
             .addImm(Offset)
             .addReg(SOffset, getKillRegState(IsKill))
             .addImm(0) // glc
@@ -236,6 +213,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         }
 
         if (isM0) {
+          dbgs() << "Scavenge M0\n";
           SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
         }
 
@@ -262,7 +240,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V32_SAVE:
       buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
             TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
-            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
             TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
              FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
@@ -274,7 +252,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V512_RESTORE: {
       buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
             TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
-            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
             TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
             FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
index d14212c2b10455b960032da32b1d80541e262916..8aa02c30a431e053af0ce5d5536267443bce736f 100644 (file)
@@ -111,7 +111,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
 private:
   void buildScratchLoadStore(MachineBasicBlock::iterator MI,
                              unsigned LoadStoreOp, unsigned Value,
-                             unsigned ScratchPtr, unsigned ScratchOffset,
+                             unsigned ScratchRsrcReg, unsigned ScratchOffset,
                              int64_t Offset, RegScavenger *RS) const;
 };