R600/SI: Spill VGPRs to scratch space for compute shaders

[oota-llvm.git] / lib / Target / R600 / SIInstrInfo.cpp
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp

index b911d418680d7ca832fde218c1b090acb6ecd20a..71872ee492afcda6ffd3ea9ada326bd1102e9346 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -433,13 +433,9 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
  static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
  
    SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const TargetMachine &TM = MF->getTarget();
  
-  // FIXME: Even though it can cause problems, we need to enable
-  // spilling at -O0, since the fast register allocator always
-  // spills registers that are live at the end of blocks.
-  return MFI->getShaderType() == ShaderType::COMPUTE &&
-         TM.getOptLevel() == CodeGenOpt::None;
+  // FIXME: Implement spilling for other shader types.
+  return MFI->getShaderType() == ShaderType::COMPUTE;
  
  }
  
@@ -450,6 +446,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI) const {
    MachineFunction *MF = MBB.getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    MachineFrameInfo *FrameInfo = MF->getFrameInfo();
    DebugLoc DL = MBB.findDebugLoc(MI);
    int Opcode = -1;
@@ -466,6 +463,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
        case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
      }
    } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+    MFI->setHasSpilledVGPRs();
+
      switch(RC->getSize() * 8) {
        case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
        case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
@@ -480,7 +479,11 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
      FrameInfo->setObjectAlignment(FrameIndex, 4);
      BuildMI(MBB, MI, DL, get(Opcode))
              .addReg(SrcReg)
-            .addFrameIndex(FrameIndex);
+            .addFrameIndex(FrameIndex)
+            // Place-holder registers, these will be filled in by
+            // SIPrepareScratchRegs.
+            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0, RegState::Undef);
    } else {
      LLVMContext &Ctx = MF->getFunction()->getContext();
      Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
@@ -522,7 +525,12 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
    if (Opcode != -1) {
      FrameInfo->setObjectAlignment(FrameIndex, 4);
      BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-            .addFrameIndex(FrameIndex);
+            .addFrameIndex(FrameIndex)
+            // Place-holder registers, these will be filled in by
+            // SIPrepareScratchRegs.
+            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0, RegState::Undef);
+
    } else {
      LLVMContext &Ctx = MF->getFunction()->getContext();
      Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
@@ -553,7 +561,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator Insert = Entry.front();
      DebugLoc DL = Insert->getDebugLoc();
  
-    TIDReg = RI.findUnusedVGPR(MF->getRegInfo());
+    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
      if (TIDReg == AMDGPU::NoRegister)
        return TIDReg;