R600/SI: Only create one instruction when spilling/restoring register v3

author Tom Stellard <thomas.stellard@amd.com>

Fri, 2 May 2014 15:41:42 +0000 (15:41 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Fri, 2 May 2014 15:41:42 +0000 (15:41 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Fri, 2 May 2014 15:41:42 +0000 (15:41 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 2 May 2014 15:41:42 +0000 (15:41 +0000)
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp

index 5d08b91ea7bbac07207b648899412819bd0be4b8..454b7c2d55f878e80364420ee8fbbabf7abb3ee6 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -187,27 +187,45 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        int FrameIndex,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI) const {
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
    DebugLoc DL = MBB.findDebugLoc(MI);
    unsigned KillFlag = isKill ? RegState::Kill : 0;
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  
    if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
-    unsigned Lane = MFI->SpillTracker.getNextLane(MRI);
-    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
-            MFI->SpillTracker.LaneVGPR)
+    unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());
+
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
              .addReg(SrcReg, KillFlag)
              .addImm(Lane);
+    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
+  } else if (RI.isSGPRClass(RC)) {
+    // We are only allowed to create one new instruction when spilling
+    // registers, so we need to use a pseudo instruction for vector
+    // registers.
+    //
+    // Reserve a spot in the spill tracker for each sub-register of
+    // the vector register.
+    unsigned NumSubRegs = RC->getSize() / 4;
+    unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
+                                                        NumSubRegs);
      MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
-                                    Lane);
-  } else {
-    for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
-      unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
-              .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
-      storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
-                          &AMDGPU::SReg_32RegClass, TRI);
+                                    FirstLane);
+
+    unsigned Opcode;
+    switch (RC->getSize() * 8) {
+    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
+    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
+    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
+    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
+    default: llvm_unreachable("Cannot spill register class");
      }
+
+    BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
+            .addReg(SrcReg)
+            .addImm(FrameIndex);
+  } else {
+    llvm_unreachable("VGPR spilling not supported");
    }
  }
  
@@ -216,30 +234,125 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         unsigned DestReg, int FrameIndex,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI) const {
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
    DebugLoc DL = MBB.findDebugLoc(MI);
    if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
-     SIMachineFunctionInfo::SpilledReg Spill =
+    SIMachineFunctionInfo::SpilledReg Spill =
          MFI->SpillTracker.getSpilledReg(FrameIndex);
      assert(Spill.VGPR);
      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
              .addReg(Spill.VGPR)
              .addImm(Spill.Lane);
+    insertNOPs(MI, 3);
+  } else if (RI.isSGPRClass(RC)){
+    unsigned Opcode;
+    switch(RC->getSize() * 8) {
+    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
+    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
+    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
+    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
+    default: llvm_unreachable("Cannot spill register class");
+    }
+
+    SIMachineFunctionInfo::SpilledReg Spill =
+        MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+            .addReg(Spill.VGPR)
+            .addImm(FrameIndex);
+    insertNOPs(MI, 3);
    } else {
-    for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
-      unsigned Flags = RegState::Define;
-      if (i == 0) {
-        Flags |= RegState::Undef;
-      }
-      unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
-                           &AMDGPU::SReg_32RegClass, TRI);
-      BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
-              .addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
-              .addReg(SubReg);
+    llvm_unreachable("VGPR spilling not supported");
+  }
+}
+
+static unsigned getNumSubRegsForSpillOp(unsigned Op) {
+
+  switch (Op) {
+  case AMDGPU::SI_SPILL_S512_SAVE:
+  case AMDGPU::SI_SPILL_S512_RESTORE:
+    return 16;
+  case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S256_RESTORE:
+    return 8;
+  case AMDGPU::SI_SPILL_S128_SAVE:
+  case AMDGPU::SI_SPILL_S128_RESTORE:
+    return 4;
+  case AMDGPU::SI_SPILL_S64_SAVE:
+  case AMDGPU::SI_SPILL_S64_RESTORE:
+    return 2;
+  default: llvm_unreachable("Invalid spill opcode");
+  }
+}
+
+void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
+                             int Count) const {
+  while (Count > 0) {
+    int Arg;
+    if (Count >= 8)
+      Arg = 7;
+    else
+      Arg = Count - 1;
+    Count -= 8;
+    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
+            .addImm(Arg);
+  }
+}
+
+bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+  SIMachineFunctionInfo *MFI =
+      MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MBB.findDebugLoc(MI);
+  switch (MI->getOpcode()) {
+  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+
+  // SGPR register spill
+  case AMDGPU::SI_SPILL_S512_SAVE:
+  case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S128_SAVE:
+  case AMDGPU::SI_SPILL_S64_SAVE: {
+    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+    unsigned FrameIndex = MI->getOperand(2).getImm();
+
+    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+      SIMachineFunctionInfo::SpilledReg Spill;
+      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
+                                            &AMDGPU::SGPR_32RegClass, i);
+      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
+              MI->getOperand(0).getReg())
+              .addReg(SubReg)
+              .addImm(Spill.Lane + i);
      }
+    MI->eraseFromParent();
+    break;
    }
+
+  // SGPR register restore
+  case AMDGPU::SI_SPILL_S512_RESTORE:
+  case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S128_RESTORE:
+  case AMDGPU::SI_SPILL_S64_RESTORE: {
+    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+      SIMachineFunctionInfo::SpilledReg Spill;
+      unsigned FrameIndex = MI->getOperand(2).getImm();
+      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
+                                   &AMDGPU::SGPR_32RegClass, i);
+      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
+              .addReg(MI->getOperand(1).getReg())
+              .addImm(Spill.Lane + i);
+    }
+    MI->eraseFromParent();
+    break;
+  }
+  }
+  return true;
  }
  
  MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h

index 63f1d7fdee892b87f6416add459424c85f081216..d7992742216d454ba9f8298b54886483005da35b 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -73,6 +73,8 @@ public:
                              const TargetRegisterClass *RC,
                              const TargetRegisterInfo *TRI) const override;
  
+  virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
    unsigned commuteOpcode(unsigned Opcode) const;
  
    MachineInstr *commuteInstruction(MachineInstr *MI,
@@ -165,6 +167,8 @@ public:
  
    void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
                unsigned SavReg, unsigned IndexReg) const;
+
+  void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
  };
  
  namespace AMDGPU {
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index 27e7abe1a388576ca4c909618b40bc53e7c1fb19..b93de36ddf862aac2bd818f557549295e819967a 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -369,7 +369,7 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
  
  let Predicates = [isSI] in {
  
-//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+def S_NOP : SOPP <0x00000000, (ins i16imm:$SIMM16), "S_NOP $SIMM16", []>;
  
  let isTerminator = 1 in {
  
@@ -1574,6 +1574,27 @@ def V_SUB_F64 : InstSI <
  
  } // end usesCustomInserter
  
+multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
+
+  def _SAVE : InstSI <
+    (outs VReg_32:$dst),
+    (ins sgpr_class:$src, i32imm:$frame_idx),
+    "", []
+  >;
+
+  def _RESTORE : InstSI <
+    (outs sgpr_class:$dst),
+    (ins VReg_32:$src, i32imm:$frame_idx),
+    "", []
+  >;
+
+}
+
+defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
+defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
+defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+
  } // end IsCodeGenOnly, isPseudo
  
  def : Pat<
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp

index ea04346e50975fa129aef087baf81c26c8f67b34..af609958129c906bc73ab81e8e128c7607ebe9d7 100644 (file)
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,8 +10,11 @@
  
  
  #include "SIMachineFunctionInfo.h"
+#include "SIInstrInfo.h"
  #include "SIRegisterInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
  
  #define MAX_LANES 64
  
@@ -26,21 +29,57 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
      PSInputAddr(0),
      SpillTracker() { }
  
-static unsigned createLaneVGPR(MachineRegisterInfo &MRI) {
-  return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
+  unsigned VGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+
+  // We need to add this register as live out for the function, in order to
+  // have the live range calculated directly.
+  //
+  // When register spilling begins, we have already calculated the
+  // live intervals for all the registers.  Since we are spilling SGPRs to
+  // VGPRs, we need to update the Lane VGPR's live interval every time we
+  // spill or restore a register.
+  //
+  // Unfortunately, there is no good way to update the live interval as
+  // the TargetInstrInfo callbacks for spilling and restoring don't give
+  // us access to the live interval information.
+  //
+  // We are lucky, though, because the InlineSpiller calls
+  // LiveRangeEdit::calculateRegClassAndHint() which iterates through
+  // all the new registers that have been created when restoring a register
+  // and calls LiveIntervals::getInterval(), which creates and computes
+  // the live interval for the newly created register.  However, once this
+  // live interval is created, it doesn't change and since we usually reuse
+  // the Lane VGPR multiple times, this means any uses after the first aren't
+  // added to the live interval.
+  //
+  // To work around this, we add Lane VGPRs to the function's live-out list,
+  // so that we can guarantee its live range will cover all of its uses.
+
+  for (MachineBasicBlock &MBB : *MF) {
+    if (MBB.back().getOpcode() == AMDGPU::S_ENDPGM) {
+      MBB.back().addOperand(*MF, MachineOperand::CreateReg(VGPR, false, true));
+      return VGPR;
+    }
+  }
+  MF->getFunction()->getContext().emitError(
+      "Could not found S_ENGPGM instrtuction.");
+  return VGPR;
  }
  
-unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) {
+unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes(
+    MachineRegisterInfo &MRI, MachineFunction *MF, unsigned NumRegs) {
+  unsigned StartLane = CurrentLane;
+  CurrentLane += NumRegs;
    if (!LaneVGPR) {
-    LaneVGPR = createLaneVGPR(MRI);
+    LaneVGPR = createLaneVGPR(MRI, MF);
    } else {
-    CurrentLane++;
-    if (CurrentLane == MAX_LANES) {
-      CurrentLane = 0;
-      LaneVGPR = createLaneVGPR(MRI);
+    if (CurrentLane >= MAX_LANES) {
+      StartLane = CurrentLane = 0;
+      LaneVGPR = createLaneVGPR(MRI, MF);
      }
    }
-  return CurrentLane;
+  return StartLane;
  }
  
  void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h

index ef38270d51a104b7e08065926c94748f036f68f0..96e619bde8d6b7931a86edb2238419d2eb9957d3 100644 (file)
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -43,7 +43,12 @@ public:
    public:
      unsigned LaneVGPR;
      RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
-    unsigned getNextLane(MachineRegisterInfo &MRI);
+    /// \p NumRegs The number of consecutive registers that need to be spilled.
+    ///            This function will ensure that all registers are stored in
+    ///            the same VGPR.
+    /// \returns The lane to be used for storing the first register.
+    unsigned reserveLanes(MachineRegisterInfo &MRI, MachineFunction *MF,
+                          unsigned NumRegs = 1);
      void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
      const SpilledReg& getSpilledReg(unsigned FrameIndex);
      bool programSpillsRegisters() { return !SpilledRegisters.empty(); }
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp

index 5897fbca94c5d501e4b738742dfefd2c7c05073a..8dc9a05799c37a40805b2c187f3c905bccc6564e 100644 (file)
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -129,3 +129,10 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
      return &AMDGPU::VGPR_32RegClass;
    }
  }
+
+unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
+                                          const TargetRegisterClass *SubRC,
+                                          unsigned Channel) const {
+  unsigned Index = getHWRegIndex(Reg);
+  return SubRC->getRegister(Index + Channel);
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h

index 54717c184bc5f2ce1a4bfa857359e50ec7768cc2..36b4fcd32a89d60e5f4b463d77593c70e0ef43c0 100644 (file)
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -63,6 +63,12 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
    /// be returned.
    const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
                                              unsigned SubIdx) const;
+
+  /// \p Channel This is the register channel (e.g. a value from 0-16), not the
+  ///            SubReg index.
+  /// \returns The sub-register of Reg that is in Channel.
+  unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
+                            unsigned Channel) const;
  };
  
  } // End namespace llvm
author	Tom Stellard <thomas.stellard@amd.com>
	Fri, 2 May 2014 15:41:42 +0000 (15:41 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Fri, 2 May 2014 15:41:42 +0000 (15:41 +0000)
lib/Target/R600/SIInstrInfo.cpp		patch \| blob \| history
lib/Target/R600/SIInstrInfo.h		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
lib/Target/R600/SIMachineFunctionInfo.cpp		patch \| blob \| history
lib/Target/R600/SIMachineFunctionInfo.h		patch \| blob \| history
lib/Target/R600/SIRegisterInfo.cpp		patch \| blob \| history
lib/Target/R600/SIRegisterInfo.h		patch \| blob \| history