lib/Target/R600/R600MachineScheduler.cpp

   1 //===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief R600 Machine Scheduler interface
  12 // TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #define DEBUG_TYPE "misched"
  17
  18 #include "R600MachineScheduler.h"
  19 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  20 #include "llvm/CodeGen/MachineRegisterInfo.h"
  21 #include "llvm/Pass.h"
  22 #include "llvm/PassManager.h"
  23 #include "llvm/Support/raw_ostream.h"
  24
  25 using namespace llvm;
  26
  27 void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  28
  29   DAG = dag;
  30   TII = static_cast<const R600InstrInfo*>(DAG->TII);
  31   TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  32   MRI = &DAG->MRI;
  33   CurInstKind = IDOther;
  34   CurEmitted = 0;
  35   OccupedSlotsMask = 15;
  36   InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  37   InstKindLimit[IDOther] = 32;
  38
  39   const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  40   InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
  41 }
  42
  43 void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
  44                                   std::vector<SUnit *> &QDst)
  45 {
  46   QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  47   QSrc.clear();
  48 }
  49
  50 SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  51   SUnit *SU = 0;
  52   NextInstKind = IDOther;
  53
  54   IsTopNode = false;
  55
  56   // check if we might want to switch current clause type
  57   bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
  58       (Available[CurInstKind].empty());
  59   bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
  60       (!Available[IDFetch].empty() || !Available[IDOther].empty());
  61
  62   if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
  63       (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
  64     // try to pick ALU
  65     SU = pickAlu();
  66     if (SU) {
  67       if (CurEmitted >= InstKindLimit[IDAlu])
  68         CurEmitted = 0;
  69       NextInstKind = IDAlu;
  70     }
  71   }
  72
  73   if (!SU) {
  74     // try to pick FETCH
  75     SU = pickOther(IDFetch);
  76     if (SU)
  77       NextInstKind = IDFetch;
  78   }
  79
  80   // try to pick other
  81   if (!SU) {
  82     SU = pickOther(IDOther);
  83     if (SU)
  84       NextInstKind = IDOther;
  85   }
  86
  87   DEBUG(
  88       if (SU) {
  89         dbgs() << " ** Pick node **\n";
  90         SU->dump(DAG);
  91       } else {
  92         dbgs() << "NO NODE \n";
  93         for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
  94           const SUnit &S = DAG->SUnits[i];
  95           if (!S.isScheduled)
  96             S.dump(DAG);
  97         }
  98       }
  99   );
 100
 101   return SU;
 102 }
 103
 104 void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
 105
 106   if (NextInstKind != CurInstKind) {
 107     DEBUG(dbgs() << "Instruction Type Switch\n");
 108     if (NextInstKind != IDAlu)
 109       OccupedSlotsMask = 15;
 110     CurEmitted = 0;
 111     CurInstKind = NextInstKind;
 112   }
 113
 114   if (CurInstKind == IDAlu) {
 115     switch (getAluKind(SU)) {
 116     case AluT_XYZW:
 117       CurEmitted += 4;
 118       break;
 119     case AluDiscarded:
 120       break;
 121     default: {
 122       ++CurEmitted;
 123       for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
 124           E = SU->getInstr()->operands_end(); It != E; ++It) {
 125         MachineOperand &MO = *It;
 126         if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
 127           ++CurEmitted;
 128       }
 129     }
 130     }
 131   } else {
 132     ++CurEmitted;
 133   }
 134
 135
 136   DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
 137
 138   if (CurInstKind != IDFetch) {
 139     MoveUnits(Pending[IDFetch], Available[IDFetch]);
 140   }
 141 }
 142
 143 void R600SchedStrategy::releaseTopNode(SUnit *SU) {
 144   DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
 145
 146 }
 147
 148 void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
 149   DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
 150
 151   int IK = getInstKind(SU);
 152   // There is no export clause, we can schedule one as soon as its ready
 153   if (IK == IDOther)
 154     Available[IDOther].push_back(SU);
 155   else
 156     Pending[IK].push_back(SU);
 157
 158 }
 159
 160 bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
 161                                           const TargetRegisterClass *RC) const {
 162   if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
 163     return RC->contains(Reg);
 164   } else {
 165     return MRI->getRegClass(Reg) == RC;
 166   }
 167 }
 168
 169 R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
 170   MachineInstr *MI = SU->getInstr();
 171
 172     switch (MI->getOpcode()) {
 173     case AMDGPU::PRED_X:
 174       return AluPredX;
 175     case AMDGPU::INTERP_PAIR_XY:
 176     case AMDGPU::INTERP_PAIR_ZW:
 177     case AMDGPU::INTERP_VEC_LOAD:
 178     case AMDGPU::DOT_4:
 179       return AluT_XYZW;
 180     case AMDGPU::COPY:
 181       if (MI->getOperand(1).isUndef()) {
 182         // MI will become a KILL, don't considers it in scheduling
 183         return AluDiscarded;
 184       }
 185     default:
 186       break;
 187     }
 188
 189     // Does the instruction take a whole IG ?
 190     if(TII->isVector(*MI) ||
 191         TII->isCubeOp(MI->getOpcode()) ||
 192         TII->isReductionOp(MI->getOpcode()))
 193       return AluT_XYZW;
 194
 195     // Is the result already assigned to a channel ?
 196     unsigned DestSubReg = MI->getOperand(0).getSubReg();
 197     switch (DestSubReg) {
 198     case AMDGPU::sub0:
 199       return AluT_X;
 200     case AMDGPU::sub1:
 201       return AluT_Y;
 202     case AMDGPU::sub2:
 203       return AluT_Z;
 204     case AMDGPU::sub3:
 205       return AluT_W;
 206     default:
 207       break;
 208     }
 209
 210     // Is the result already member of a X/Y/Z/W class ?
 211     unsigned DestReg = MI->getOperand(0).getReg();
 212     if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
 213         regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
 214       return AluT_X;
 215     if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
 216       return AluT_Y;
 217     if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
 218       return AluT_Z;
 219     if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
 220       return AluT_W;
 221     if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
 222       return AluT_XYZW;
 223
 224     return AluAny;
 225
 226 }
 227
 228 int R600SchedStrategy::getInstKind(SUnit* SU) {
 229   int Opcode = SU->getInstr()->getOpcode();
 230
 231   if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
 232     return IDFetch;
 233
 234   if (TII->isALUInstr(Opcode)) {
 235     return IDAlu;
 236   }
 237
 238   switch (Opcode) {
 239   case AMDGPU::PRED_X:
 240   case AMDGPU::COPY:
 241   case AMDGPU::CONST_COPY:
 242   case AMDGPU::INTERP_PAIR_XY:
 243   case AMDGPU::INTERP_PAIR_ZW:
 244   case AMDGPU::INTERP_VEC_LOAD:
 245   case AMDGPU::DOT_4:
 246     return IDAlu;
 247   default:
 248     return IDOther;
 249   }
 250 }
 251
 252 SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
 253   if (Q.empty())
 254     return NULL;
 255   for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
 256       It != E; ++It) {
 257     SUnit *SU = *It;
 258     InstructionsGroupCandidate.push_back(SU->getInstr());
 259     if (TII->canBundle(InstructionsGroupCandidate)) {
 260       InstructionsGroupCandidate.pop_back();
 261       Q.erase((It + 1).base());
 262       return SU;
 263     } else {
 264       InstructionsGroupCandidate.pop_back();
 265     }
 266   }
 267   return NULL;
 268 }
 269
 270 void R600SchedStrategy::LoadAlu() {
 271   std::vector<SUnit *> &QSrc = Pending[IDAlu];
 272   for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
 273     AluKind AK = getAluKind(QSrc[i]);
 274     AvailableAlus[AK].push_back(QSrc[i]);
 275   }
 276   QSrc.clear();
 277 }
 278
 279 void R600SchedStrategy::PrepareNextSlot() {
 280   DEBUG(dbgs() << "New Slot\n");
 281   assert (OccupedSlotsMask && "Slot wasn't filled");
 282   OccupedSlotsMask = 0;
 283   InstructionsGroupCandidate.clear();
 284   LoadAlu();
 285 }
 286
 287 void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
 288   unsigned DestReg = MI->getOperand(0).getReg();
 289   // PressureRegister crashes if an operand is def and used in the same inst
 290   // and we try to constraint its regclass
 291   for (MachineInstr::mop_iterator It = MI->operands_begin(),
 292       E = MI->operands_end(); It != E; ++It) {
 293     MachineOperand &MO = *It;
 294     if (MO.isReg() && !MO.isDef() &&
 295         MO.getReg() == MI->getOperand(0).getReg())
 296       return;
 297   }
 298   // Constrains the regclass of DestReg to assign it to Slot
 299   switch (Slot) {
 300   case 0:
 301     MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
 302     break;
 303   case 1:
 304     MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
 305     break;
 306   case 2:
 307     MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
 308     break;
 309   case 3:
 310     MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
 311     break;
 312   }
 313 }
 314
 315 SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
 316   static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
 317   SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
 318   if (SlotedSU)
 319     return SlotedSU;
 320   SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
 321   if (UnslotedSU)
 322     AssignSlot(UnslotedSU->getInstr(), Slot);
 323   return UnslotedSU;
 324 }
 325
 326 bool R600SchedStrategy::isAvailablesAluEmpty() const {
 327   return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() &&
 328       AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
 329       AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
 330       AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty() &&
 331       AvailableAlus[AluPredX].empty();
 332 }
 333
 334 SUnit* R600SchedStrategy::pickAlu() {
 335   while (!isAvailablesAluEmpty()) {
 336     if (!OccupedSlotsMask) {
 337       // Bottom up scheduling : predX must comes first
 338       if (!AvailableAlus[AluPredX].empty()) {
 339         OccupedSlotsMask = 15;
 340         return PopInst(AvailableAlus[AluPredX]);
 341       }
 342       // Flush physical reg copies (RA will discard them)
 343       if (!AvailableAlus[AluDiscarded].empty()) {
 344         OccupedSlotsMask = 15;
 345         return PopInst(AvailableAlus[AluDiscarded]);
 346       }
 347       // If there is a T_XYZW alu available, use it
 348       if (!AvailableAlus[AluT_XYZW].empty()) {
 349         OccupedSlotsMask = 15;
 350         return PopInst(AvailableAlus[AluT_XYZW]);
 351       }
 352     }
 353     for (int Chan = 3; Chan > -1; --Chan) {
 354       bool isOccupied = OccupedSlotsMask & (1 << Chan);
 355       if (!isOccupied) {
 356         SUnit *SU = AttemptFillSlot(Chan);
 357         if (SU) {
 358           OccupedSlotsMask |= (1 << Chan);
 359           InstructionsGroupCandidate.push_back(SU->getInstr());
 360           return SU;
 361         }
 362       }
 363     }
 364     PrepareNextSlot();
 365   }
 366   return NULL;
 367 }
 368
 369 SUnit* R600SchedStrategy::pickOther(int QID) {
 370   SUnit *SU = 0;
 371   std::vector<SUnit *> &AQ = Available[QID];
 372
 373   if (AQ.empty()) {
 374     MoveUnits(Pending[QID], AQ);
 375   }
 376   if (!AQ.empty()) {
 377     SU = AQ.back();
 378     AQ.resize(AQ.size() - 1);
 379   }
 380   return SU;
 381 }
 382