//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
}
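
// Append the whole of QSrc onto the end of QDst and clear QSrc.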
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst) {
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}
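
// Pick the next unit to schedule. The candidates are tried in priority
// order: pending AR defs, an ALU pick (subject to the clause-switch
// heuristics computed below), a fetch pick, an "other" pick, and finally
// the deferred AR uses.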
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  NextInstKind = IDOther;

  IsTopNode = false;

  // check if we might want to switch current clause type
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());

  // We want to schedule AR defs as soon as possible to make sure they aren't
  // put in a different ALU clause from their uses.
  if (!SU && !UnscheduledARDefs.empty()) {
      SU = UnscheduledARDefs[0];
      UnscheduledARDefs.erase(UnscheduledARDefs.begin());
      NextInstKind = IDAlu;
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // try to pick ALU
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // try to pick FETCH
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // try to pick other
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  // We want to schedule the AR uses as late as possible to make sure that
  // the AR defs have been released.
  if (!SU && !UnscheduledARUses.empty()) {
      SU = UnscheduledARUses[0];
      UnscheduledARUses.erase(UnscheduledARUses.begin());
      NextInstKind = IDAlu;
  }
111 dbgs() << " ** Pick node **\n";
114 dbgs() << "NO NODE \n";
115 for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
116 const SUnit &S = DAG->SUnits[i];
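
// Account for the unit just scheduled: a whole-group ALU instruction counts
// as four emitted slots, and every ALU_LITERAL_X operand counts as one extra,
// so CurEmitted tracks the effective clause size.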
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
          E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
}
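
// Identify COPYs whose source is a physical register; releaseBottomNode keeps
// them on a separate list so pickNode can fall back to them when no other ALU
// pick is available.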
static bool
isPhysicalRegCopy(MachineInstr *MI) {
  if (MI->getOpcode() != AMDGPU::COPY)
    return false;

  return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
}

void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // Check for AR register defines
  for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
                                        E = SU->getInstr()->operands_end();
                                        I != E; ++I) {
    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
      if (I->isDef()) {
        UnscheduledARDefs.push_back(SU);
      } else {
        UnscheduledARUses.push_back(SU);
      }
      return;
    }
  }

  // There is no export clause; we can schedule one as soon as it's ready
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}
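
// A physical register belongs to RC if RC contains it; a virtual register
// must have RC as its assigned register class.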
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}
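
// Classify an ALU instruction by the slot(s) it can occupy: a whole
// instruction group (AluT_XYZW), a fixed channel (AluT_X..AluT_W) when the
// destination is already constrained, any channel (AluAny), a predicate
// (AluPredX), or nothing at all (AluDiscarded).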
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
    return AluPredX;
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT_4:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL, don't consider it in scheduling
      return AluDiscarded;
    }
  default:
    break;
  }

  // Does the instruction take a whole IG ?
  if(TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel ?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0:
    return AluT_X;
  case AMDGPU::sub1:
    return AluT_Y;
  case AMDGPU::sub2:
    return AluT_Z;
  case AMDGPU::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already member of a X/Y/Z/W class ?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}
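
// Map an instruction onto one of the three clause kinds: IDFetch for
// texture/vertex-cache users, IDAlu for ALU instructions and ALU-like
// pseudos, IDOther for everything else.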
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::PRED_X:
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT_4:
    return IDAlu;
  default:
    return IDOther;
  }
}
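
// Pop the most recently released unit of Q that can legally be bundled with
// the instruction group under construction, or NULL if none can.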
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
  if (Q.empty())
    return NULL;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
      It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->canBundle(InstructionsGroupCandidate)) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    } else {
      InstructionsGroupCandidate.pop_back();
    }
  }
  return NULL;
}
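
// Sort all pending ALU units into the AvailableAlus buckets according to
// their AluKind.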
void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
  QSrc.clear();
}
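
// Begin a new instruction group: reset the slot mask, drop the candidate
// bundle, and reclassify any newly pending ALU units.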
void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert (OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}
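
// Constrain MI's destination register class so the result lands in the given
// channel; skipped when the destination register is also read by MI, because
// constraining it then would upset the register pressure tracker.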
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // PressureRegister crashes if an operand is def and used in the same inst
  // and we try to constrain its regclass
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
      E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() &&
        MO.getReg() == MI->getOperand(0).getReg())
      return;
  }
  // Constrains the regclass of DestReg to assign it to Slot
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}
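
// Fill the given channel, preferring an instruction already bound to that
// channel over constraining a channel-agnostic (AluAny) one.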
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}

bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty() &&
      AvailableAlus[AluPredX].empty();
}
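
// Pick an ALU instruction for the group being built: whole-group picks
// (PRED_X, discarded copies, T_XYZW) open a fresh group; otherwise channels
// are filled one at a time from W down to X.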
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Bottom up scheduling: predX must come first
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluPredX]);
      }
      // Flush physical reg copies (RA will discard them)
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW alu available, use it
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}
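
// Take the most recently released unit from the given Available queue,
// refilling it from the Pending list when it runs dry.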
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = 0;
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.resize(AQ.size() - 1);
  }
  return SU;
}