//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
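// For example, when two instructions are available in the same cycle, the
// LatencyPriorityQueue used below prefers the one on the longer remaining
// latency chain; if issuing it would trigger a pipeline hazard, the
// scheduler simply tries the next-best available node instead.
//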
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "ExactHazardRecognizer.h"
#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <map>
#include <set>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

// Post-RA scheduling is enabled with
// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
// override the target.
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
                      cl::desc("Enable scheduling after register allocation"),
                      cl::init(false), cl::Hidden);
static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);
static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                            cl::desc("Enable exact hazard avoidance"),
                            cl::init(true), cl::Hidden);

// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("postra-sched-debugmod",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);

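// For example, passing -postra-sched-debugdiv=2 -postra-sched-debugmod=1
// schedules only every other block visited (those where the running block
// count is odd), which is handy for bisecting a miscompile down to a single
// scheduled block.
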
namespace {
  class PostRAScheduler : public MachineFunctionPass {
    AliasAnalysis *AA;
    CodeGenOpt::Level OptLevel;

  public:
    static char ID;
    PostRAScheduler(CodeGenOpt::Level ol) :
      MachineFunctionPass(&ID), OptLevel(ol) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequired<AliasAnalysis>();
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

  class SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation). Once the operands become available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

    /// AllocatableSet - The set of allocatable registers.
    /// We'll be ignoring anti-dependencies on non-allocatable registers,
    /// because they may not be safe to break.
    const BitVector AllocatableSet;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

    /// AA - AliasAnalysis for making memory reference queries.
    AliasAnalysis *AA;

    /// AntiDepMode - Anti-dependence breaking mode.
    TargetSubtarget::AntiDepBreakMode AntiDepMode;

    /// Classes - For live regs that are only used in one register class in a
    /// live range, the register class. If the register is not live, the
    /// corresponding value is null. If the register is live but used in
    /// multiple register classes, the corresponding value is -1 casted to a
    /// pointer.
    const TargetRegisterClass *
      Classes[TargetRegisterInfo::FirstVirtualRegister];

    /// RegRefs - Map registers to all their references within a live range.
    std::multimap<unsigned, MachineOperand *> RegRefs;

    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
    /// or ~0u if the register is not live.
    unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];

    /// DefIndices - The index of the most recent complete def (proceeding
    /// bottom up), or ~0u if the register is live.
    unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];

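    // For any physical register exactly one of KillIndices[Reg] and
    // DefIndices[Reg] is ~0u at any time (asserted throughout below). For
    // example, a register that is live out of the block has
    // KillIndices[Reg] == BB->size() and DefIndices[Reg] == ~0u.
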
    /// KeepRegs - A set of registers which are live and cannot be changed to
    /// break anti-dependencies.
    SmallSet<unsigned, 4> KeepRegs;

  public:
    SchedulePostRATDList(MachineFunction &MF,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT,
                         ScheduleHazardRecognizer *HR,
                         AliasAnalysis *aa,
                         TargetSubtarget::AntiDepBreakMode adm)
      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
        HazardRec(HR), AA(aa), AntiDepMode(adm) {}

    ~SchedulePostRATDList() {
      delete HazardRec;
    }

    /// StartBlock - Initialize register live-range state for scheduling in
    /// this block.
    ///
    void StartBlock(MachineBasicBlock *BB);

    /// Schedule - Schedule the instruction range using list scheduling.
    ///
    void Schedule();

    /// FixupKills - Fix register kill flags that have been made
    /// invalid due to scheduling.
    ///
    void FixupKills(MachineBasicBlock *MBB);

    /// Observe - Update liveness information to account for the current
    /// instruction, which will not be scheduled.
    ///
    void Observe(MachineInstr *MI, unsigned Count);

    /// FinishBlock - Clean up register live-range state.
    ///
    void FinishBlock();

  private:
    void PrescanInstruction(MachineInstr *MI);
    void ScanInstruction(MachineInstr *MI, unsigned Count);
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ReleaseSuccessors(SUnit *SU);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
    unsigned findSuitableFreeRegister(unsigned AntiDepReg,
                                      unsigned LastNewReg,
                                      const TargetRegisterClass *);
    void StartBlockForKills(MachineBasicBlock *BB);

    // ToggleKillFlag - Toggle a register operand kill flag. Other
    // adjustments may be made to the instruction if necessary. Return
    // true if the operand has been deleted, false if not.
    bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
  };
}

/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
///
static bool isSchedulingBoundary(const MachineInstr *MI,
                                 const MachineFunction &MF) {
  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;

  // Don't attempt to schedule around any instruction that modifies
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
  if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
    return true;

  return false;
}

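// On X86, for example, the stack-pointer check above makes call-frame setup
// and teardown instructions (which adjust ESP/RSP) act as scheduling
// boundaries.
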
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  AA = &getAnalysis<AliasAnalysis>();

  // Check for explicit enable/disable of post-ra scheduling.
  TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
  if (EnablePostRAScheduler.getPosition() > 0) {
    if (!EnablePostRAScheduler)
      return false;
  } else {
    // Check that post-RA scheduling is enabled for this target.
    const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
    if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode))
      return false;
  }

  // Check for antidep breaking override...
  if (EnableAntiDepBreaking.getPosition() > 0) {
    AntiDepMode = (EnableAntiDepBreaking) ?
      TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
  }

  DEBUG(errs() << "PostRAScheduler\n");

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
    (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
    (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA, AntiDepMode);

  // Loop over all of the basic blocks
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
#ifndef NDEBUG
    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
    if (DebugDiv > 0) {
      static int bbcnt = 0;
      if (bbcnt++ % DebugDiv != DebugMod)
        continue;
      errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
        ":MBB ID#" << MBB->getNumber() << " ***\n";
    }
#endif

    // Initialize register live-range state for scheduling in this block.
    Scheduler.StartBlock(MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB->end();
    unsigned Count = MBB->size(), CurrentCount = Count;
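    // For example, in a block "I1; CALL; I2; I3" where the CALL is a
    // scheduling boundary, the walk below schedules {I2, I3} as one region,
    // observes the CALL without scheduling it, and the trailing Run call
    // schedules {I1} as the final region.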
    for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
      MachineInstr *MI = prior(I);
      if (isSchedulingBoundary(MI, Fn)) {
        Scheduler.Run(MBB, I, Current, CurrentCount);
        Scheduler.EmitSchedule(0);
        Current = MI;
        CurrentCount = Count - 1;
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      --Count;
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB->begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
    Scheduler.EmitSchedule(0);

    // Clean up register live-range state.
    Scheduler.FinishBlock();

    // Update register kills.
    Scheduler.FixupKills(MBB);
  }

  return true;
}

/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
  // Call the superclass.
  ScheduleDAGInstrs::StartBlock(BB);

  // Reset the hazard recognizer.
  HazardRec->Reset();

  // Clear out the register class data.
  std::fill(Classes, array_endof(Classes),
            static_cast<const TargetRegisterClass *>(0));

  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  std::fill(DefIndices, array_endof(DefIndices), BB->size());

  // Clear "do not change" set.
  KeepRegs.clear();

  bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());

  // Determine the live-out physregs for this block.
  if (IsReturnBlock) {
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  } else {
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }
  }

  // Mark live-out callee-saved registers. In a return block this is
  // all callee-saved registers. In non-return this is any
  // callee-saved register that is not saved in the prolog.
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  BitVector Pristine = MFI->getPristineRegs(BB);
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    if (!IsReturnBlock && !Pristine.test(Reg)) continue;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }
}

/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::Schedule() {
  DEBUG(errs() << "********** List Scheduling **********\n");

  // Build the scheduling graph.
  BuildSchedGraph(AA);

  if (AntiDepMode != TargetSubtarget::ANTIDEP_NONE) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      Sequence.clear();
      EntrySU = SUnit();
      ExitSU = SUnit();
      BuildSchedGraph(AA);
    }
  }

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
  assert(Count < InsertPosIndex && "Instruction index out of expected range!");

  // Any register which was defined within the previous scheduling region
  // may have been rescheduled and its lifetime may overlap with registers
  // in ways not reflected in our current liveness state. For each such
  // register, adjust the liveness state to be conservatively correct.
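  // For example, a def recorded at index 7 inside a region spanning
  // [5, 12) may have been moved anywhere within that region by the
  // scheduler, so below it is treated as if it sat at index 12, the
  // region's end.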
  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
    if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
      assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
      // Mark this register to be non-renamable.
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      // Move the def index to the end of the previous region, to reflect
      // that the def could theoretically have been scheduled at the end.
      DefIndices[Reg] = InsertPosIndex;
    }

  PrescanInstruction(MI);
  ScanInstruction(MI, Count);
}

/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::FinishBlock() {
  RegRefs.clear();

  // Call the superclass.
  ScheduleDAGInstrs::FinishBlock();
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
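/// For example, with two predecessors at depths 3 and 5 over edges of
/// latency 2 and 1 the step follows the second (5 + 1 = 6 > 3 + 2 = 5); on a
/// tie an anti-dependence edge wins, since those are the edges
/// BreakAntiDependencies may be able to remove.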
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}

void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
  // Scan the register operands for this instruction and update
  // Classes and RegRefs.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    const TargetRegisterClass *NewRC = 0;

    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    // Now check for aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      // If an alias of the reg is used during the live range, give up.
      // Note that this allows us to skip checking if AntiDepReg
      // overlaps with any of the aliases, among other things.
      unsigned AliasReg = *Alias;
      if (Classes[AliasReg]) {
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }

    // If we're still willing to consider this register, note the reference.
    if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
      RegRefs.insert(std::make_pair(Reg, &MO));

    // It's not safe to change register allocation for source operands of
    // instructions that have special allocation requirements.
    if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
      if (KeepRegs.insert(Reg)) {
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg)
          KeepRegs.insert(*Subreg);
      }
    }
  }
}

void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
                                           unsigned Count) {
  // Update liveness.
  // Proceeding upwards, registers that are defed but not used in this
  // instruction are now dead.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isDef()) continue;
    // Ignore two-addr defs.
    if (MI->isRegTiedToUseOperand(i)) continue;

    DefIndices[Reg] = Count;
    KillIndices[Reg] = ~0u;
    assert(((KillIndices[Reg] == ~0u) !=
            (DefIndices[Reg] == ~0u)) &&
           "Kill and Def maps aren't consistent for Reg!");
    KeepRegs.erase(Reg);
    Classes[Reg] = 0;
    RegRefs.erase(Reg);
    // Repeat, for all subregs.
    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
         *Subreg; ++Subreg) {
      unsigned SubregReg = *Subreg;
      DefIndices[SubregReg] = Count;
      KillIndices[SubregReg] = ~0u;
      KeepRegs.erase(SubregReg);
      Classes[SubregReg] = 0;
      RegRefs.erase(SubregReg);
    }
    // Conservatively mark super-registers as unusable.
    for (const unsigned *Super = TRI->getSuperRegisters(Reg);
         *Super; ++Super) {
      unsigned SuperReg = *Super;
      Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
    }
  }
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isUse()) continue;

    const TargetRegisterClass *NewRC = 0;
    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    RegRefs.insert(std::make_pair(Reg, &MO));

    // It wasn't previously live but now it is; this is a kill.
    if (KillIndices[Reg] == ~0u) {
      KillIndices[Reg] = Count;
      DefIndices[Reg] = ~0u;
      assert(((KillIndices[Reg] == ~0u) !=
              (DefIndices[Reg] == ~0u)) &&
             "Kill and Def maps aren't consistent for Reg!");
    }
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      if (KillIndices[AliasReg] == ~0u) {
        KillIndices[AliasReg] = Count;
        DefIndices[AliasReg] = ~0u;
      }
    }
  }
}

unsigned
SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg,
                                               unsigned LastNewReg,
                                               const TargetRegisterClass *RC) {
  for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
       RE = RC->allocation_order_end(MF); R != RE; ++R) {
    unsigned NewReg = *R;
    // Don't replace a register with itself.
    if (NewReg == AntiDepReg) continue;
    // Don't replace a register with one that was recently used to repair
    // an anti-dependence with this AntiDepReg, because that would
    // re-introduce that anti-dependence.
    if (NewReg == LastNewReg) continue;
    // If NewReg is dead and NewReg's most recent def is not before
    // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
    assert(((KillIndices[AntiDepReg] == ~0u) !=
            (DefIndices[AntiDepReg] == ~0u)) &&
           "Kill and Def maps aren't consistent for AntiDepReg!");
    assert(((KillIndices[NewReg] == ~0u) !=
            (DefIndices[NewReg] == ~0u)) &&
           "Kill and Def maps aren't consistent for NewReg!");
    if (KillIndices[NewReg] != ~0u ||
        Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
        KillIndices[AntiDepReg] > DefIndices[NewReg])
      continue;
    return NewReg;
  }

  // No registers are free and available!
  return 0;
}

/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

#ifndef NDEBUG
  {
    DEBUG(errs() << "Critical path has total latency "
          << (Max->getDepth() + Max->Latency) << "\n");
    DEBUG(errs() << "Available regs:");
    for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
      if (KillIndices[Reg] == ~0u)
        DEBUG(errs() << " " << TRI->getName(Reg));
    }
    DEBUG(errs() << '\n');
  }
#endif

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = InsertPosIndex - 1;
  for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          if (!AllocatableSet.test(AntiDepReg))
            // Don't break anti-dependencies on non-allocatable registers.
            AntiDepReg = 0;
          else if (KeepRegs.count(AntiDepReg))
            // Don't break anti-dependencies if a use down below requires
            // this exact register.
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    PrescanInstruction(MI);

    if (MI->getDesc().hasExtraDefRegAllocReq())
      // If this instruction's defs have special allocation requirements, don't
      // break this anti-dependency.
      AntiDepReg = 0;
    else if (AntiDepReg) {
      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (!MO.isReg()) continue;
        unsigned Reg = MO.getReg();
        if (Reg == 0) continue;
        if (MO.isUse() && AntiDepReg == Reg) {
          AntiDepReg = 0;
          break;
        }
      }
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
                                                     LastNewReg[AntiDepReg],
                                                     RC)) {
        DEBUG(errs() << "Breaking anti-dependence edge on "
              << TRI->getName(AntiDepReg)
              << " with " << RegRefs.count(AntiDepReg) << " references"
              << " using " << TRI->getName(NewReg) << "!\n");

        // Update the references to the old register to refer to the new
        // register.
        std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                  std::multimap<unsigned, MachineOperand *>::iterator>
           Range = RegRefs.equal_range(AntiDepReg);
        for (std::multimap<unsigned, MachineOperand *>::iterator
             Q = Range.first, QE = Range.second; Q != QE; ++Q)
          Q->second->setReg(NewReg);

        // We just went back in time and modified history; the
        // liveness information for the anti-dependence reg is now
        // inconsistent. Set the state as if it were dead.
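        // Concretely: NewReg inherits AntiDepReg's class, def and kill
        // indices below, while AntiDepReg is left looking dead, so it is
        // immediately eligible to repair some later anti-dependence.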
        Classes[NewReg] = Classes[AntiDepReg];
        DefIndices[NewReg] = DefIndices[AntiDepReg];
        KillIndices[NewReg] = KillIndices[AntiDepReg];
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");

        Classes[AntiDepReg] = 0;
        DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
        KillIndices[AntiDepReg] = ~0u;
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");

        RegRefs.erase(AntiDepReg);
        Changed = true;
        LastNewReg[AntiDepReg] = NewReg;
      }
    }

    ScanInstruction(MI, Count);
  }

  return Changed;
}

/// StartBlockForKills - Initialize register live-range state for updating kills
///
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);

  // Determine the live-out physregs for this block.
  if (!BB->empty() && BB->back().getDesc().isReturn()) {
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      KillIndices[Reg] = BB->size();
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg)
        KillIndices[*Subreg] = BB->size();
    }
  } else {
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI) {
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        KillIndices[Reg] = BB->size();
        // Repeat, for all subregs.
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg)
          KillIndices[*Subreg] = BB->size();
      }
    }
  }
}

bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
                                          MachineOperand &MO) {
  // Setting kill flag...
  if (!MO.isKill()) {
    MO.setIsKill(true);
    return false;
  }

  // If MO itself is live, clear the kill flag...
  if (KillIndices[MO.getReg()] != ~0u) {
    MO.setIsKill(false);
    return false;
  }

  // If any subreg of MO is live, then create an imp-def for that
  // subreg and keep MO marked as killed.
  MO.setIsKill(false);
  bool AllDead = true;
  const unsigned SuperReg = MO.getReg();
  for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
       *Subreg; ++Subreg) {
    if (KillIndices[*Subreg] != ~0u) {
      MI->addOperand(MachineOperand::CreateReg(*Subreg,
                                               true  /*IsDef*/,
                                               true  /*IsImp*/,
                                               false /*IsKill*/,
                                               false /*IsDead*/));
      AllDead = false;
    }
  }

  if (AllDead)
    MO.setIsKill(true);
  return false;
}

/// FixupKills - Fix the register kill flags; they may have been made
/// incorrect by instruction reordering.
///
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
  DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');

  std::set<unsigned> killedRegs;
  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  StartBlockForKills(MBB);

  // Examine block from end to start...
  unsigned Count = MBB->size();
  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
       I != E; --Count) {
    MachineInstr *MI = --I;

    // Update liveness. Registers that are defed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegTiedToUseOperand(i)) continue;

      KillIndices[Reg] = ~0u;

      // Repeat for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg)
        KillIndices[*Subreg] = ~0u;
    }

    // Examine all used registers and set/clear kill flag. When a
    // register is used multiple times we only set the kill flag on
    // the first use.
    killedRegs.clear();
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;

      bool kill = false;
      if (killedRegs.find(Reg) == killedRegs.end()) {
        kill = true;
        // A register is not killed if any subregs are live...
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg) {
          if (KillIndices[*Subreg] != ~0u) {
            kill = false;
            break;
          }
        }

        // If subreg is not live, then register is killed if it became
        // live in this instruction.
        if (kill)
          kill = (KillIndices[Reg] == ~0u);
      }

      if (MO.isKill() != kill) {
        bool removed = ToggleKillFlag(MI, MO);
        if (removed) {
          DEBUG(errs() << "Fixed <removed> in ");
        } else {
          DEBUG(errs() << "Fixed " << MO << " in ");
        }
        DEBUG(MI->dump());
      }

      killedRegs.insert(Reg);
    }

    // Mark any used register (that is not using undef) and subregs as
    // now live...
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;

      KillIndices[Reg] = Count;

      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg)
        KillIndices[*Subreg] = Count;
    }
  }
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    errs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
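  // For example, a successor whose predecessors issue at cycles 2 and 4 over
  // edges with latencies 3 and 1 ends up with depth max(2 + 3, 4 + 1) = 5.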
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}

/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // In any cycle where we can't schedule any instructions, we must
  // stall or emit a noop, depending on the target.
  bool CycleHasInsts = false;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

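    // E.g. a node released at cycle 1 by a predecessor edge of latency 4 had
    // its depth raised to 5 in ReleaseSucc, so it sits in PendingQueue until
    // CurCycle reaches 5 and only then moves to the available queue above.
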
    DEBUG(errs() << "\n*** Examining Available\n";
          LatencyPriorityQueue q = AvailableQueue;
          while (!q.empty()) {
            SUnit *su = q.pop();
            errs() << "Height " << su->getHeight() << ": ";
            su->dump(this);
          });

    SUnit *FoundSUnit = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);
      CycleHasInsts = true;

      // If we are using the target-specific hazards, then don't
      // advance the cycle time just because we schedule a node. If
      // the target allows it we can schedule multiple nodes in the
      // same cycle.
      if (!EnablePostRAHazardAvoidance) {
        if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
          ++CurCycle;
      }
    } else {
      if (CycleHasInsts) {
        DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
      } else if (!HasNoopHazards) {
        // Otherwise, we have a pipeline stall, but no other problem,
        // just advance the current cycle and try again.
        DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++NumStalls;
      } else {
        // Otherwise, we have no instructions to issue and we have instructions
        // that will fault if we don't do this right. This is the case for
        // processors without pipeline interlocks and other cases.
        DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
        HazardRec->EmitNoop();
        Sequence.push_back(0);   // NULL here means noop
        ++NumNoops;
      }
      ++CurCycle;
      CycleHasInsts = false;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
  return new PostRAScheduler(OptLevel);
}