1 //===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass is responsible for finalizing the functions frame layout, saving
11 // callee saved registers, and for emitting prolog & epilog code for the
14 // This pass must be run after register allocation. After this pass is
15 // executed, it is illegal to construct MO_FrameIndex operands.
17 // This pass implements a shrink wrapping variant of prolog/epilog insertion:
18 // - Places callee saved register (CSR) spills and restores in the CFG to
19 // tightly surround uses so that execution paths that do not use CSRs do not
20 // pay the spill/restore penalty.
22 // - Avoiding placment of spills/restores in loops: if a CSR is used inside a
23 // loop(nest), the spills are placed in the loop preheader, and restores are
24 // placed in the loop exit nodes (the successors of the loop _exiting_ nodes).
26 // - Covering paths without CSR uses: e.g. if a restore is placed in a join
27 // block, a matching spill is added to the end of all immediate predecessor
28 // blocks that are not reached by a spill. Similarly for saves placed in
31 // Shrink wrapping uses an analysis similar to the one in GVNPRE to determine
32 // which basic blocks require callee-saved register save/restore code.
34 // This pass uses MachineDominators and MachineLoopInfo. Loop information
35 // is used to prevent shrink wrapping of callee-saved register save/restore
38 //===----------------------------------------------------------------------===//
40 #define DEBUG_TYPE "shrink-wrap"
42 #include "llvm/CodeGen/Passes.h"
43 #include "llvm/CodeGen/MachineDominators.h"
44 #include "llvm/CodeGen/MachineLoopInfo.h"
45 #include "llvm/CodeGen/MachineFunctionPass.h"
46 #include "llvm/CodeGen/MachineInstr.h"
47 #include "llvm/CodeGen/MachineFrameInfo.h"
48 #include "llvm/CodeGen/MachineModuleInfo.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RegisterScavenging.h"
51 #include "llvm/Target/TargetMachine.h"
52 #include "llvm/Target/TargetRegisterInfo.h"
53 #include "llvm/Target/TargetFrameInfo.h"
54 #include "llvm/Target/TargetInstrInfo.h"
55 #include "llvm/ADT/SparseBitVector.h"
56 #include "llvm/ADT/DenseMap.h"
57 #include "llvm/ADT/PostOrderIterator.h"
58 #include "llvm/ADT/Statistic.h"
59 #include "llvm/Support/CommandLine.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/ADT/STLExtras.h"
63 #include "llvm/ADT/Statistic.h"
69 STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
73 ShrinkWrapping("shrink-wrap",
74 cl::desc("Shrink wrap callee-saved register spills/restores"));
76 // Shrink wrap only the specified function, a debugging aid.
77 static cl::opt<std::string>
78 ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
79 cl::desc("Shrink wrap the specified function"),
80 cl::value_desc("funcname"),
83 // Debugging level for shrink wrapping.
84 enum ShrinkWrapDebugLevel {
85 None, BasicInfo, Iterations, Details
88 static cl::opt<enum ShrinkWrapDebugLevel>
89 ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
90 cl::desc("Print shrink wrapping debugging information"),
92 clEnumVal(None , "disable debug output"),
93 clEnumVal(BasicInfo , "print basic DF sets"),
94 clEnumVal(Iterations, "print SR sets for each iteration"),
95 clEnumVal(Details , "print all DF sets"),
100 struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass {
102 PEI() : MachineFunctionPass(&ID) {}
104 const char *getPassName() const {
105 return "Prolog/Epilog Insertion & Frame Finalization";
108 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
109 AU.setPreservesCFG();
110 if (ShrinkWrapping || ShrinkWrapFunc != "") {
111 AU.addRequired<MachineLoopInfo>();
112 AU.addRequired<MachineDominatorTree>();
114 AU.addPreserved<MachineLoopInfo>();
115 AU.addPreserved<MachineDominatorTree>();
116 MachineFunctionPass::getAnalysisUsage(AU);
119 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
120 /// frame indexes with appropriate references.
122 bool runOnMachineFunction(MachineFunction &Fn) {
123 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
124 RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
128 // Get MachineModuleInfo so that we can track the construction of the
130 if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
131 Fn.getFrameInfo()->setMachineModuleInfo(MMI);
133 // Allow the target machine to make some adjustments to the function
134 // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
135 TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
137 // Scan the function for modified callee saved registers and insert spill
138 // code for any callee saved registers that are modified. Also calculate
139 // the MaxCallFrameSize and HasCalls variables for the function's frame
140 // information and eliminates call frame pseudo instructions.
141 calculateCalleeSavedRegisters(Fn);
143 // Determine placement of CSR spill/restore code:
144 // - with shrink wrapping, place spills and restores to tightly
145 // enclose regions in the Machine CFG of the function where
146 // they are used. Without shrink wrapping
147 // - default (no shrink wrapping), place all spills in the
148 // entry block, all restores in return blocks.
149 placeCSRSpillsAndRestores(Fn);
151 // Add the code to save and restore the callee saved registers
152 insertCSRSpillsAndRestores(Fn);
154 // Allow the target machine to make final modifications to the function
155 // before the frame layout is finalized.
156 TRI->processFunctionBeforeFrameFinalized(Fn);
158 // Calculate actual frame offsets for all abstract stack objects...
159 calculateFrameObjectOffsets(Fn);
161 // Add prolog and epilog code to the function. This function is required
162 // to align the stack frame as necessary for any stack variables or
163 // called functions. Because of this, calculateCalleeSavedRegisters
164 // must be called before this function in order to set the HasCalls
165 // and MaxCallFrameSize variables.
166 insertPrologEpilogCode(Fn);
168 // Replace all MO_FrameIndex operands with physical register references
169 // and actual offsets.
171 replaceFrameIndices(Fn);
181 // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
182 // stack frame indexes.
183 unsigned MinCSFrameIndex, MaxCSFrameIndex;
185 // Analysis info for spill/restore placement.
186 // "CSR": "callee saved register".
188 // CSRegSet contains indices into the Callee Saved Register Info
189 // vector built by calculateCalleeSavedRegisters() and accessed
190 // via MF.getFrameInfo()->getCalleeSavedInfo().
191 typedef SparseBitVector<> CSRegSet;
193 // CSRegBlockMap maps MachineBasicBlocks to sets of callee
194 // saved register indices.
195 typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
197 // Set and maps for computing CSR spill/restore placement:
198 // used in function (UsedCSRegs)
199 // used in a basic block (CSRUsed)
200 // anticipatable in a basic block (Antic{In,Out})
201 // available in a basic block (Avail{In,Out})
202 // to be spilled at the entry to a basic block (CSRSave)
203 // to be restored at the end of a basic block (CSRRestore)
205 CSRegBlockMap CSRUsed;
206 CSRegBlockMap AnticIn, AnticOut;
207 CSRegBlockMap AvailIn, AvailOut;
208 CSRegBlockMap CSRSave;
209 CSRegBlockMap CSRRestore;
211 // Entry and return blocks of the current function.
212 MachineBasicBlock* EntryBlock;
213 SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
215 // Map of MBBs to top level MachineLoops.
216 DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
218 // Flag to control shrink wrapping per-function:
219 // may choose to skip shrink wrapping for certain
221 bool ShrinkWrapThisFunction;
224 // Machine function handle.
227 // Flag indicating that the current function
228 // has at least one "short" path in the machine
229 // CFG from the entry block to an exit block.
230 bool HasFastExitPath;
233 bool calculateSets(MachineFunction &Fn);
234 bool calcAnticInOut(MachineBasicBlock* MBB);
235 bool calcAvailInOut(MachineBasicBlock* MBB);
236 void calculateAnticAvail(MachineFunction &Fn);
237 bool addUsesForMEMERegion(MachineBasicBlock* MBB,
238 SmallVector<MachineBasicBlock*, 4>& blks);
239 bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
240 bool calcSpillPlacements(MachineBasicBlock* MBB,
241 SmallVector<MachineBasicBlock*, 4> &blks,
242 CSRegBlockMap &prevSpills);
243 bool calcRestorePlacements(MachineBasicBlock* MBB,
244 SmallVector<MachineBasicBlock*, 4> &blks,
245 CSRegBlockMap &prevRestores);
246 void placeSpillsAndRestores(MachineFunction &Fn);
247 void placeCSRSpillsAndRestores(MachineFunction &Fn);
248 void calculateCalleeSavedRegisters(MachineFunction &Fn);
249 void insertCSRSpillsAndRestores(MachineFunction &Fn);
250 void calculateFrameObjectOffsets(MachineFunction &Fn);
251 void replaceFrameIndices(MachineFunction &Fn);
252 void insertPrologEpilogCode(MachineFunction &Fn);
254 // Initialize DFA sets, called before iterations.
255 void clearAnticAvailSets();
256 // Clear all sets constructed by shrink wrapping.
259 // Initialize all shrink wrapping data.
260 void initShrinkWrappingInfo();
262 // Convienences for dealing with machine loops.
263 MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) {
264 assert(LP && "Machine loop is NULL.");
265 MachineBasicBlock* PHDR = LP->getLoopPreheader();
266 MachineLoop* PLP = LP->getParentLoop();
268 PHDR = PLP->getLoopPreheader();
269 PLP = PLP->getParentLoop();
274 MachineLoop* getTopLevelLoopParent(MachineLoop *LP) {
277 MachineLoop* PLP = LP->getParentLoop();
280 PLP = PLP->getParentLoop();
285 // Propgate CSRs used in MBB to all MBBs of loop LP.
286 void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
288 // Convenience for recognizing return blocks.
289 bool isReturnBlock(MachineBasicBlock* MBB) {
290 return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
294 // Debugging methods.
296 // Mark this function as having fast exit paths.
297 void findFastExitPath();
299 // Verify placement of spills/restores.
300 void verifySpillRestorePlacement();
302 std::string getBasicBlockName(const MachineBasicBlock* MBB);
303 std::string stringifyCSRegSet(const CSRegSet& s);
304 void dumpSet(const CSRegSet& s);
305 void dumpUsed(MachineBasicBlock* MBB);
307 void dumpSets(MachineBasicBlock* MBB);
308 void dumpSets1(MachineBasicBlock* MBB);
317 // Initialize shrink wrapping DFA sets, called before iterations.
318 void PEI::clearAnticAvailSets() {
325 // Clear all sets constructed by shrink wrapping.
326 void PEI::clearAllSets() {
327 ReturnBlocks.clear();
328 clearAnticAvailSets();
336 // Initialize all shrink wrapping data.
337 void PEI::initShrinkWrappingInfo() {
341 HasFastExitPath = false;
343 ShrinkWrapThisFunction = ShrinkWrapping;
344 // DEBUG: enable or disable shrink wrapping for the current function
345 // via --shrink-wrap-func=<funcname>.
347 if (ShrinkWrapFunc != "") {
348 std::string MFName = MF->getFunction()->getName();
349 ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
355 /// createPrologEpilogCodeInserter - This function returns a pass that inserts
356 /// prolog and epilog code, and eliminates abstract frame references.
358 FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
360 /// placeCSRSpillsAndRestores - determine which MBBs of the function
361 /// need save, restore code for callee-saved registers by doing a DF analysis
362 /// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
363 /// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
364 /// is used to ensure that CSR save/restore code is not placed inside loops.
365 /// This function computes the maps of MBBs -> CSRs to spill and restore
366 /// in CSRSave, CSRRestore.
368 /// If shrink wrapping is not being performed, place all spills in
369 /// the entry block, all restores in return blocks. In this case,
370 /// CSRSave has a single mapping, CSRRestore has mappings for each
373 void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
375 initShrinkWrappingInfo();
377 DEBUG(if (ShrinkWrapThisFunction) {
378 DOUT << "Place CSR spills/restores for "
379 << MF->getFunction()->getName() << "\n";
382 if (calculateSets(Fn))
383 placeSpillsAndRestores(Fn);
386 /// calcAnticInOut - calculate the anticipated in/out reg sets
387 /// for the given MBB by looking forward in the MCFG at MBB's
390 bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
391 bool changed = false;
393 // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
394 SmallVector<MachineBasicBlock*, 4> successors;
395 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
396 SE = MBB->succ_end(); SI != SE; ++SI) {
397 MachineBasicBlock* SUCC = *SI;
399 successors.push_back(SUCC);
402 unsigned i = 0, e = successors.size();
404 CSRegSet prevAnticOut = AnticOut[MBB];
405 MachineBasicBlock* SUCC = successors[i];
407 AnticOut[MBB] = AnticIn[SUCC];
408 for (++i; i != e; ++i) {
409 SUCC = successors[i];
410 AnticOut[MBB] &= AnticIn[SUCC];
412 if (prevAnticOut != AnticOut[MBB])
416 // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
417 CSRegSet prevAnticIn = AnticIn[MBB];
418 AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
419 if (prevAnticIn |= AnticIn[MBB])
424 /// calcAvailInOut - calculate the available in/out reg sets
425 /// for the given MBB by looking backward in the MCFG at MBB's
428 bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
429 bool changed = false;
431 // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
432 SmallVector<MachineBasicBlock*, 4> predecessors;
433 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
434 PE = MBB->pred_end(); PI != PE; ++PI) {
435 MachineBasicBlock* PRED = *PI;
437 predecessors.push_back(PRED);
440 unsigned i = 0, e = predecessors.size();
442 CSRegSet prevAvailIn = AvailIn[MBB];
443 MachineBasicBlock* PRED = predecessors[i];
445 AvailIn[MBB] = AvailOut[PRED];
446 for (++i; i != e; ++i) {
447 PRED = predecessors[i];
448 AvailIn[MBB] &= AvailOut[PRED];
450 if (prevAvailIn != AvailIn[MBB])
454 // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
455 CSRegSet prevAvailOut = AvailOut[MBB];
456 AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
457 if (prevAvailOut |= AvailOut[MBB])
462 /// calculateAnticAvail - build the sets anticipated and available
463 /// registers in the MCFG of the current function iteratively,
464 /// doing a combined forward and backward analysis.
466 void PEI::calculateAnticAvail(MachineFunction &Fn) {
467 // Initialize data flow sets.
468 clearAnticAvailSets();
470 // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
472 unsigned iterations = 0;
476 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
477 MBBI != MBBE; ++MBBI) {
478 MachineBasicBlock* MBB = MBBI;
480 // Calculate anticipated in, out regs at MBB from
481 // anticipated at successors of MBB.
482 changed |= calcAnticInOut(MBB);
484 // Calculate available in, out regs at MBB from
485 // available at predecessors of MBB.
486 changed |= calcAvailInOut(MBB);
490 DEBUG(if (ShrinkWrapDebugging >= Details) {
491 DOUT << "-----------------------------------------------------------\n";
492 DOUT << " Antic/Avail Sets:\n";
493 DOUT << "-----------------------------------------------------------\n";
494 DOUT << "iterations = " << iterations << "\n";
495 DOUT << "-----------------------------------------------------------\n";
496 DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
497 DOUT << "-----------------------------------------------------------\n";
498 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
499 MBBI != MBBE; ++MBBI) {
500 MachineBasicBlock* MBB = MBBI;
503 DOUT << "-----------------------------------------------------------\n";
507 /// propagateUsesAroundLoop - copy used register info from MBB to all blocks
508 /// of the loop given by LP and its parent loops. This prevents spills/restores
509 /// from being placed in the bodies of loops.
511 void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
515 std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
516 for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
517 MachineBasicBlock* LBB = loopBlocks[i];
520 if (CSRUsed[LBB].contains(CSRUsed[MBB]))
522 CSRUsed[LBB] |= CSRUsed[MBB];
526 /// calculateSets - collect the CSRs used in this function, compute
527 /// the DF sets that describe the initial minimal regions in the
528 /// Machine CFG around which CSR spills and restores must be placed.
530 /// Additionally, this function decides if shrink wrapping should
531 /// be disabled for the current function, checking the following:
532 /// 1. the current function has more than 500 MBBs: heuristic limit
533 /// on function size to reduce compile time impact of the current
534 /// iterative algorithm.
535 /// 2. all CSRs are used in the entry block.
536 /// 3. all CSRs are used in all immediate successors of the entry block.
537 /// 4. all CSRs are used in a subset of blocks, each of which dominates
538 /// all return blocks. These blocks, taken as a subgraph of the MCFG,
539 /// are equivalent to the entry block since all execution paths pass
542 bool PEI::calculateSets(MachineFunction &Fn) {
543 // Sets used to compute spill, restore placement sets.
544 const std::vector<CalleeSavedInfo> CSI =
545 Fn.getFrameInfo()->getCalleeSavedInfo();
547 // If no CSRs used, we are done.
549 DEBUG(if (ShrinkWrapThisFunction)
550 DOUT << "DISABLED: " << Fn.getFunction()->getName()
551 << ": uses no callee-saved registers\n");
555 // Save refs to entry and return blocks.
556 EntryBlock = Fn.begin();
557 for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
559 if (isReturnBlock(MBB))
560 ReturnBlocks.push_back(MBB);
562 // Determine if this function has fast exit paths.
563 DEBUG(if (ShrinkWrapThisFunction)
566 // Limit shrink wrapping via the current iterative bit vector
567 // implementation to functions with <= 500 MBBs.
568 if (Fn.size() > 500) {
569 DEBUG(if (ShrinkWrapThisFunction)
570 DOUT << "DISABLED: " << Fn.getFunction()->getName()
571 << ": too large (" << Fn.size() << " MBBs)\n");
572 ShrinkWrapThisFunction = false;
575 // Return now if not shrink wrapping.
576 if (! ShrinkWrapThisFunction)
579 // Collect set of used CSRs.
580 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
584 // Walk instructions in all MBBs, create CSRUsed[] sets, choose
585 // whether or not to shrink wrap this function.
586 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
587 MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
588 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
590 bool allCSRUsesInEntryBlock = true;
591 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
592 MBBI != MBBE; ++MBBI) {
593 MachineBasicBlock* MBB = MBBI;
594 for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
595 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
596 unsigned Reg = CSI[inx].getReg();
597 // If instruction I reads or modifies Reg, add it to UsedCSRegs,
598 // CSRUsed map for the current block.
599 for (unsigned opInx = 0, opEnd = I->getNumOperands();
600 opInx != opEnd; ++opInx) {
601 const MachineOperand &MO = I->getOperand(opInx);
602 if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
604 unsigned MOReg = MO.getReg();
608 (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
609 TargetRegisterInfo::isPhysicalRegister(Reg) &&
610 TRI->isSubRegister(Reg, MOReg))) {
611 // CSR Reg is defined/used in block MBB.
612 CSRUsed[MBB].set(inx);
613 // Check for uses in EntryBlock.
614 if (MBB != EntryBlock)
615 allCSRUsesInEntryBlock = false;
621 if (CSRUsed[MBB].empty())
624 // Propagate CSRUsed[MBB] in loops
625 if (MachineLoop* LP = LI.getLoopFor(MBB)) {
626 // Add top level loop to work list.
627 MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
628 MachineLoop* PLP = getTopLevelLoopParent(LP);
631 HDR = PLP->getHeader();
632 assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
633 MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
638 // Push uses from inside loop to its parent loops,
639 // or to all other MBBs in its loop.
640 if (LP->getLoopDepth() > 1) {
641 for (MachineLoop* PLP = LP->getParentLoop(); PLP;
642 PLP = PLP->getParentLoop()) {
643 propagateUsesAroundLoop(MBB, PLP);
646 propagateUsesAroundLoop(MBB, LP);
651 if (allCSRUsesInEntryBlock) {
652 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
653 << ": all CSRs used in EntryBlock\n");
654 ShrinkWrapThisFunction = false;
656 bool allCSRsUsedInEntryFanout = true;
657 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
658 SE = EntryBlock->succ_end(); SI != SE; ++SI) {
659 MachineBasicBlock* SUCC = *SI;
660 if (CSRUsed[SUCC] != UsedCSRegs)
661 allCSRsUsedInEntryFanout = false;
663 if (allCSRsUsedInEntryFanout) {
664 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
665 << ": all CSRs used in imm successors of EntryBlock\n");
666 ShrinkWrapThisFunction = false;
670 if (ShrinkWrapThisFunction) {
671 // Check if MBB uses CSRs and dominates all exit nodes.
672 // Such nodes are equiv. to the entry node w.r.t.
673 // CSR uses: every path through the function must
674 // pass through this node. If each CSR is used at least
675 // once by these nodes, shrink wrapping is disabled.
676 CSRegSet CSRUsedInChokePoints;
677 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
678 MBBI != MBBE; ++MBBI) {
679 MachineBasicBlock* MBB = MBBI;
680 if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
682 bool dominatesExitNodes = true;
683 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
684 if (! DT.dominates(MBB, ReturnBlocks[ri])) {
685 dominatesExitNodes = false;
688 if (dominatesExitNodes) {
689 CSRUsedInChokePoints |= CSRUsed[MBB];
690 if (CSRUsedInChokePoints == UsedCSRegs) {
691 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
692 << ": all CSRs used in choke point(s) at "
693 << getBasicBlockName(MBB) << "\n");
694 ShrinkWrapThisFunction = false;
701 // Return now if we have decided not to apply shrink wrapping
702 // to the current function.
703 if (! ShrinkWrapThisFunction)
707 DOUT << "ENABLED: " << Fn.getFunction()->getName();
709 DOUT << " (fast exit path)";
711 if (ShrinkWrapDebugging >= BasicInfo) {
712 DOUT << "------------------------------"
713 << "-----------------------------\n";
714 DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
715 if (ShrinkWrapDebugging >= Details) {
716 DOUT << "------------------------------"
717 << "-----------------------------\n";
723 // Build initial DF sets to determine minimal regions in the
724 // Machine CFG around which CSRs must be spilled and restored.
725 calculateAnticAvail(Fn);
730 /// addUsesForMEMERegion - add uses of CSRs spilled or restored in
731 /// multi-entry, multi-exit (MEME) regions so spill and restore
732 /// placement will not break code that enters or leaves a
733 /// shrink-wrapped region by inducing spills with no matching
734 /// restores or restores with no matching spills. A MEME region
735 /// is a subgraph of the MCFG with multiple entry edges, multiple
736 /// exit edges, or both. This code propagates use information
737 /// through the MCFG until all paths requiring spills and restores
738 /// _outside_ the computed minimal placement regions have been covered.
740 bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
741 SmallVector<MachineBasicBlock*, 4>& blks) {
742 if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
743 bool processThisBlock = false;
744 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
745 SE = MBB->succ_end(); SI != SE; ++SI) {
746 MachineBasicBlock* SUCC = *SI;
747 if (SUCC->pred_size() > 1) {
748 processThisBlock = true;
752 if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
753 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
754 PE = MBB->pred_end(); PI != PE; ++PI) {
755 MachineBasicBlock* PRED = *PI;
756 if (PRED->succ_size() > 1) {
757 processThisBlock = true;
762 if (! processThisBlock)
767 if (!CSRSave[MBB].empty())
769 else if (!CSRRestore[MBB].empty())
770 prop = CSRRestore[MBB];
776 // Propagate selected bits to successors, predecessors of MBB.
777 bool addedUses = false;
778 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
779 SE = MBB->succ_end(); SI != SE; ++SI) {
780 MachineBasicBlock* SUCC = *SI;
784 if (! CSRUsed[SUCC].contains(prop)) {
785 CSRUsed[SUCC] |= prop;
787 blks.push_back(SUCC);
788 DEBUG(if (ShrinkWrapDebugging >= Iterations)
789 DOUT << getBasicBlockName(MBB)
790 << "(" << stringifyCSRegSet(prop) << ")->"
791 << "successor " << getBasicBlockName(SUCC) << "\n");
794 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
795 PE = MBB->pred_end(); PI != PE; ++PI) {
796 MachineBasicBlock* PRED = *PI;
800 if (! CSRUsed[PRED].contains(prop)) {
801 CSRUsed[PRED] |= prop;
803 blks.push_back(PRED);
804 DEBUG(if (ShrinkWrapDebugging >= Iterations)
805 DOUT << getBasicBlockName(MBB)
806 << "(" << stringifyCSRegSet(prop) << ")->"
807 << "predecessor " << getBasicBlockName(PRED) << "\n");
813 /// addUsesForTopLevelLoops - add uses for CSRs used inside top
814 /// level loops to the exit blocks of those loops.
816 bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
817 bool addedUses = false;
819 // Place restores for top level loops where needed.
820 for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
821 I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
822 MachineBasicBlock* MBB = I->first;
823 MachineLoop* LP = I->second;
824 MachineBasicBlock* HDR = LP->getHeader();
825 SmallVector<MachineBasicBlock*, 4> exitBlocks;
828 loopSpills = CSRSave[MBB];
829 if (CSRSave[MBB].empty()) {
830 loopSpills = CSRUsed[HDR];
831 assert(!loopSpills.empty() && "No CSRs used in loop?");
832 } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
835 LP->getExitBlocks(exitBlocks);
836 assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
837 for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
838 MachineBasicBlock* EXB = exitBlocks[i];
839 if (! CSRUsed[EXB].contains(loopSpills)) {
840 CSRUsed[EXB] |= loopSpills;
842 DEBUG(if (ShrinkWrapDebugging >= Iterations)
843 DOUT << "LOOP " << getBasicBlockName(MBB)
844 << "(" << stringifyCSRegSet(loopSpills) << ")->"
845 << getBasicBlockName(EXB) << "\n");
846 if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
854 /// calcSpillPlacements - determine which CSRs should be spilled
855 /// in MBB using AnticIn sets of MBB's predecessors, keeping track
856 /// of changes to spilled reg sets. Add MBB to the set of blocks
857 /// that need to be processed for propagating use info to cover
858 /// multi-entry/exit regions.
860 bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
861 SmallVector<MachineBasicBlock*, 4> &blks,
862 CSRegBlockMap &prevSpills) {
863 bool placedSpills = false;
864 // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
865 CSRegSet anticInPreds;
866 SmallVector<MachineBasicBlock*, 4> predecessors;
867 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
868 PE = MBB->pred_end(); PI != PE; ++PI) {
869 MachineBasicBlock* PRED = *PI;
871 predecessors.push_back(PRED);
873 unsigned i = 0, e = predecessors.size();
875 MachineBasicBlock* PRED = predecessors[i];
876 anticInPreds = UsedCSRegs - AnticIn[PRED];
877 for (++i; i != e; ++i) {
878 PRED = predecessors[i];
879 anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
882 // Handle uses in entry blocks (which have no predecessors).
883 // This is necessary because the DFA formulation assumes the
884 // entry and (multiple) exit nodes cannot have CSR uses, which
885 // is not the case in the real world.
886 anticInPreds = UsedCSRegs;
888 // Compute spills required at MBB:
889 CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
891 if (! CSRSave[MBB].empty()) {
892 if (MBB == EntryBlock) {
893 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
894 CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
896 // Reset all regs spilled in MBB that are also spilled in EntryBlock.
897 if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
898 CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
902 placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
903 prevSpills[MBB] = CSRSave[MBB];
904 // Remember this block for adding restores to successor
905 // blocks for multi-entry region.
909 DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
910 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
911 << stringifyCSRegSet(CSRSave[MBB]) << "\n");
916 /// calcRestorePlacements - determine which CSRs should be restored
917 /// in MBB using AvailOut sets of MBB's succcessors, keeping track
918 /// of changes to restored reg sets. Add MBB to the set of blocks
919 /// that need to be processed for propagating use info to cover
920 /// multi-entry/exit regions.
922 bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
923 SmallVector<MachineBasicBlock*, 4> &blks,
924 CSRegBlockMap &prevRestores) {
925 bool placedRestores = false;
926 // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
927 CSRegSet availOutSucc;
928 SmallVector<MachineBasicBlock*, 4> successors;
929 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
930 SE = MBB->succ_end(); SI != SE; ++SI) {
931 MachineBasicBlock* SUCC = *SI;
933 successors.push_back(SUCC);
935 unsigned i = 0, e = successors.size();
937 MachineBasicBlock* SUCC = successors[i];
938 availOutSucc = UsedCSRegs - AvailOut[SUCC];
939 for (++i; i != e; ++i) {
940 SUCC = successors[i];
941 availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
944 if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
945 // Handle uses in return blocks (which have no successors).
946 // This is necessary because the DFA formulation assumes the
947 // entry and (multiple) exit nodes cannot have CSR uses, which
948 // is not the case in the real world.
949 availOutSucc = UsedCSRegs;
952 // Compute restores required at MBB:
953 CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
955 // Postprocess restore placements at MBB.
956 // Remove the CSRs that are restored in the return blocks.
957 // Lest this be confusing, note that:
958 // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
959 if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
960 if (! CSRSave[EntryBlock].empty())
961 CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
963 placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
964 prevRestores[MBB] = CSRRestore[MBB];
965 // Remember this block for adding saves to predecessor
966 // blocks for multi-entry region.
970 DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
971 DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
972 << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
974 return placedRestores;
977 /// placeSpillsAndRestores - place spills and restores of CSRs
978 /// used in MBBs in minimal regions that contain the uses.
980 void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
981 CSRegBlockMap prevCSRSave;
982 CSRegBlockMap prevCSRRestore;
983 SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
985 unsigned iterations = 0;
987 // Iterate computation of spill and restore placements in the MCFG until:
988 // 1. CSR use info has been fully propagated around the MCFG, and
989 // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
994 DEBUG(if (ShrinkWrapDebugging >= Iterations)
995 DOUT << "iter " << iterations
996 << " --------------------------------------------------\n");
998 // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
999 // which determines the placements of spills and restores.
1000 // Keep track of changes to spills, restores in each iteration to
1001 // minimize the total iterations.
1002 bool SRChanged = false;
1003 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
1004 MBBI != MBBE; ++MBBI) {
1005 MachineBasicBlock* MBB = MBBI;
1007 // Place spills for CSRs in MBB.
1008 SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
1010 // Place restores for CSRs in MBB.
1011 SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
1014 // Add uses of CSRs used inside loops where needed.
1015 changed |= addUsesForTopLevelLoops(cvBlocks);
1017 // Add uses for CSRs spilled or restored at branch, join points.
1018 if (changed || SRChanged) {
1019 while (! cvBlocks.empty()) {
1020 MachineBasicBlock* MBB = cvBlocks.pop_back_val();
1021 changed |= addUsesForMEMERegion(MBB, ncvBlocks);
1023 if (! ncvBlocks.empty()) {
1024 cvBlocks = ncvBlocks;
1030 calculateAnticAvail(Fn);
1036 // Check for effectiveness:
1037 // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
1038 // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
1039 // Gives a measure of how many CSR spills have been moved from EntryBlock
1040 // to minimal regions enclosing their uses.
1041 CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
1042 unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
1043 numSRReduced += numSRReducedThisFunc;
1044 DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
1045 DOUT << "-----------------------------------------------------------\n";
1046 DOUT << "total iterations = " << iterations << " ( "
1047 << Fn.getFunction()->getName()
1048 << " " << numSRReducedThisFunc
1051 DOUT << "-----------------------------------------------------------\n";
1053 DOUT << "-----------------------------------------------------------\n";
1054 if (numSRReducedThisFunc)
1055 verifySpillRestorePlacement();
1059 /// calculateCalleeSavedRegisters - Scan the function for modified callee saved
1060 /// registers. Also calculate the MaxCallFrameSize and HasCalls variables for
1061 /// the function's frame information and eliminates call frame pseudo
1064 void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
1065 const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
1066 const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
1068 // Get the callee saved register list...
1069 const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
1071 // Get the function call frame set-up and tear-down instruction opcode
1072 int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
1073 int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
1075 // These are used to keep track the callee-save area. Initialize them.
1076 MinCSFrameIndex = INT_MAX;
1077 MaxCSFrameIndex = 0;
1079 // Early exit for targets which have no callee saved registers and no call
1080 // frame setup/destroy pseudo instructions.
1081 if ((CSRegs == 0 || CSRegs[0] == 0) &&
1082 FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
1085 unsigned MaxCallFrameSize = 0;
1086 bool HasCalls = false;
1088 std::vector<MachineBasicBlock::iterator> FrameSDOps;
1089 for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
1090 for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
1091 if (I->getOpcode() == FrameSetupOpcode ||
1092 I->getOpcode() == FrameDestroyOpcode) {
1093 assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
1094 " instructions should have a single immediate argument!");
1095 unsigned Size = I->getOperand(0).getImm();
1096 if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
1098 FrameSDOps.push_back(I);
1101 MachineFrameInfo *FFI = Fn.getFrameInfo();
1102 FFI->setHasCalls(HasCalls);
1103 FFI->setMaxCallFrameSize(MaxCallFrameSize);
1105 for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
1106 MachineBasicBlock::iterator I = FrameSDOps[i];
1107 // If call frames are not being included as part of the stack frame,
1108 // and there is no dynamic allocation (therefore referencing frame slots
1109 // off sp), leave the pseudo ops alone. We'll eliminate them later.
1110 if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
1111 RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
1114 // Now figure out which *callee saved* registers are modified by the current
1115 // function, thus needing to be saved and restored in the prolog/epilog.
1117 const TargetRegisterClass* const *CSRegClasses =
1118 RegInfo->getCalleeSavedRegClasses(&Fn);
1119 std::vector<CalleeSavedInfo> CSI;
1120 for (unsigned i = 0; CSRegs[i]; ++i) {
1121 unsigned Reg = CSRegs[i];
1122 if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
1123 // If the reg is modified, save it!
1124 CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
1126 for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
1127 *AliasSet; ++AliasSet) { // Check alias registers too.
1128 if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
1129 CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
1137 return; // Early exit if no callee saved registers are modified!
1139 unsigned NumFixedSpillSlots;
1140 const std::pair<unsigned,int> *FixedSpillSlots =
1141 TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
1143 // Now that we know which registers need to be saved and restored, allocate
1144 // stack slots for them.
1145 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1146 unsigned Reg = CSI[i].getReg();
1147 const TargetRegisterClass *RC = CSI[i].getRegClass();
1149 // Check to see if this physreg must be spilled to a particular stack slot
1151 const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
1152 while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
1153 FixedSlot->first != Reg)
1157 if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
1158 // Nope, just spill it anywhere convenient.
1159 unsigned Align = RC->getAlignment();
1160 unsigned StackAlign = TFI->getStackAlignment();
1161 // We may not be able to sastify the desired alignment specification of
1162 // the TargetRegisterClass if the stack alignment is smaller.
1164 Align = std::min(Align, StackAlign);
1165 FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
1166 if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
1167 if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
1169 // Spill it to the stack where we must.
1170 FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
1172 CSI[i].setFrameIdx(FrameIdx);
1175 FFI->setCalleeSavedInfo(CSI);
1178 /// insertCSRSpillsAndRestores - Insert spill and restore code for
1179 /// callee saved registers used in the function, handling shrink wrapping.
1181 void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
1182 // Get callee saved register information.
1183 MachineFrameInfo *FFI = Fn.getFrameInfo();
1184 const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
1186 // Early exit if no callee saved registers are modified!
1190 const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
1191 MachineBasicBlock::iterator I;
1193 DEBUG(if (ShrinkWrapThisFunction && ShrinkWrapDebugging >= Details)
1194 DOUT << "Inserting CSR spills/restores in function "
1195 << Fn.getFunction()->getName() << "\n");
1197 if (! ShrinkWrapThisFunction) {
1198 // Spill using target interface.
1199 I = EntryBlock->begin();
1200 if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
1201 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1202 // Add the callee-saved register as live-in.
1203 // It's killed at the spill.
1204 EntryBlock->addLiveIn(CSI[i].getReg());
1206 // Insert the spill to the stack frame.
1207 TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
1208 CSI[i].getFrameIdx(), CSI[i].getRegClass());
1212 // Restore using target interface.
1213 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
1214 MachineBasicBlock* MBB = ReturnBlocks[ri];
1215 I = MBB->end(); --I;
1217 // Skip over all terminator instructions, which are part of the return
1219 MachineBasicBlock::iterator I2 = I;
1220 while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
1223 bool AtStart = I == MBB->begin();
1224 MachineBasicBlock::iterator BeforeI = I;
1228 // Restore all registers immediately before the return and any
1229 // terminators that preceed it.
1230 if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
1231 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1232 TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
1233 CSI[i].getFrameIdx(),
1234 CSI[i].getRegClass());
1235 assert(I != MBB->begin() &&
1236 "loadRegFromStackSlot didn't insert any code!");
1237 // Insert in reverse order. loadRegFromStackSlot can insert
1238 // multiple instructions.
1252 std::vector<CalleeSavedInfo> blockCSI;
1253 for (CSRegBlockMap::iterator BI = CSRSave.begin(),
1254 BE = CSRSave.end(); BI != BE; ++BI) {
1255 MachineBasicBlock* MBB = BI->first;
1256 CSRegSet save = BI->second;
1261 DEBUG(if (ShrinkWrapDebugging >= Details)
1262 DOUT << "Spilling " << stringifyCSRegSet(save)
1263 << " in " << getBasicBlockName(MBB) << "\n");
1266 for (CSRegSet::iterator RI = save.begin(),
1267 RE = save.end(); RI != RE; ++RI) {
1268 blockCSI.push_back(CSI[*RI]);
1270 assert(blockCSI.size() > 0 &&
1271 "Could not collect callee saved register info");
1275 // When shrink wrapping, use stack slot stores/loads.
1276 for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
1277 // Add the callee-saved register as live-in.
1278 // It's killed at the spill.
1279 MBB->addLiveIn(blockCSI[i].getReg());
1281 // Insert the spill to the stack frame.
1282 TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
1284 blockCSI[i].getFrameIdx(),
1285 blockCSI[i].getRegClass());
1289 DEBUG(if (ShrinkWrapDebugging >= Details)
1290 DOUT << "------------------------------"
1291 << "-----------------------------\n");
1293 for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1294 BE = CSRRestore.end(); BI != BE; ++BI) {
1295 MachineBasicBlock* MBB = BI->first;
1296 CSRegSet restore = BI->second;
1298 if (restore.empty())
1301 DEBUG(if (ShrinkWrapDebugging >= Details)
1302 DOUT << "Restoring " << stringifyCSRegSet(restore)
1303 << " in " << getBasicBlockName(MBB) << "\n");
1306 for (CSRegSet::iterator RI = restore.begin(),
1307 RE = restore.end(); RI != RE; ++RI) {
1308 blockCSI.push_back(CSI[*RI]);
1310 assert(blockCSI.size() > 0 &&
1311 "Could not find callee saved register info");
1313 // If MBB is empty and needs restores, insert at the _beginning_.
1320 // Skip over all terminator instructions, which are part of the
1322 if (! I->getDesc().isTerminator()) {
1325 MachineBasicBlock::iterator I2 = I;
1326 while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
1331 bool AtStart = I == MBB->begin();
1332 MachineBasicBlock::iterator BeforeI = I;
1336 // Restore all registers immediately before the return and any
1337 // terminators that preceed it.
1338 for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
1339 TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
1340 blockCSI[i].getFrameIdx(),
1341 blockCSI[i].getRegClass());
1342 assert(I != MBB->begin() &&
1343 "loadRegFromStackSlot didn't insert any code!");
1344 // Insert in reverse order. loadRegFromStackSlot can insert
1345 // multiple instructions.
1355 DEBUG(if (ShrinkWrapDebugging >= Details)
1356 DOUT << "------------------------------"
1357 << "-----------------------------\n");
1360 /// AdjustStackOffset - Helper function used to adjust the stack frame offset.
1362 AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
1363 bool StackGrowsDown, int64_t &Offset,
1364 unsigned &MaxAlign) {
1365 // If stack grows down, we need to add size of find the lowest address of the
1368 Offset += FFI->getObjectSize(FrameIdx);
1370 unsigned Align = FFI->getObjectAlignment(FrameIdx);
1372 // If the alignment of this object is greater than that of the stack, then
1373 // increase the stack alignment to match.
1374 MaxAlign = std::max(MaxAlign, Align);
1376 // Adjust to alignment boundary.
1377 Offset = (Offset + Align - 1) / Align * Align;
1379 if (StackGrowsDown) {
1380 FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
1382 FFI->setObjectOffset(FrameIdx, Offset);
1383 Offset += FFI->getObjectSize(FrameIdx);
1387 /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
1388 /// abstract stack objects.
1390 void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
1391 const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
1393 bool StackGrowsDown =
1394 TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
1396 // Loop over all of the stack objects, assigning sequential addresses...
1397 MachineFrameInfo *FFI = Fn.getFrameInfo();
1399 unsigned MaxAlign = FFI->getMaxAlignment();
1401 // Start at the beginning of the local area.
1402 // The Offset is the distance from the stack top in the direction
1403 // of stack growth -- so it's always nonnegative.
1404 int64_t Offset = TFI.getOffsetOfLocalArea();
1408 && "Local area offset should be in direction of stack growth");
1410 // If there are fixed sized objects that are preallocated in the local area,
1411 // non-fixed objects can't be allocated right at the start of local area.
1412 // We currently don't support filling in holes in between fixed sized
1413 // objects, so we adjust 'Offset' to point to the end of last fixed sized
1414 // preallocated object.
1415 for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
1417 if (StackGrowsDown) {
1418 // The maximum distance from the stack pointer is at lower address of
1419 // the object -- which is given by offset. For down growing stack
1420 // the offset is negative, so we negate the offset to get the distance.
1421 FixedOff = -FFI->getObjectOffset(i);
1423 // The maximum distance from the start pointer is at the upper
1424 // address of the object.
1425 FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
1427 if (FixedOff > Offset) Offset = FixedOff;
1430 // First assign frame offsets to stack objects that are used to spill
1431 // callee saved registers.
1432 if (StackGrowsDown) {
1433 for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
1434 // If stack grows down, we need to add size of find the lowest
1435 // address of the object.
1436 Offset += FFI->getObjectSize(i);
1438 unsigned Align = FFI->getObjectAlignment(i);
1439 // If the alignment of this object is greater than that of the stack,
1440 // then increase the stack alignment to match.
1441 MaxAlign = std::max(MaxAlign, Align);
1442 // Adjust to alignment boundary
1443 Offset = (Offset+Align-1)/Align*Align;
1445 FFI->setObjectOffset(i, -Offset); // Set the computed offset
1448 int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
1449 for (int i = MaxCSFI; i >= MinCSFI ; --i) {
1450 unsigned Align = FFI->getObjectAlignment(i);
1451 // If the alignment of this object is greater than that of the stack,
1452 // then increase the stack alignment to match.
1453 MaxAlign = std::max(MaxAlign, Align);
1454 // Adjust to alignment boundary
1455 Offset = (Offset+Align-1)/Align*Align;
1457 FFI->setObjectOffset(i, Offset);
1458 Offset += FFI->getObjectSize(i);
1462 // Make sure the special register scavenging spill slot is closest to the
1463 // frame pointer if a frame pointer is required.
1464 const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
1465 if (RS && RegInfo->hasFP(Fn)) {
1466 int SFI = RS->getScavengingFrameIndex();
1468 AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
1471 // Make sure that the stack protector comes before the local variables on the
1473 if (FFI->getStackProtectorIndex() >= 0)
1474 AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
1477 // Then assign frame offsets to stack objects that are not used to spill
1478 // callee saved registers.
1479 for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
1480 if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
1482 if (RS && (int)i == RS->getScavengingFrameIndex())
1484 if (FFI->isDeadObjectIndex(i))
1486 if (FFI->getStackProtectorIndex() == (int)i)
1489 AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
1492 // Make sure the special register scavenging spill slot is closest to the
1494 if (RS && !RegInfo->hasFP(Fn)) {
1495 int SFI = RS->getScavengingFrameIndex();
1497 AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
1500 // Round up the size to a multiple of the alignment, but only if there are
1501 // calls or alloca's in the function. This ensures that any calls to
1502 // subroutines have their stack frames suitable aligned.
1503 // Also do this if we need runtime alignment of the stack. In this case
1504 // offsets will be relative to SP not FP; round up the stack size so this
1506 if (!RegInfo->targetHandlesStackFrameRounding() &&
1507 (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
1508 (RegInfo->needsStackRealignment(Fn) &&
1509 FFI->getObjectIndexEnd() != 0))) {
1510 // If we have reserved argument space for call sites in the function
1511 // immediately on entry to the current function, count it as part of the
1512 // overall stack size.
1513 if (RegInfo->hasReservedCallFrame(Fn))
1514 Offset += FFI->getMaxCallFrameSize();
1516 unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1;
1517 Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
1520 // Update frame info to pretend that this is part of the stack...
1521 FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
1523 // Remember the required stack alignment in case targets need it to perform
1524 // dynamic stack alignment.
1525 FFI->setMaxAlignment(MaxAlign);
1529 /// insertPrologEpilogCode - Scan the function for modified callee saved
1530 /// registers, insert spill code for these callee saved registers, then add
1531 /// prolog and epilog code to the function.
1533 void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
1534 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
1536 // Add prologue to the function...
1537 TRI->emitPrologue(Fn);
1539 // Add epilogue to restore the callee-save registers in each exiting block
1540 for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
1541 // If last instruction is a return instruction, add an epilogue
1542 if (!I->empty() && I->back().getDesc().isReturn())
1543 TRI->emitEpilogue(Fn, *I);
1548 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
1549 /// register references and actual offsets.
1551 void PEI::replaceFrameIndices(MachineFunction &Fn) {
1552 if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
1554 const TargetMachine &TM = Fn.getTarget();
1555 assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
1556 const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
1557 const TargetFrameInfo *TFI = TM.getFrameInfo();
1558 bool StackGrowsDown =
1559 TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
1560 int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
1561 int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
1563 for (MachineFunction::iterator BB = Fn.begin(),
1564 E = Fn.end(); BB != E; ++BB) {
1565 int SPAdj = 0; // SP offset due to call frame setup / destroy.
1566 if (RS) RS->enterBasicBlock(BB);
1568 for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
1569 if (I->getOpcode() == TargetInstrInfo::DECLARE) {
1575 if (I->getOpcode() == FrameSetupOpcode ||
1576 I->getOpcode() == FrameDestroyOpcode) {
1577 // Remember how much SP has been adjusted to create the call
1579 int Size = I->getOperand(0).getImm();
1581 if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
1582 (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
1587 MachineBasicBlock::iterator PrevI = BB->end();
1588 if (I != BB->begin()) PrevI = prior(I);
1589 TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
1591 // Visit the instructions created by eliminateCallFramePseudoInstr().
1592 if (PrevI == BB->end())
1593 I = BB->begin(); // The replaced instr was the first in the block.
1599 MachineInstr *MI = I;
1601 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
1602 if (MI->getOperand(i).isFI()) {
1603 // Some instructions (e.g. inline asm instructions) can have
1604 // multiple frame indices and/or cause eliminateFrameIndex
1605 // to insert more than one instruction. We need the register
1606 // scavenger to go through all of these instructions so that
1607 // it can update its register information. We keep the
1608 // iterator at the point before insertion so that we can
1609 // revisit them in full.
1610 bool AtBeginning = (I == BB->begin());
1611 if (!AtBeginning) --I;
1613 // If this instruction has a FrameIndex operand, we need to
1614 // use that target machine register info object to eliminate
1617 TRI.eliminateFrameIndex(MI, SPAdj, RS);
1619 // Reset the iterator if we were at the beginning of the BB.
1629 if (DoIncr && I != BB->end()) ++I;
1631 // Update register states.
1632 if (RS && MI) RS->forward(MI);
1635 assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
1639 // Debugging methods for shrink wrapping.
1641 /// findFastExitPath - debugging method used to detect functions
1642 /// with at least one path from the entry block to a return block
1643 /// directly or which has a very small number of edges.
1645 void PEI::findFastExitPath() {
1648 // Fina a path from EntryBlock to any return block that does not branch:
1656 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
1657 SE = EntryBlock->succ_end(); SI != SE; ++SI) {
1658 MachineBasicBlock* SUCC = *SI;
1660 // Assume positive, disprove existence of fast path.
1662 HasFastExitPath = true;
1665 // Check the immediate successors.
1666 if (isReturnBlock(SUCC)) {
1667 if (ShrinkWrapDebugging >= BasicInfo)
1668 DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
1669 << "->" << getBasicBlockName(SUCC) << "\n";
1672 // Traverse df from SUCC, look for a branch block.
1673 std::string exitPath = getBasicBlockName(SUCC);
1674 for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
1675 BE = df_end(SUCC); BI != BE; ++BI) {
1676 MachineBasicBlock* SBB = *BI;
1677 // Reject paths with branch nodes.
1678 if (SBB->succ_size() > 1) {
1680 HasFastExitPath = false;
1684 exitPath += "->" + getBasicBlockName(SBB);
1687 if (HasFastExitPath) {
1688 if (ShrinkWrapDebugging >= BasicInfo)
1689 DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
1690 << "->" << exitPath << "\n";
1697 /// verifySpillRestorePlacement - check the current spill/restore
1698 /// sets for safety. Attempt to find spills without restores or
1699 /// restores without spills.
1700 /// Spills: walk df from each MBB in spill set ensuring that
1701 /// all CSRs spilled at MMBB are restored on all paths
1702 /// from MBB to all exit blocks.
1703 /// Restores: walk idf from each MBB in restore set ensuring that
1704 /// all CSRs restored at MBB are spilled on all paths
1707 void PEI::verifySpillRestorePlacement() {
1708 unsigned numReturnBlocks = 0;
1709 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1710 MBBI != MBBE; ++MBBI) {
1711 MachineBasicBlock* MBB = MBBI;
1712 if (isReturnBlock(MBB) || MBB->succ_size() == 0)
1715 for (CSRegBlockMap::iterator BI = CSRSave.begin(),
1716 BE = CSRSave.end(); BI != BE; ++BI) {
1717 MachineBasicBlock* MBB = BI->first;
1718 CSRegSet spilled = BI->second;
1721 if (spilled.empty())
1724 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1725 << stringifyCSRegSet(spilled)
1726 << " RESTORE[" << getBasicBlockName(MBB) << "] = "
1727 << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1729 if (CSRRestore[MBB].intersects(spilled)) {
1730 restored |= (CSRRestore[MBB] & spilled);
1733 // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
1734 // we must find restores for all spills w/no intervening spills on all
1735 // paths from MBB to all return blocks.
1736 for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
1737 BE = df_end(MBB); BI != BE; ++BI) {
1738 MachineBasicBlock* SBB = *BI;
1741 // Stop when we encounter spills of any CSRs spilled at MBB that
1742 // have not yet been seen to be restored.
1743 if (CSRSave[SBB].intersects(spilled) &&
1744 !restored.contains(CSRSave[SBB] & spilled))
1746 // Collect the CSRs spilled at MBB that are restored
1747 // at this DF successor of MBB.
1748 if (CSRRestore[SBB].intersects(spilled))
1749 restored |= (CSRRestore[SBB] & spilled);
1750 // If we are at a retun block, check that the restores
1751 // we have seen so far exhaust the spills at MBB, then
1752 // reset the restores.
1753 if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
1754 if (restored != spilled) {
1755 CSRegSet notRestored = (spilled - restored);
1756 DOUT << MF->getFunction()->getName() << ": "
1757 << stringifyCSRegSet(notRestored)
1758 << " spilled at " << getBasicBlockName(MBB)
1759 << " are never restored on path to return "
1760 << getBasicBlockName(SBB) << "\n";
1767 // Check restore placements.
1768 for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1769 BE = CSRRestore.end(); BI != BE; ++BI) {
1770 MachineBasicBlock* MBB = BI->first;
1771 CSRegSet restored = BI->second;
1774 if (restored.empty())
1777 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1778 << stringifyCSRegSet(CSRSave[MBB])
1779 << " RESTORE[" << getBasicBlockName(MBB) << "] = "
1780 << stringifyCSRegSet(restored) << "\n";
1782 if (CSRSave[MBB].intersects(restored)) {
1783 spilled |= (CSRSave[MBB] & restored);
1785 // Walk inverse depth first from MBB to find spills of all
1786 // CSRs restored at MBB:
1787 for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
1788 BE = idf_end(MBB); BI != BE; ++BI) {
1789 MachineBasicBlock* PBB = *BI;
1792 // Stop when we encounter restores of any CSRs restored at MBB that
1793 // have not yet been seen to be spilled.
1794 if (CSRRestore[PBB].intersects(restored) &&
1795 !spilled.contains(CSRRestore[PBB] & restored))
1797 // Collect the CSRs restored at MBB that are spilled
1798 // at this DF predecessor of MBB.
1799 if (CSRSave[PBB].intersects(restored))
1800 spilled |= (CSRSave[PBB] & restored);
1802 if (spilled != restored) {
1803 CSRegSet notSpilled = (restored - spilled);
1804 DOUT << MF->getFunction()->getName() << ": "
1805 << stringifyCSRegSet(notSpilled)
1806 << " restored at " << getBasicBlockName(MBB)
1807 << " are never spilled\n";
1812 // Debugging print methods.
1813 std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
1814 std::ostringstream name;
1816 if (MBB->getBasicBlock())
1817 name << MBB->getBasicBlock()->getName();
1819 name << "_MBB_" << MBB->getNumber();
1824 std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
1825 const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
1826 const std::vector<CalleeSavedInfo> CSI =
1827 MF->getFrameInfo()->getCalleeSavedInfo();
1829 std::ostringstream srep;
1830 if (CSI.size() == 0) {
1835 CSRegSet::iterator I = s.begin(), E = s.end();
1837 unsigned reg = CSI[*I].getReg();
1838 srep << TRI->getName(reg);
1839 for (++I; I != E; ++I) {
1840 reg = CSI[*I].getReg();
1842 srep << TRI->getName(reg);
1849 void PEI::dumpSet(const CSRegSet& s) {
1850 DOUT << stringifyCSRegSet(s) << "\n";
1853 void PEI::dumpUsed(MachineBasicBlock* MBB) {
1855 DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
1856 << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
1860 void PEI::dumpAllUsed() {
1861 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1862 MBBI != MBBE; ++MBBI) {
1863 MachineBasicBlock* MBB = MBBI;
1868 void PEI::dumpSets(MachineBasicBlock* MBB) {
1870 DOUT << getBasicBlockName(MBB) << " | "
1871 << stringifyCSRegSet(CSRUsed[MBB]) << " | "
1872 << stringifyCSRegSet(AnticIn[MBB]) << " | "
1873 << stringifyCSRegSet(AnticOut[MBB]) << " | "
1874 << stringifyCSRegSet(AvailIn[MBB]) << " | "
1875 << stringifyCSRegSet(AvailOut[MBB]) << "\n";
1879 void PEI::dumpSets1(MachineBasicBlock* MBB) {
1881 DOUT << getBasicBlockName(MBB) << " | "
1882 << stringifyCSRegSet(CSRUsed[MBB]) << " | "
1883 << stringifyCSRegSet(AnticIn[MBB]) << " | "
1884 << stringifyCSRegSet(AnticOut[MBB]) << " | "
1885 << stringifyCSRegSet(AvailIn[MBB]) << " | "
1886 << stringifyCSRegSet(AvailOut[MBB]) << " | "
1887 << stringifyCSRegSet(CSRSave[MBB]) << " | "
1888 << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1892 void PEI::dumpAllSets() {
1893 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1894 MBBI != MBBE; ++MBBI) {
1895 MachineBasicBlock* MBB = MBBI;
1900 void PEI::dumpSRSets() {
1901 for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
1903 if (! CSRSave[MBB].empty()) {
1904 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1905 << stringifyCSRegSet(CSRSave[MBB]);
1906 if (CSRRestore[MBB].empty())
1909 if (! CSRRestore[MBB].empty()) {
1910 if (! CSRSave[MBB].empty())
1912 DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
1913 << stringifyCSRegSet(CSRRestore[MBB]) << "\n";