X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FPrologEpilogInserter.cpp;h=7b6747363663249a39644c410f3e33468ea05533;hb=fecdea0bf77599038eb368db3bc6a38a14900308;hp=a2a8af397a30a7df473fbd76f54aa63447308095;hpb=b58f498f7502e7e1833decbbbb4df771367c7341;p=oota-llvm.git diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index a2a8af397a3..7b674736366 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -19,12 +19,12 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "pei" #include "PrologEpilogInserter.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetMachine.h" @@ -33,26 +33,22 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include using namespace llvm; -char PEI::ID = 0; +// FIXME: For testing purposes only. Remove once the pre-allocation pass +// is done. +extern cl::opt EnableLocalStackAlloc; -static RegisterPass -X("prologepilog", "Prologue/Epilogue Insertion"); +char PEI::ID = 0; -// FIXME: For now, the frame index scavenging is off by default and only -// used by the Thumb1 target. When it's the default and replaces the current -// on-the-fly PEI scavenging for all targets, requiresRegisterScavenging() -// will replace this. -cl::opt -FrameIndexVirtualScavenging("enable-frame-index-scavenging", - cl::Hidden, - cl::desc("Enable frame index elimination with" - "virtual register scavenging")); +INITIALIZE_PASS(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false); /// createPrologEpilogCodeInserter - This function returns a pass that inserts /// prolog and epilog code, and eliminates abstract frame references. @@ -66,14 +62,12 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); + FrameConstantRegMap.clear(); - // Get MachineModuleInfo so that we can track the construction of the - // frame. - if (MachineModuleInfo *MMI = getAnalysisIfAvailable()) - Fn.getFrameInfo()->setMachineModuleInfo(MMI); - - // Calculate the MaxCallFrameSize and HasCalls variables for the function's - // frame information. Also eliminates call frame pseudo instructions. + // Calculate the MaxCallFrameSize and AdjustsStack variables for the + // function's frame information. Also eliminates call frame pseudo + // instructions. calculateCallsInformation(Fn); // Allow the target machine to make some adjustments to the function @@ -85,10 +79,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: - // - with shrink wrapping, place spills and restores to tightly + // - With shrink wrapping, place spills and restores to tightly // enclose regions in the Machine CFG of the function where - // they are used. Without shrink wrapping - // - default (no shrink wrapping), place all spills in the + // they are used. + // - Without shink wrapping (default), place all spills in the // entry block, all restores in return blocks. placeCSRSpillsAndRestores(Fn); @@ -105,8 +99,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Add prolog and epilog code to the function. This function is required // to align the stack frame as necessary for any stack variables or - // called functions. Because of this, calculateCalleeSavedRegisters - // must be called before this function in order to set the HasCalls + // called functions. Because of this, calculateCalleeSavedRegisters() + // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. if (!F->hasFnAttr(Attribute::Naked)) insertPrologEpilogCode(Fn); @@ -140,14 +134,15 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { } #endif -/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls +/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack /// variables for the function's frame information and eliminate call frame /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool HasCalls = false; + bool AdjustsStack = MFI->adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); @@ -167,26 +162,26 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { " instructions should have a single immediate argument!"); unsigned Size = I->getOperand(0).getImm(); if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; - HasCalls = true; + AdjustsStack = true; FrameSDOps.push_back(I); - } else if (I->getOpcode() == TargetInstrInfo::INLINEASM) { - // An InlineAsm might be a call; assume it is to get the stack frame - // aligned correctly for calls. - HasCalls = true; + } else if (I->isInlineAsm()) { + // Some inline asm's need a stack frame, as indicated by operand 1. + if (I->getOperand(1).getImm()) + AdjustsStack = true; } - MachineFrameInfo *FFI = Fn.getFrameInfo(); - FFI->setHasCalls(HasCalls); - FFI->setMaxCallFrameSize(MaxCallFrameSize); + MFI->setAdjustsStack(AdjustsStack); + MFI->setMaxCallFrameSize(MaxCallFrameSize); for (std::vector::iterator i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { MachineBasicBlock::iterator I = *i; // If call frames are not being included as part of the stack frame, and - // there is no dynamic allocation (therefore referencing frame slots off - // sp), leave the pseudo ops alone. We'll eliminate them later. - if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) + // the target doesn't indicate otherwise, remove the call frame pseudos + // here. The sub/add sp instruction pairs are still inserted, but we don't + // need to track the SP adjustment for frame index elimination. + if (RegInfo->canSimplifyCallFramePseudos(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } @@ -197,7 +192,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); @@ -210,22 +205,21 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (CSRegs == 0 || CSRegs[0] == 0) return; - // Figure out which *callee saved* registers are modified by the current - // function, thus needing to be saved and restored in the prolog/epilog. - const TargetRegisterClass * const *CSRegClasses = - RegInfo->getCalleeSavedRegClasses(&Fn); + // In Naked functions we aren't going to save any registers. + if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + return; std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + CSI.push_back(CalleeSavedInfo(Reg)); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); *AliasSet; ++AliasSet) { // Check alias registers too. if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + CSI.push_back(CalleeSavedInfo(Reg)); break; } } @@ -244,7 +238,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { for (std::vector::iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = I->getRegClass(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { @@ -268,18 +262,18 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); } I->setFrameIdx(FrameIdx); } - FFI->setCalleeSavedInfo(CSI); + MFI->setCalleeSavedInfo(CSI); } /// insertCSRSpillsAndRestores - Insert spill and restore code for @@ -287,30 +281,33 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. - MachineFrameInfo *FFI = Fn.getFrameInfo(); - const std::vector &CSI = FFI->getCalleeSavedInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector &CSI = MFI->getCalleeSavedInfo(); - FFI->setCalleeSavedInfoValid(true); + MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) return; const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; if (! ShrinkWrapThisFunction) { // Spill using target interface. I = EntryBlock->begin(); - if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) { + if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true, - CSI[i].getFrameIdx(), CSI[i].getRegClass()); + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); } } @@ -332,11 +329,13 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that preceed it. - if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) { + if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(), + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), - CSI[i].getRegClass()); + RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -380,10 +379,12 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MBB->addLiveIn(blockCSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*MBB, I, Reg, true, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass()); + RC, TRI); } } @@ -429,9 +430,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that preceed it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass()); + RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -448,14 +451,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void -AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) - Offset += FFI->getObjectSize(FrameIdx); + Offset += MFI->getObjectSize(FrameIdx); - unsigned Align = FFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI->getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -465,10 +468,12 @@ AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, Offset = (Offset + Align - 1) / Align * Align; if (StackGrowsDown) { - FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { - FFI->setObjectOffset(FrameIdx, Offset); - Offset += FFI->getObjectSize(FrameIdx); + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); } } @@ -482,9 +487,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *FFI = Fn.getFrameInfo(); - - unsigned MaxAlign = 1; + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction @@ -501,17 +504,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. - FixedOff = -FFI->getObjectOffset(i); + FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. - FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } @@ -520,78 +523,127 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { - // If stack grows down, we need to add size of find the lowest + // If the stack grows down, we need to add the size to find the lowest // address of the object. - Offset += FFI->getObjectSize(i); + Offset += MFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, -Offset); // Set the computed offset + MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { - unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, Offset); - Offset += FFI->getObjectSize(i); + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); } } + unsigned MaxAlign = MFI->getMaxAlignment(); + // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && RegInfo->hasFP(Fn)) { + if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + } + + // FIXME: Once this is working, then enable flag will change to a target + // check for whether the frame is large enough to want to use virtual + // frame index registers. Functions which don't want/need this optimization + // will continue to use the existing code path. + if (EnableLocalStackAlloc) { + unsigned Align = MFI->getLocalFrameMaxAlign(); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + // Store the offset of the start of the local allocation block. This + // will be used later when resolving frame base virtual register pseudos. + MFI->setLocalFrameBaseOffset(Offset); + + DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + // Resolve offsets for objects in the local block. + for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { + std::pair Entry = MFI->getLocalFrameObjectMap(i); + int64_t FIOffset = MFI->getLocalFrameBaseOffset() + Entry.second; + + AdjustStackOffset(MFI, Entry.first, StackGrowsDown, FIOffset, MaxAlign); + } } // Make sure that the stack protector comes before the local variables on the // stack. - if (FFI->getStackProtectorIndex() >= 0) - AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, + SmallSet LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i)) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && (int)i == RS->getScavengingFrameIndex()) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + LargeStackObjs.insert(i); + } + } + // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i)) + continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; - if (FFI->isDeadObjectIndex(i)) + if (MFI->isDeadObjectIndex(i)) continue; - if (FFI->getStackProtectorIndex() == (int)i) + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (LargeStackObjs.count(i)) continue; - AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && !RegInfo->hasFP(Fn)) { + if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) - Offset += FFI->getMaxCallFrameSize(); + if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to @@ -599,28 +651,23 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0)) + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); - // If the frame pointer is eliminated, all frame offsets will be relative - // to SP not FP; align to MaxAlign so this works. + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset - LocalAreaOffset); - - // Remember the required stack alignment in case targets need it to perform - // dynamic stack alignment. - if (MaxAlign > FFI->getMaxAlignment()) - FFI->setMaxAlignment(MaxAlign); + MFI->setStackSize(Offset - LocalAreaOffset); } - /// insertPrologEpilogCode - Scan the function for modified callee saved /// registers, insert spill code for these callee saved registers, then add /// prolog and epilog code to the function. @@ -639,7 +686,6 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { } } - /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. /// @@ -655,13 +701,11 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); - // Pre-allocate space for frame index mappings. If more space is needed, - // the map will be grown later. - if (FrameIndexVirtualScavenging) - FrameConstantRegMap.grow(Fn.getRegInfo().getLastVirtReg() + 128); - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { +#ifndef NDEBUG + int SPAdjCount = 0; // frame setup / destroy count. +#endif int SPAdj = 0; // SP offset due to call frame setup / destroy. if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); @@ -669,6 +713,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { +#ifndef NDEBUG + // Track whether we see even pairs of them + SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; +#endif // Remember how much SP has been adjusted to create the call // frame. int Size = I->getOperand(0).getImm(); @@ -687,7 +735,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else - I = next(PrevI); + I = llvm::next(PrevI); continue; } @@ -708,7 +756,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - int Value; + TargetRegisterInfo::FrameIndexValue Value; unsigned VReg = TRI.eliminateFrameIndex(MI, SPAdj, &Value, FrameIndexVirtualScavenging ? NULL : RS); @@ -716,7 +764,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { assert (FrameIndexVirtualScavenging && "Not scavenging, but virtual returned from " "eliminateFrameIndex()!"); - FrameConstantRegMap.grow(VReg); FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); } @@ -736,7 +783,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } - assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); + // If we have evenly matched pairs of frame setup / destroy instructions, + // make sure the adjustments come out to zero. If we don't have matched + // pairs, we can't be sure the missing bit isn't in another basic block + // due to a custom inserter playing tricks, so just asserting SPAdj==0 + // isn't sufficient. See tMOVCC on Thumb1, for example. + assert((SPAdjCount || SPAdj == 0) && + "Unbalanced call frame setup / destroy pairs?"); } } @@ -744,10 +797,12 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { /// the instruciton range. Return the operand number of the kill in Operand. static MachineBasicBlock::iterator findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, - unsigned Reg, unsigned *Operand) { + unsigned Reg) { // Scan forward to find the last use of this virtual register for (++I; I != ME; ++I) { MachineInstr *MI = I; + bool isDefInsn = false; + bool isKillInsn = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) if (MI->getOperand(i).isReg()) { unsigned OpReg = MI->getOperand(i).getReg(); @@ -755,13 +810,14 @@ findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, continue; assert (OpReg == Reg && "overlapping use of scavenged index register!"); - // If this is the killing use, we're done - if (MI->getOperand(i).isKill()) { - if (Operand) - *Operand = i; - return I; - } + // If this is the killing use, we have a candidate. + if (MI->getOperand(i).isKill()) + isKillInsn = true; + else if (MI->getOperand(i).isDef()) + isDefInsn = true; } + if (isKillInsn && !isDefInsn) + return I; } // If we hit the end of the basic block, there was no kill of // the virtual register, which is wrong. @@ -778,107 +834,153 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); + // FIXME: The logic flow in this function is still too convoluted. + // It needs a cleanup refactoring. Do that in preparation for tracking + // more than one scratch register value and using ranges to find + // available scratch registers. unsigned CurrentVirtReg = 0; unsigned CurrentScratchReg = 0; - unsigned PrevScratchReg = 0; - int PrevValue; - MachineInstr *PrevLastUseMI; - unsigned PrevLastUseOp; + bool havePrevValue = false; + TargetRegisterInfo::FrameIndexValue PrevValue(0,0); + TargetRegisterInfo::FrameIndexValue Value(0,0); + MachineInstr *PrevLastUseMI = NULL; + unsigned PrevLastUseOp = 0; + bool trackingCurrentValue = false; + int SPAdj = 0; // The instruction stream may change in the loop, so check BB->end() // directly. - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; - // Likewise, call getNumOperands() each iteration, as the MI may change - // inside the loop (with 'i' updated accordingly). - for (unsigned i = 0; i != MI->getNumOperands(); ++i) + bool isDefInsn = false; + bool isKillInsn = false; + bool clobbersScratchReg = false; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // If we have an active scavenged register, we shouldn't be - // seeing any references to it. - assert (Reg != CurrentScratchReg - && "overlapping use of scavenged frame index register!"); - // If we have a previous scratch reg, check and see if anything // here kills whatever value is in there. - if (Reg == PrevScratchReg) { + if (Reg == CurrentScratchReg) { if (MO.isUse()) { // Two-address operands implicitly kill if (MO.isKill() || MI->isRegTiedToDefOperand(i)) - PrevScratchReg = 0; + clobbersScratchReg = true; } else { assert (MO.isDef()); - PrevScratchReg = 0; + clobbersScratchReg = true; } } continue; } - - // If we already have a scratch for this virtual register, use it + // If this is a def, remember that this insn defines the value. + // This lets us properly consider insns which re-use the scratch + // register, such as r2 = sub r2, #imm, in the middle of the + // scratch range. + if (MO.isDef()) + isDefInsn = true; + + // Have we already allocated a scratch register for this virtual? if (Reg != CurrentVirtReg) { - int Value = FrameConstantRegMap[Reg].first; - int SPAdj = FrameConstantRegMap[Reg].second; + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // We can't have nested virtual register live ranges because + // there's only a guarantee of one scavenged register at a time. + assert (CurrentVirtReg == 0 && + "overlapping frame index virtual registers!"); + + // If the target gave us information about what's in the register, + // we can use that to re-use scratch regs. + DenseMap::iterator Entry = + FrameConstantRegMap.find(Reg); + trackingCurrentValue = Entry != FrameConstantRegMap.end(); + if (trackingCurrentValue) { + SPAdj = (*Entry).second.second; + Value = (*Entry).second.first; + } else { + SPAdj = 0; + Value.first = 0; + Value.second = 0; + } // If the scratch register from the last allocation is still // available, see if the value matches. If it does, just re-use it. - if (PrevScratchReg && Value == PrevValue) { + if (trackingCurrentValue && havePrevValue && PrevValue == Value) { // FIXME: This assumes that the instructions in the live range // for the virtual register are exclusively for the purpose - // of populating the value in the register. That reasonable + // of populating the value in the register. That's reasonable // for these frame index registers, but it's still a very, very - // strong assumption. Perhaps this implies that the frame index - // elimination should be before register allocation, with - // conservative heuristics since we'll know less then, and - // the reuse calculations done directly when doing the code-gen? + // strong assumption. rdar://7322732. Better would be to + // explicitly check each instruction in the range for references + // to the virtual register. Only delete those insns that + // touch the virtual register. // Find the last use of the new virtual register. Remove all // instruction between here and there, and update the current // instruction to reference the last use insn instead. MachineBasicBlock::iterator LastUseMI = - findLastUseReg(I, BB->end(), Reg, &i); + findLastUseReg(I, BB->end(), Reg); + // Remove all instructions up 'til the last use, since they're // just calculating the value we already have. BB->erase(I, LastUseMI); - MI = I = LastUseMI; + I = LastUseMI; - CurrentScratchReg = PrevScratchReg; - // Extend the live range of the register + // Extend the live range of the scratch register PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); RS->setUsed(CurrentScratchReg); - } else { - // When we first encounter a new virtual register, it - // must be a definition. - assert(MI->getOperand(i).isDef() && - "frame index virtual missing def!"); - // We can't have nested virtual register live ranges because - // there's only a guarantee of one scavenged register at a time. - assert (CurrentVirtReg == 0 && - "overlapping frame index virtual registers!"); CurrentVirtReg = Reg; - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->FindUnusedReg(RC); - if (CurrentScratchReg == 0) - // No register is "free". Scavenge a register. - CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); - PrevValue = Value; + // We deleted the instruction we were scanning the operands of. + // Jump back to the instruction iterator loop. Don't increment + // past this instruction since we updated the iterator already. + DoIncr = false; + break; } + + // Scavenge a new scratch register + CurrentVirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); + PrevValue = Value; } + // replace this reference to the virtual register with the + // scratch register. assert (CurrentScratchReg && "Missing scratch register!"); MI->getOperand(i).setReg(CurrentScratchReg); - // If this is the last use of the register, stop tracking it. if (MI->getOperand(i).isKill()) { - PrevScratchReg = CurrentScratchReg; + isKillInsn = true; + PrevLastUseOp = i; PrevLastUseMI = MI; - CurrentScratchReg = CurrentVirtReg = 0; } } - RS->forward(MI); + } + // If this is the last use of the scratch, stop tracking it. The + // last use will be a kill operand in an instruction that does + // not also define the scratch register. + if (isKillInsn && !isDefInsn) { + CurrentVirtReg = 0; + havePrevValue = trackingCurrentValue; + } + // Similarly, notice if instruction clobbered the value in the + // register we're tracking for possible later reuse. This is noted + // above, but enforced here since the value is still live while we + // process the rest of the operands of the instruction. + if (clobbersScratchReg) { + havePrevValue = false; + CurrentScratchReg = 0; + } + if (DoIncr) { + RS->forward(I); + ++I; + } } } }