X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FThumb1FrameLowering.cpp;h=064cff6f570407ffca6c4a2db1140aa9d645913a;hb=ddaf09c1921d4306b865fae11bf9cfdca6b62731;hp=d4d59ea59da1171e77a3c2d0e814e42c7433f72c;hpb=2bfaf521aed6d486b2d80dbf12b84b456100cb47;p=oota-llvm.git diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index d4d59ea59da..064cff6f570 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -1,4 +1,4 @@ -//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====// +//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -12,15 +12,19 @@ //===----------------------------------------------------------------------===// #include "Thumb1FrameLowering.h" -#include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; +Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) + : ARMFrameLowering(sti) {} + bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); @@ -38,28 +42,72 @@ static void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const TargetInstrInfo &TII, DebugLoc dl, - const Thumb1RegisterInfo &MRI, + const ThumbRegisterInfo &MRI, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, MRI, MIFlags); } -void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); + +void Thumb1FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const Thumb1InstrInfo &TII = + *static_cast(STI.getInstrInfo()); + const ThumbRegisterInfo *RegInfo = + static_cast(STI.getRegisterInfo()); + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); + } else { + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); + } + } + } + MBB.erase(I); +} + +void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); - const Thumb1RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + const ThumbRegisterInfo *RegInfo = + static_cast(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); + *static_cast(STI.getInstrInfo()); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); + assert(NumBytes >= ArgRegsSaveSize && + "ArgRegsSaveSize is included in NumBytes"); const std::vector &CSI = MFI->getCalleeSavedInfo(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); + int CFAOffset = 0; // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; @@ -70,14 +118,28 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; - if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, + if (ArgRegsSaveSize) { + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); + CFAOffset -= ArgRegsSaveSize; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + if (NumBytes - ArgRegsSaveSize != 0) { + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); + CFAOffset -= NumBytes - ArgRegsSaveSize; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } return; } @@ -85,6 +147,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (STI.isTargetMachO()) { + GPRCS2Size += 4; + break; + } + // fallthrough case ARM::R4: case ARM::R5: case ARM::R6: @@ -92,62 +163,120 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; - AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { - AFI->addGPRCalleeSavedArea2Frame(FI); - GPRCS2Size += 4; - } else { - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - } - break; default: - AFI->addDPRCalleeSavedAreaFrame(FI); DPRCSSize += 8; } } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; - if (MBBI != MBB.end()) - dl = MBBI->getDebugLoc(); } // Determine starting offsets of spill areas. - unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + bool HasFP = hasFP(MF); + if (HasFP) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; + int FramePtrOffsetInBlock = 0; + unsigned adjustedGPRCS1Size = GPRCS1Size; + if (tryFoldSPUpdateIntoPushPop(STI, MF, std::prev(MBBI), NumBytes)) { + FramePtrOffsetInBlock = NumBytes; + adjustedGPRCS1Size += NumBytes; + NumBytes = 0; + } + + if (adjustedGPRCS1Size) { + CFAOffset -= adjustedGPRCS1Size; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + for (std::vector::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + int FI = I->getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.isTargetMachO()) + break; + // fallthough + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + } + // Adjust FP so it point to the stack slot that contains the previous FP. - if (hasFP(MF)) { + if (HasFP) { + FramePtrOffsetInBlock += + MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0) + .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); + if(FramePtrOffsetInBlock) { + CFAOffset += FramePtrOffsetInBlock; + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } else { + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FramePtr, true))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } - if (NumBytes) + if (NumBytes) { // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); + if (!HasFP) { + CFAOffset -= NumBytes; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + } - if (STI.isTargetELF() && hasFP(MF)) + if (STI.isTargetELF() && HasFP) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); @@ -155,6 +284,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + // Thumb1 does not currently support dynamic stack realignment. Report a + // fatal error rather then silently generate bad code. + if (RegInfo->needsStackRealignment(MF)) + report_fatal_error("Dynamic stack realignment not supported for thumb1."); + // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer @@ -170,14 +304,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - -static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { +static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) @@ -195,26 +322,25 @@ static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert((MBBI->getOpcode() == ARM::tBX_RET || - MBBI->getOpcode() == ARM::tPOP_RET) && - "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); - const Thumb1RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); + const ThumbRegisterInfo *RegInfo = + static_cast(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); + *static_cast(STI.getInstrInfo()); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + assert((unsigned)NumBytes >= ArgRegsSaveSize && + "ArgRegsSaveSize is included in NumBytes"); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); + if (NumBytes - ArgRegsSaveSize != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { @@ -228,7 +354,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + - AFI->getDPRCalleeSavedAreaSize()); + AFI->getDPRCalleeSavedAreaSize() + + ArgRegsSaveSize); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; @@ -236,7 +363,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { - assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); @@ -248,36 +375,153 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, ARM::SP) .addReg(FramePtr)); } else { - if (MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && - prior(MBBI)->getOpcode() == ARM::tPOP) { - MachineBasicBlock::iterator PMBBI = prior(MBBI); - emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); - } else + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = std::prev(MBBI); + if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) + emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); + } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } - if (VARegSaveSize) { - // Unlike T2 and ARM mode, the T1 pop instruction cannot restore - // to LR, and we can't pop the value directly to the PC since - // we need to update the SP after popping the value. Therefore, we - // pop the old LR into R3 as a temporary. + if (needPopSpecialFixUp(MF)) { + bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); + (void)Done; + assert(Done && "Emission of the special fixup failed!?"); + } +} + +bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { + if (!needPopSpecialFixUp(*MBB.getParent())) + return true; - // Move back past the callee-saved register restoration - while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) - ++MBBI; - // Epilogue for vararg functions: pop LR to R3 and branch off it. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); + MachineBasicBlock *TmpMBB = const_cast(&MBB); + return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); +} - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize); +bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { + ARMFunctionInfo *AFI = + const_cast(&MF)->getInfo(); + if (AFI->getArgRegsSaveSize()) + return true; - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill)); + bool IsV4PopReturn = false; + for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo()) + if (CSI.getReg() == ARM::LR) + IsV4PopReturn = true; + return IsV4PopReturn && STI.hasV4TOps() && !STI.hasV5TOps(); +} + +bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, + bool DoIt) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ThumbRegisterInfo *RegInfo = + static_cast(STI.getRegisterInfo()); + + // When we need a special fix up for POP, this means that + // we either cannot use PC in POP or we have to update + // SP after poping the return address. + // In other words, we cannot use a pop {pc} like construction + // here, no matter what. + auto MBBI = MBB.getFirstTerminator(); + + // Look for a temporary register to use. + // First, compute the liveness information. + LivePhysRegs UsedRegs(STI.getRegisterInfo()); + UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true); + // The semantic of pristines changed recently and now, + // the callee-saved registers that are touched in the function + // are not part of the pristines set anymore. + // Add those callee-saved now. + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSRegs[i]; ++i) + UsedRegs.addReg(CSRegs[i]); + + DebugLoc dl = DebugLoc(); + if (MBBI != MBB.end()) { + dl = MBBI->getDebugLoc(); + auto InstUpToMBBI = MBB.end(); + // The post-decrement is on purpose here. + // We want to have the liveness right before MBBI. + while (InstUpToMBBI-- != MBBI) + UsedRegs.stepBackward(*InstUpToMBBI); + } + + // Look for a register that can be directly use in the POP. + unsigned PopReg = 0; + // And some temporary register, just in case. + unsigned TemporaryReg = 0; + BitVector PopFriendly = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); + // Rebuild the GPRs from the high registers because they are removed + // form the GPR reg class for thumb1. + BitVector GPRsNoLRSP = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + GPRsNoLRSP |= PopFriendly; + GPRsNoLRSP.reset(ARM::LR); + GPRsNoLRSP.reset(ARM::SP); + GPRsNoLRSP.reset(ARM::PC); + for (int Register = GPRsNoLRSP.find_first(); Register != -1; + Register = GPRsNoLRSP.find_next(Register)) { + if (!UsedRegs.contains(Register)) { + // Remember the first pop-friendly register and exit. + if (PopFriendly.test(Register)) { + PopReg = Register; + TemporaryReg = 0; + break; + } + // Otherwise, remember that the register will be available to + // save a pop-friendly register. + TemporaryReg = Register; + } + } + + if (!DoIt && !PopReg && !TemporaryReg) + return false; + + assert((PopReg || TemporaryReg) && "Cannot get LR"); + + if (TemporaryReg) { + assert(!PopReg && "Unnecessary MOV is about to be inserted"); + PopReg = PopFriendly.find_first(); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(TemporaryReg, RegState::Define) + .addReg(PopReg, RegState::Kill)); + } + + assert(PopReg && "Do not know how to get LR"); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(PopReg, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + if (!TemporaryReg && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) { + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(PopReg, RegState::Kill); + AddDefaultPred(MIB); + MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); + return true; + } + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); + + if (TemporaryReg) { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(PopReg, RegState::Define) + .addReg(TemporaryReg, RegState::Kill)); } + + return true; } bool Thumb1FrameLowering:: @@ -289,10 +533,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, return false; DebugLoc DL; - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - - if (MI != MBB.end()) DL = MI->getDebugLoc(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)); AddDefaultPred(MIB); @@ -329,22 +570,27 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); - bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; - DebugLoc DL = MI->getDebugLoc(); + bool isVarArg = AFI->getArgRegsSaveSize() > 0; + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); bool NumRegs = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (Reg == ARM::LR) { + if (Reg == ARM::LR && MBB.succ_empty()) { // Special epilogue for vararg functions. See emitEpilogue if (isVarArg) continue; + // ARMv4T requires BX, see emitEpilogue + if (STI.hasV4TOps() && !STI.hasV5TOps()) + continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true));