X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86FrameLowering.cpp;h=875c63c4f6f80f99fb77bfbb85bc98b55a0d7f99;hb=c324f8dc3e84438ff74da6df317bb91b78f99a59;hp=a2f9efb5aa0c6f243a61d66d32758b9b41b8ccec;hpb=dff75493e8040cefd56c9b984e6af5b60c32ec45;p=oota-llvm.git diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index a2f9efb5aa0..875c63c4f6f 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -18,6 +18,7 @@ #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -286,6 +287,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); if (isSub) MI->setFlag(MachineInstr::FrameSetup); + else + MI->setFlag(MachineInstr::FrameDestroy); Offset -= ThisVal; continue; } @@ -295,6 +298,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue); if (isSub) MI.setMIFlag(MachineInstr::FrameSetup); + else + MI.setMIFlag(MachineInstr::FrameDestroy); Offset -= ThisVal; } @@ -499,7 +504,7 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con const MachineFrameInfo *MFI = MF.getFrameInfo(); uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. unsigned StackAlign = getStackAlignment(); - if (ForceStackAlign) { + if (MF.getFunction()->hasFnAttribute("stackrealign")) { if (MFI->hasCalls()) MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; else if (MaxAlign < SlotSize) @@ -510,15 +515,14 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, + DebugLoc DL, unsigned Reg, uint64_t MaxAlign) const { uint64_t Val = -MaxAlign; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), - StackPtr) - .addReg(StackPtr) - .addImm(Val) - .setMIFlag(MachineInstr::FrameSetup); + unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) + .addReg(Reg) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); @@ -619,6 +623,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86MachineFunctionInfo *X86FI = MF.getInfo(); uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool IsFunclet = MBB.isEHFuncletEntry(); + bool IsClrFunclet = + IsFunclet && + classifyEHPersonality(Fn->getPersonalityFn()) == EHPersonality::CoreCLR; bool HasFP = hasFP(MF); bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv()); bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); @@ -631,6 +639,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, ? getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr; unsigned BasePtr = TRI->getBaseRegister(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. DebugLoc DL; // Add RETADDR move area to callee saved frame size. @@ -696,40 +707,25 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, uint64_t NumBytes = 0; int stackGrowth = -SlotSize; - if (MBB.isEHFuncletEntry()) { - assert(STI.isOSWindows() && "funclets only supported on Windows"); - - // Set up the FramePtr and BasePtr physical registers using the address - // passed as EBP or RDX by the MSVC EH runtime. - if (STI.is32Bit()) { - MBBI = restoreWin32EHFrameAndBasePtr(MBB, MBBI, DL); - } else { - // FIXME: Add SEH directives. - NeedsWinCFI = false; - // Immediately spill RDX into the home slot. The runtime cares about this. - unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX; - // MOV64mr %rdx, 16(%rsp) - unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), - StackPtr, true, 16) - .addReg(RDX) + // Find the funclet establisher parameter + unsigned Establisher = X86::NoRegister; + if (IsClrFunclet) + Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX; + else if (IsFunclet) + Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; + + if (IsWin64Prologue && IsFunclet & !IsClrFunclet) { + // Immediately spill establisher into the home slot. + // The runtime cares about this. + // MOV64mr %rdx, 16(%rsp) + unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) + .addReg(Establisher) .setMIFlag(MachineInstr::FrameSetup); - // PUSH64r %rbp - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) - .addReg(MachineFramePtr, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); - // MOV64rr %rdx, %rbp - unsigned MOVrr = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; - BuildMI(MBB, MBBI, DL, TII.get(MOVrr), FramePtr) - .addReg(RDX) - .setMIFlag(MachineInstr::FrameSetup); - assert(!TRI->hasBasePointer(MF) && - "x64 funclets with base ptrs not yet implemented"); - } + MBB.addLiveIn(Establisher); + } - // For EH funclets, only allocate enough space for outgoing calls. - NumBytes = MFI->getMaxCallFrameSize(); - } else if (HasFP) { + if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; // If required, include space for extra hidden slot for stashing base pointer. @@ -745,7 +741,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. - MFI->setOffsetAdjustment(-NumBytes); + if (!IsFunclet) + MFI->setOffsetAdjustment(-NumBytes); + else + assert(MFI->getOffsetAdjustment() == -(int)NumBytes && + "should calculate same local variable offset for funclets"); // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) @@ -771,30 +771,40 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } - if (!IsWin64Prologue) { + if (!IsWin64Prologue && !IsFunclet) { // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); - } - if (NeedsDwarfCFI) { - // Mark effective beginning of when frame pointer becomes valid. - // Define the current CFA to use the EBP/RBP register. - unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); + if (NeedsDwarfCFI) { + // Mark effective beginning of when frame pointer becomes valid. + // Define the current CFA to use the EBP/RBP register. + unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); + BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister( + nullptr, DwarfFramePtr)); + } } - // Mark the FramePtr as live-in in every block. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - I->addLiveIn(MachineFramePtr); + // Mark the FramePtr as live-in in every block. Don't do this again for + // funclet prologues. + if (!IsFunclet) { + for (MachineBasicBlock &EveryMBB : MF) + EveryMBB.addLiveIn(MachineFramePtr); + } } else { + assert(!IsFunclet && "funclets without FPs not yet implemented"); NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } + // For EH funclets, only allocate enough space for outgoing calls. Save the + // NumBytes value that we would've used for the parent frame. + unsigned ParentFrameNumBytes = NumBytes; + if (IsFunclet) + NumBytes = getWinEHFuncletFrameSize(MF); + // Skip the callee-saved push instructions. bool PushedRegs = false; int StackOffset = 2 * stackGrowth; @@ -825,9 +835,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). // Don't do this for Win64, it needs to realign the stack after the prologue. - if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) { + if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); + BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); } // If there is an SUB32ri of ESP immediately before this instruction, merge @@ -846,7 +856,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. uint64_t AlignedNumBytes = NumBytes; - if (IsWin64Prologue && TRI->needsStackRealignment(MF)) + if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { // Check whether EAX is livein for this function. @@ -916,19 +926,28 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); int SEHFrameOffset = 0; + unsigned SPOrEstablisher = IsFunclet ? Establisher : StackPtr; if (IsWin64Prologue && HasFP) { - SEHFrameOffset = calculateSetFPREG(NumBytes); + // Set RBP to a small fixed offset from RSP. In the funclet case, we base + // this calculation on the incoming establisher, which holds the value of + // RSP from the parent frame at the end of the prologue. + SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); if (SEHFrameOffset) addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), - StackPtr, false, SEHFrameOffset); + SPOrEstablisher, false, SEHFrameOffset); else - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr); + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) + .addReg(SPOrEstablisher); - if (NeedsWinCFI) + // If this is not a funclet, emit the CFI describing our frame pointer. + if (NeedsWinCFI && !IsFunclet) BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) .addImm(FramePtr) .addImm(SEHFrameOffset) .setMIFlag(MachineInstr::FrameSetup); + } else if (IsFunclet && STI.is32Bit()) { + // Reset EBP / ESI to something good for funclets. + MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); } while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { @@ -961,9 +980,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Win64 requires aligning the stack after the prologue. if (IsWin64Prologue && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); + BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign); } + // We already dealt with stack realignment and funclets above. + if (IsFunclet && STI.is32Bit()) + return; + // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer @@ -972,7 +995,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Update the base pointer with the current stack pointer. unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) - .addReg(StackPtr) + .addReg(SPOrEstablisher) .setMIFlag(MachineInstr::FrameSetup); if (X86FI->getRestoreBasePointer()) { // Stash value of base pointer. Saving RSP instead of EBP shortens @@ -980,11 +1003,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true, X86FI->getRestoreBasePointerOffset()) - .addReg(StackPtr) + .addReg(SPOrEstablisher) .setMIFlag(MachineInstr::FrameSetup); } - if (X86FI->getHasSEHFramePtrSave()) { + if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { // Stash the value of the frame pointer relative to the base pointer for // Win32 EH. This supports Win32 EH, which does the inverse of the above: // it recovers the frame pointer from the base pointer rather than the @@ -1029,7 +1052,7 @@ bool X86FrameLowering::canUseLEAForSPInEpilogue( static bool isFuncletReturnInstr(MachineInstr *MI) { switch (MI->getOpcode()) { case X86::CATCHRET: - case X86::CATCHRET64: + case X86::CLEANUPRET: return true; default: return false; @@ -1037,6 +1060,22 @@ static bool isFuncletReturnInstr(MachineInstr *MI) { llvm_unreachable("impossible"); } +unsigned X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { + // This is the size of the pushed CSRs. + unsigned CSSize = + MF.getInfo()->getCalleeSavedFrameSize(); + // This is the amount of stack a funclet needs to allocate. + unsigned MaxCallSize = MF.getFrameInfo()->getMaxCallFrameSize(); + // RBP is not included in the callee saved register block. After pushing RBP, + // everything is 16 byte aligned. Everything we allocate before an outgoing + // call must also be 16 byte aligned. + unsigned FrameSizeMinusRBP = + RoundUpToAlignment(CSSize + MaxCallSize, getStackAlignment()); + // Subtract out the size of the callee saved registers. This is how much stack + // each funclet will allocate. + return FrameSizeMinusRBP - CSSize; +} + void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1055,6 +1094,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool NeedsWinCFI = IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry(); + bool IsFunclet = isFuncletReturnInstr(MBBI); + MachineBasicBlock *TargetMBB = nullptr; // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); @@ -1062,15 +1103,26 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; - if (isFuncletReturnInstr(MBBI)) { - NumBytes = MFI->getMaxCallFrameSize(); + if (MBBI->getOpcode() == X86::CATCHRET) { + // SEH shouldn't use catchret. + assert(!isAsynchronousEHPersonality( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())) && + "SEH should not use CATCHRET"); - if (Is64Bit) { - assert(hasFP(MF) && "win64 EH funclets without FP not yet implemented"); - // POP64r %rbp - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), - MachineFramePtr); - } + NumBytes = getWinEHFuncletFrameSize(MF); + assert(hasFP(MF) && "EH funclets without FP not yet implemented"); + TargetMBB = MBBI->getOperand(0).getMBB(); + + // Pop EBP. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); + } else if (MBBI->getOpcode() == X86::CLEANUPRET) { + NumBytes = getWinEHFuncletFrameSize(MF); + assert(hasFP(MF) && "EH funclets without FP not yet implemented"); + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); } else if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; @@ -1083,7 +1135,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Pop EBP. BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr); + TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); } else { NumBytes = StackSize - CSSize; } @@ -1094,14 +1147,36 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator PI = std::prev(MBBI); unsigned Opc = PI->getOpcode(); - if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && - !PI->isTerminator()) + if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && + (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && + Opc != X86::DBG_VALUE && !PI->isTerminator()) break; --MBBI; } MachineBasicBlock::iterator FirstCSPop = MBBI; + if (TargetMBB) { + // Fill EAX/RAX with the address of the target block. + unsigned ReturnReg = STI.is64Bit() ? X86::RAX : X86::EAX; + if (STI.is64Bit()) { + // LEA64r TargetMBB(%rip), %rax + BuildMI(MBB, FirstCSPop, DL, TII.get(X86::LEA64r), ReturnReg) + .addReg(X86::RIP) + .addImm(0) + .addReg(0) + .addMBB(TargetMBB) + .addReg(0); + } else { + // MOV32ri $TargetMBB, %eax + BuildMI(MBB, FirstCSPop, DL, TII.get(X86::MOV32ri), ReturnReg) + .addMBB(TargetMBB); + } + // Record that we've taken the address of TargetMBB and no longer just + // reference it in a terminator. + TargetMBB->setHasAddressTaken(); + } + if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -1112,8 +1187,10 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was - // realigned. - if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { + // realigned. Don't do this if this was a funclet epilogue, since the funclets + // will not do realignment or dynamic stack allocation. + if ((TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) && + !IsFunclet) { if (TRI->needsStackRealignment(MF)) MBBI = FirstCSPop; unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); @@ -1262,21 +1339,42 @@ int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const uint64_t StackSize = MFI->getStackSize(); { #ifndef NDEBUG - // Note: LLVM arranges the stack as: - // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) - // > "Stack Slots" (<--SP) - // We can always address StackSlots from RSP. We can usually (unless - // needsStackRealignment) address CSRs from RSP, but sometimes need to - // address them from RBP. FixedObjects can be placed anywhere in the stack - // frame depending on their specific requirements (i.e. we can actually - // refer to arguments to the function which are stored in the *callers* - // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs - // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. - - assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); - - // We don't handle tail calls, and shouldn't be seeing them - // either. + // LLVM arranges the stack as follows: + // ... + // ARG2 + // ARG1 + // RETADDR + // PUSH RBP <-- RBP points here + // PUSH CSRs + // ~~~~~~~ <-- optional stack realignment dynamic adjustment + // ... + // STACK OBJECTS + // ... <-- RSP after prologue points here + // + // if (hasVarSizedObjects()): + // ... <-- "base pointer" (ESI/RBX) points here + // DYNAMIC ALLOCAS + // ... <-- RSP points here + // + // Case 1: In the simple case of no stack realignment and no dynamic + // allocas, both "fixed" stack objects (arguments and CSRs) are addressable + // with fixed offsets from RSP. + // + // Case 2: In the case of stack realignment with no dynamic allocas, fixed + // stack objects are addressed with RBP and regular stack objects with RSP. + // + // Case 3: In the case of dynamic allocas and stack realignment, RSP is used + // to address stack arguments for outgoing calls and nothing else. The "base + // pointer" points to local variables, and RBP points to fixed objects. + // + // In cases 2 and 3, we can only answer for non-fixed stack objects, and the + // answer we give is relative to the SP after the prologue, and not the + // SP in the middle of the function. + + assert((!TRI->needsStackRealignment(MF) || !MFI->isFixedObjectIndex(FI)) && + "offset from fixed object to SP is not static"); + + // We don't handle tail calls, and shouldn't be seeing them either. int TailCallReturnAddrDelta = MF.getInfo()->getTCReturnAddrDelta(); assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!"); @@ -1432,10 +1530,21 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (CSI.empty()) return false; - // Don't restore CSRs in 32-bit EH funclets. Matches - // spillCalleeSavedRegisters. - if (isFuncletReturnInstr(MI) && STI.is32Bit() && STI.isOSWindows()) - return true; + if (isFuncletReturnInstr(MI) && STI.isOSWindows()) { + // Don't restore CSRs in 32-bit EH funclets. Matches + // spillCalleeSavedRegisters. + if (STI.is32Bit()) + return true; + // Don't restore CSRs before an SEH catchret. SEH except blocks do not form + // funclets. emitEpilogue transforms these to normal jumps. + if (MI->getOpcode() == X86::CATCHRET) { + const Function *Func = MBB.getParent()->getFunction(); + bool IsSEH = isAsynchronousEHPersonality( + classifyEHPersonality(Func->getPersonalityFn())); + if (IsSEH) + return true; + } + } DebugLoc DL = MBB.findDebugLoc(MI); @@ -1458,7 +1567,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, !X86::GR32RegClass.contains(Reg)) continue; - BuildMI(MBB, MI, DL, TII.get(Opc), Reg); + BuildMI(MBB, MI, DL, TII.get(Opc), Reg) + .setMIFlag(MachineInstr::FrameDestroy); } return true; } @@ -1591,7 +1701,7 @@ void X86FrameLowering::adjustForSegmentedStacks( // The MOV R10, RAX needs to be in a different block, since the RET we emit in // allocMBB needs to be last (terminating) instruction. - for (unsigned LI : PrologueMBB.liveins()) { + for (const auto &LI : PrologueMBB.liveins()) { allocMBB->addLiveIn(LI); checkMBB->addLiveIn(LI); } @@ -1863,7 +1973,7 @@ void X86FrameLowering::adjustForHiPEPrologue( MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); - for (unsigned LI : PrologueMBB.liveins()) { + for (const auto &LI : PrologueMBB.liveins()) { stackCheckMBB->addLiveIn(LI); incStackMBB->addLiveIn(LI); } @@ -1923,6 +2033,9 @@ void X86FrameLowering::adjustForHiPEPrologue( bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const { + if (Offset <= 0) + return false; + if (Offset % SlotSize) return false; @@ -1943,9 +2056,11 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, unsigned FoundRegs = 0; auto RegMask = Prev->getOperand(1); - - // Try to find up to NumPops free registers. - for (auto Candidate : X86::GR32_NOREX_NOSPRegClass) { + + auto &RegClass = + Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; + // Try to find up to NumPops free registers. + for (auto Candidate : RegClass) { // Poor man's liveness: // Since we're immediately after a call, any register that is clobbered @@ -1998,8 +2113,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // If the stack pointer can be changed after prologue, turn the // adjcallstackup instruction into a 'sub ESP, ' and the // adjcallstackdown instruction into 'add ESP, ' - if (Amount == 0) - return; // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -2007,19 +2120,65 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, unsigned StackAlign = getStackAlignment(); Amount = RoundUpToAlignment(Amount, StackAlign); + MachineModuleInfo &MMI = MF.getMMI(); + const Function *Fn = MF.getFunction(); + bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool DwarfCFI = !WindowsCFI && + (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); + + // If we have any exception handlers in this function, and we adjust + // the SP before calls, we may need to indicate this to the unwinder + // using GNU_ARGS_SIZE. Note that this may be necessary even when + // Amount == 0, because the preceding function may have set a non-0 + // GNU_ARGS_SIZE. + // TODO: We don't need to reset this between subsequent functions, + // if it didn't change. + bool HasDwarfEHHandlers = !WindowsCFI && + !MF.getMMI().getLandingPads().empty(); + + if (HasDwarfEHHandlers && !isDestroy && + MF.getInfo()->getHasPushSequences()) + BuildCFI(MBB, I, DL, + MCCFIInstruction::createGnuArgsSize(nullptr, Amount)); + + if (Amount == 0) + return; + // Factor out the amount that gets handled inside the sequence // (Pushes of argument for frame setup, callee pops for frame destroy) Amount -= InternalAmt; + // If this is a callee-pop calling convention, and we're emitting precise + // SP-based CFI, emit a CFA adjust for the amount the callee popped. + if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) && + MMI.usePreciseUnwindInfo()) + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); + if (Amount) { // Add Amount to SP to destroy a frame, and subtract to setup. int Offset = isDestroy ? Amount : -Amount; - if (!(MF.getFunction()->optForMinSize() && + if (!(Fn->optForMinSize() && adjustStackWithPops(MBB, I, DL, Offset))) BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); } + if (DwarfCFI && !hasFP(MF)) { + // If we don't have FP, but need to generate unwind information, + // we need to set the correct CFA offset after the stack adjustment. + // How much we adjust the CFA offset depends on whether we're emitting + // CFI only for EH purposes or for debugging. EH only requires the CFA + // offset to be correct at each call site, while for debugging we want + // it to be more precise. + int CFAOffset = Amount; + if (!MMI.usePreciseUnwindInfo()) + CFAOffset += InternalAmt; + CFAOffset = isDestroy ? -CFAOffset : CFAOffset; + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + } + return; } @@ -2040,6 +2199,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { assert(MBB.getParent() && "Block is not attached to a function!"); + // Win64 has strict requirements in terms of epilogue and we are + // not taking a chance at messing with them. + // I.e., unless this block is already an exit block, we can't use + // it as an epilogue. + if (MBB.getParent()->getSubtarget().isTargetWin64() && + !MBB.succ_empty() && !MBB.isReturnBlock()) + return false; + if (canUseLEAForSPInEpilogue(*MBB.getParent())) return true; @@ -2050,9 +2217,9 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { return !terminatorsNeedFlagsAsInput(MBB); } -MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHFrameAndBasePtr( +MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL) const { + DebugLoc DL, bool RestoreSP) const { assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env"); assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32"); assert(STI.is32Bit() && !Uses64BitFramePtr && @@ -2070,15 +2237,22 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHFrameAndBasePtr( // FIXME: Don't set FrameSetup flag in catchret case. int FI = FuncInfo.EHRegNodeFrameIndex; + int EHRegSize = MFI->getObjectSize(FI); + + if (RestoreSP) { + // MOV32rm -EHRegSize(%ebp), %esp + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP), + X86::EBP, true, -EHRegSize) + .setMIFlag(MachineInstr::FrameSetup); + } + unsigned UsedReg; int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg); - int EHRegSize = MFI->getObjectSize(FI); int EndOffset = -EHRegOffset - EHRegSize; - assert(EndOffset >= 0 && - "end of registration object above normal EBP position!"); + FuncInfo.EHRegNodeEndOffset = EndOffset; + if (UsedReg == FramePtr) { // ADD $offset, %ebp - assert(UsedReg == FramePtr); unsigned ADDri = getADDriOpcode(false, EndOffset); BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) .addReg(FramePtr) @@ -2086,21 +2260,35 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHFrameAndBasePtr( .setMIFlag(MachineInstr::FrameSetup) ->getOperand(3) .setIsDead(); - } else { - assert(UsedReg == BasePtr); + assert(EndOffset >= 0 && + "end of registration object above normal EBP position!"); + } else if (UsedReg == BasePtr) { // LEA offset(%ebp), %esi addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr), FramePtr, false, EndOffset) .setMIFlag(MachineInstr::FrameSetup); - // MOV32mr SavedEBPOffset(%esi), %ebp + // MOV32rm SavedEBPOffset(%esi), %ebp assert(X86FI->getHasSEHFramePtrSave()); int Offset = getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); assert(UsedReg == BasePtr); - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), UsedReg, true, - Offset) - .addReg(FramePtr) + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr), + UsedReg, true, Offset) .setMIFlag(MachineInstr::FrameSetup); + } else { + llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr"); } return MBBI; } + +unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { + // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. + unsigned Offset = 16; + // RBP is immediately pushed. + Offset += SlotSize; + // All callee-saved registers are then pushed. + Offset += MF.getInfo()->getCalleeSavedFrameSize(); + // Every funclet allocates enough stack space for the largest outgoing call. + Offset += getWinEHFuncletFrameSize(MF); + return Offset; +}