From 100773aba1086244b13a103005be1c9a63814a99 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 5 Nov 2015 21:09:49 +0000 Subject: [PATCH] [WinEH] Fix funclet prologues with stack realignment We already had a test for this for 32-bit SEH catchpads, but those don't actually create funclets. We had a bug that only appeared in funclet prologues, where we would establish EBP and ESI as our FP and BP, and then downstream prologue code would overwrite them. While I was at it, I fixed Win64+funclets+stackrealign. This issue doesn't come up as often there due to the ABI requring 16 byte stack alignment, but now we can rest easy that AVX and WinEH will work well together =P. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252210 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/WinException.cpp | 21 +++-- lib/CodeGen/AsmPrinter/WinException.h | 2 +- .../SelectionDAG/FunctionLoweringInfo.cpp | 8 +- lib/Target/X86/X86FrameLowering.cpp | 80 ++++++++++++------- lib/Target/X86/X86FrameLowering.h | 2 +- lib/Target/X86/X86ISelLowering.cpp | 5 -- test/CodeGen/X86/cleanuppad-realign.ll | 77 ++++++++++++++++++ 7 files changed, 147 insertions(+), 48 deletions(-) create mode 100644 test/CodeGen/X86/cleanuppad-realign.ll diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 3999268b407..33eb3906fce 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -299,12 +299,17 @@ const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf, Asm->OutContext); } -int WinException::getFrameIndexOffset(int FrameIndex) { +int WinException::getFrameIndexOffset(int FrameIndex, WinEHFuncInfo &FuncInfo) { const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); unsigned UnusedReg; if (Asm->MAI->usesWindowsCFI()) return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg); - return TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); + // For 32-bit, offsets should be relative to the end of the EH registration + // node. For 64-bit, it's relative to SP at the end of the prologue. + assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); + int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); + Offset += FuncInfo.EHRegNodeEndOffset; + return Offset; } namespace { @@ -613,7 +618,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { int UnwindHelpOffset = 0; if (Asm->MAI->usesWindowsCFI()) - UnwindHelpOffset = getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx); + UnwindHelpOffset = + getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo); MCSymbol *UnwindMapXData = nullptr; MCSymbol *TryBlockMapXData = nullptr; @@ -733,14 +739,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // emit an offset of zero, indicating that no copy will occur. const MCExpr *FrameAllocOffsetRef = nullptr; if (HT.CatchObj.FrameIndex != INT_MAX) { - int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex); - // For 32-bit, the catch object offset is relative to the end of the - // EH registration node. For 64-bit, it's relative to SP at the end of - // the prologue. - if (!shouldEmitPersonality) { - assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); - Offset += FuncInfo.EHRegNodeEndOffset; - } + int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo); FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext); } else { FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h index 02134d6aa98..54c5f3a9a0e 100644 --- a/lib/CodeGen/AsmPrinter/WinException.h +++ b/lib/CodeGen/AsmPrinter/WinException.h @@ -77,7 +77,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// given index. For targets using CFI (Win64, etc), this is relative to the /// established SP at the end of the prologue. For targets without CFI (Win32 /// only), it is relative to the frame pointer. - int getFrameIndexOffset(int FrameIndex); + int getFrameIndexOffset(int FrameIndex, WinEHFuncInfo &FuncInfo); public: //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index c3744f7494f..52216c2c1e8 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -215,10 +215,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // are really data, and no instructions can live here. if (BB->isEHPad()) { const Instruction *I = BB->getFirstNonPHI(); - // FIXME: Don't mark SEH functions without __finally blocks as having + // If this is a non-landingpad EH pad, mark this function as using // funclets. - if (!isa(I)) + // FIXME: SEH catchpads do not create funclets, so we could avoid setting + // this in such cases in order to improve frame layout. + if (!isa(I)) { MMI.setHasEHFunclets(true); + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + } if (isa(I) || isa(I)) { assert(&*BB->begin() == I && "WinEHPrepare failed to remove PHIs from imaginary BBs"); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index be6cec6fa59..bba328be1e0 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -515,15 +515,14 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, + DebugLoc DL, unsigned Reg, uint64_t MaxAlign) const { uint64_t Val = -MaxAlign; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), - StackPtr) - .addReg(StackPtr) - .addImm(Val) - .setMIFlag(MachineInstr::FrameSetup); + unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) + .addReg(Reg) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); @@ -834,7 +833,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Don't do this for Win64, it needs to realign the stack after the prologue. if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); + BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); } // If there is an SUB32ri of ESP immediately before this instruction, merge @@ -923,11 +922,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); int SEHFrameOffset = 0; + unsigned SPOrEstablisher = IsFunclet ? Establisher : StackPtr; if (IsWin64Prologue && HasFP) { // Set RBP to a small fixed offset from RSP. In the funclet case, we base // this calculation on the incoming establisher, which holds the value of // RSP from the parent frame at the end of the prologue. - unsigned SPOrEstablisher = IsFunclet ? Establisher : StackPtr; SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); if (SEHFrameOffset) addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), @@ -977,9 +976,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Win64 requires aligning the stack after the prologue. if (IsWin64Prologue && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); + BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign); } + // We already dealt with stack realignment and funclets above. + if (IsFunclet && STI.is32Bit()) + return; + // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer @@ -988,7 +991,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Update the base pointer with the current stack pointer. unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) - .addReg(StackPtr) + .addReg(SPOrEstablisher) .setMIFlag(MachineInstr::FrameSetup); if (X86FI->getRestoreBasePointer()) { // Stash value of base pointer. Saving RSP instead of EBP shortens @@ -996,11 +999,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true, X86FI->getRestoreBasePointerOffset()) - .addReg(StackPtr) + .addReg(SPOrEstablisher) .setMIFlag(MachineInstr::FrameSetup); } - if (X86FI->getHasSEHFramePtrSave()) { + if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { // Stash the value of the frame pointer relative to the base pointer for // Win32 EH. This supports Win32 EH, which does the inverse of the above: // it recovers the frame pointer from the base pointer rather than the @@ -1359,21 +1362,42 @@ int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const uint64_t StackSize = MFI->getStackSize(); { #ifndef NDEBUG - // Note: LLVM arranges the stack as: - // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) - // > "Stack Slots" (<--SP) - // We can always address StackSlots from RSP. We can usually (unless - // needsStackRealignment) address CSRs from RSP, but sometimes need to - // address them from RBP. FixedObjects can be placed anywhere in the stack - // frame depending on their specific requirements (i.e. we can actually - // refer to arguments to the function which are stored in the *callers* - // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs - // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. - - assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); - - // We don't handle tail calls, and shouldn't be seeing them - // either. + // LLVM arranges the stack as follows: + // ... + // ARG2 + // ARG1 + // RETADDR + // PUSH RBP <-- RBP points here + // PUSH CSRs + // ~~~~~~~ <-- optional stack realignment dynamic adjustment + // ... + // STACK OBJECTS + // ... <-- RSP after prologue points here + // + // if (hasVarSizedObjects()): + // ... <-- "base pointer" (ESI/RBX) points here + // DYNAMIC ALLOCAS + // ... <-- RSP points here + // + // Case 1: In the simple case of no stack realignment and no dynamic + // allocas, both "fixed" stack objects (arguments and CSRs) are addressable + // with fixed offsets from RSP. + // + // Case 2: In the case of stack realignment with no dynamic allocas, fixed + // stack objects are addressed with RBP and regular stack objects with RSP. + // + // Case 3: In the case of dynamic allocas and stack realignment, RSP is used + // to address stack arguments for outgoing calls and nothing else. The "base + // pointer" points to local variables, and RBP points to fixed objects. + // + // In cases 2 and 3, we can only answer for non-fixed stack objects, and the + // answer we give is relative to the SP after the prologue, and not the + // SP in the middle of the function. + + assert((!TRI->needsStackRealignment(MF) || !MFI->isFixedObjectIndex(FI)) && + "offset from fixed object to SP is not static"); + + // We don't handle tail calls, and shouldn't be seeing them either. int TailCallReturnAddrDelta = MF.getInfo()->getTCReturnAddrDelta(); assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!"); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 261eade9173..35bafb532b4 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -135,7 +135,7 @@ private: /// Aligns the stack pointer by ANDing it with -MaxAlign. void BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, - uint64_t MaxAlign) const; + unsigned Reg, uint64_t MaxAlign) const; /// Make small positive stack adjustments using POPs. bool adjustStackWithPops(MachineBasicBlock &MBB, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 685067a28c6..d13d0510a0c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2889,11 +2889,6 @@ SDValue X86TargetLowering::LowerFormalArguments( DAG.getMachineFunction(), UnwindHelpFI), /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0); - } else { - // Functions using Win32 EH are considered to have opaque SP adjustments - // to force local variables to be addressed from the frame or base - // pointers. - MFI->setHasOpaqueSPAdjustment(true); } } diff --git a/test/CodeGen/X86/cleanuppad-realign.ll b/test/CodeGen/X86/cleanuppad-realign.ll new file mode 100644 index 00000000000..c80077e80c6 --- /dev/null +++ b/test/CodeGen/X86/cleanuppad-realign.ll @@ -0,0 +1,77 @@ +; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s + +declare i32 @__CxxFrameHandler3(...) +declare void @Dtor(i64* %o) +declare void @f(i32) + +define void @realigned_cleanup() personality i32 (...)* @__CxxFrameHandler3 { +entry: + ; Overalign %o to cause stack realignment. + %o = alloca i64, align 32 + invoke void @f(i32 1) + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: ; preds = %entry + call void @Dtor(i64* %o) + ret void + +ehcleanup: ; preds = %entry + %0 = cleanuppad [] + call void @Dtor(i64* %o) + cleanupret %0 unwind to caller +} + +; X86-LABEL: _realigned_cleanup: # @realigned_cleanup +; X86: pushl %ebp +; X86: movl %esp, %ebp +; X86: pushl %ebx +; X86: pushl %edi +; X86: pushl %esi +; X86: andl $-32, %esp +; X86: subl $96, %esp +; X86: movl %esp, %esi +; EBP will reload from this offset. +; X86: movl %ebp, 28(%esi) +; The last EH reg field is the state number, so dtor adjust is this +4. +; X86: movl $-1, 72(%esi) + +; X86-LABEL: "?dtor$2@?0?realigned_cleanup@4HA": +; X86: pushl %ebp +; X86: leal -76(%ebp), %esi +; X86: movl 28(%esi), %ebp +; We used to have a bug where we clobbered ESI after the prologue. +; X86-NOT: movl {{.*}}, %esi +; X86: popl %ebp +; X86: retl # CLEANUPRET + +; X64-LABEL: realigned_cleanup: # @realigned_cleanup +; X64: pushq %rbp +; X64: .seh_pushreg 5 +; X64: pushq %rbx +; X64: .seh_pushreg 3 +; X64: subq $72, %rsp +; X64: .seh_stackalloc 72 +; X64: leaq 64(%rsp), %rbp +; X64: .seh_setframe 5, 64 +; X64: .seh_endprologue +; X64: andq $-32, %rsp +; X64: movq %rsp, %rbx +; RBP will reload from this offset. +; X64: movq %rbp, 48(%rbx) + +; X64-LABEL: "?dtor$2@?0?realigned_cleanup@4HA": +; X64: movq %rdx, 16(%rsp) +; X64: pushq %rbp +; X64: .seh_pushreg 5 +; X64: pushq %rbx +; X64: .seh_pushreg 3 +; X64: subq $40, %rsp +; X64: .seh_stackalloc 40 +; X64: leaq 64(%rdx), %rbp +; X64: .seh_endprologue +; X64: andq $-32, %rdx +; X64: movq %rdx, %rbx +; X64-NOT: mov{{.*}}, %rbx +; X64: popq %rbp +; X64: retq # CLEANUPRET -- 2.34.1