From: Reid Kleckner Date: Tue, 29 Sep 2015 23:32:01 +0000 (+0000) Subject: [WinEH] Setup RBP correctly in Win64 funclet prologues X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=a4778f21f057b6202f40d810aa72c85b4a5649d9 [WinEH] Setup RBP correctly in Win64 funclet prologues Previously local variable captures just didn't work in 64-bit. Now we can access local variables more or less correctly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248857 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index b0bd6983654..3257daf34a5 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -623,6 +623,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86MachineFunctionInfo *X86FI = MF.getInfo(); uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool IsFunclet = MBB.isEHFuncletEntry(); bool HasFP = hasFP(MF); bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv()); bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); @@ -700,46 +701,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, uint64_t NumBytes = 0; int stackGrowth = -SlotSize; - if (MBB.isEHFuncletEntry()) { - assert(STI.isOSWindows() && "funclets only supported on Windows"); - - // Set up the FramePtr and BasePtr physical registers using the address - // passed as EBP or RDX by the MSVC EH runtime. - if (STI.is32Bit()) { - // PUSH32r %ebp - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(MachineFramePtr, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); - // Reset EBP / ESI to something good. - MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); - } else { - // Immediately spill RDX into the home slot. The runtime cares about this. - unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX; - // MOV64mr %rdx, 16(%rsp) - unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), - StackPtr, true, 16) + unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX; + if (IsWin64Prologue && IsFunclet) { + // Immediately spill RDX into the home slot. The runtime cares about this. + // MOV64mr %rdx, 16(%rsp) + unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) .addReg(RDX) .setMIFlag(MachineInstr::FrameSetup); - // PUSH64r %rbp - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) - .addReg(MachineFramePtr, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) - .addImm(MachineFramePtr) - .setMIFlag(MachineInstr::FrameSetup); - // MOV64rr %rdx, %rbp - unsigned MOVrr = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; - BuildMI(MBB, MBBI, DL, TII.get(MOVrr), FramePtr) - .addReg(RDX) - .setMIFlag(MachineInstr::FrameSetup); - assert(!TRI->hasBasePointer(MF) && - "x64 funclets with base ptrs not yet implemented"); - } + } - // For EH funclets, only allocate enough space for outgoing calls. - NumBytes = MFI->getMaxCallFrameSize(); - } else if (HasFP) { + if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; // If required, include space for extra hidden slot for stashing base pointer. @@ -755,7 +727,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. - MFI->setOffsetAdjustment(-NumBytes); + if (!IsFunclet) + MFI->setOffsetAdjustment(-NumBytes); + else + assert(MFI->getOffsetAdjustment() == -NumBytes && + "should calculate same local variable offset for funclets"); // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) @@ -781,30 +757,37 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } - if (!IsWin64Prologue) { + if (!IsWin64Prologue && !IsFunclet) { // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); - } - if (NeedsDwarfCFI) { - // Mark effective beginning of when frame pointer becomes valid. - // Define the current CFA to use the EBP/RBP register. - unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); + if (NeedsDwarfCFI) { + // Mark effective beginning of when frame pointer becomes valid. + // Define the current CFA to use the EBP/RBP register. + unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); + BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister( + nullptr, DwarfFramePtr)); + } } // Mark the FramePtr as live-in in every block. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) I->addLiveIn(MachineFramePtr); } else { + assert(!IsFunclet && "funclets without FPs not yet implemented"); NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } + // For EH funclets, only allocate enough space for outgoing calls. Save the + // NumBytes value that we would've used for the parent frame. + unsigned ParentFrameNumBytes = NumBytes; + if (IsFunclet) + NumBytes = MFI->getMaxCallFrameSize(); + // Skip the callee-saved push instructions. bool PushedRegs = false; int StackOffset = 2 * stackGrowth; @@ -835,7 +818,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). // Don't do this for Win64, it needs to realign the stack after the prologue. - if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) { + if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); } @@ -856,7 +839,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. uint64_t AlignedNumBytes = NumBytes; - if (IsWin64Prologue && TRI->needsStackRealignment(MF)) + if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { // Check whether EAX is livein for this function. @@ -927,18 +910,27 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, int SEHFrameOffset = 0; if (IsWin64Prologue && HasFP) { - SEHFrameOffset = calculateSetFPREG(NumBytes); + // Set RBP to a small fixed offset from RSP. In the funclet case, we base + // this calculation on the incoming RDX, which holds the value of RSP from + // the parent frame at the end of the prologue. + unsigned SPOrRDX = !IsFunclet ? StackPtr : RDX; + SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); if (SEHFrameOffset) addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), - StackPtr, false, SEHFrameOffset); + SPOrRDX, false, SEHFrameOffset); else - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr); + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) + .addReg(SPOrRDX); - if (NeedsWinCFI) + // If this is not a funclet, emit the CFI describing our frame pointer. + if (NeedsWinCFI && !IsFunclet) BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) .addImm(FramePtr) .addImm(SEHFrameOffset) .setMIFlag(MachineInstr::FrameSetup); + } else if (IsFunclet && STI.is32Bit()) { + // Reset EBP / ESI to something good for funclets. + MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); } while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { diff --git a/test/CodeGen/X86/win-catchpad-csrs.ll b/test/CodeGen/X86/win-catchpad-csrs.ll index 4a745121cc4..b9f99e61669 100644 --- a/test/CodeGen/X86/win-catchpad-csrs.ll +++ b/test/CodeGen/X86/win-catchpad-csrs.ll @@ -75,8 +75,8 @@ catchendblock: ; preds = %catch, ; X86: LBB0_[[catch1bb]]: # %catch{{$}} ; X86: pushl %ebp ; X86-NOT: pushl -; X86: addl $12, %ebp ; X86: subl $16, %esp +; X86: addl $12, %ebp ; X86: movl $1, -{{[0-9]+}}(%ebp) ; X86: movl $2, (%esp) ; X86: calll _f @@ -105,6 +105,7 @@ catchendblock: ; preds = %catch, ; X64: .seh_stackalloc 40 ; X64: leaq 32(%rsp), %rbp ; X64: .seh_setframe 5, 32 +; X64: .seh_endprologue ; X64: callq getint ; X64: callq getint ; X64: callq getint @@ -121,8 +122,11 @@ catchendblock: ; preds = %catch, ; X64: LBB0_[[catch1bb]]: # %catch{{$}} ; X64: movq %rdx, 16(%rsp) ; X64: pushq %rbp -; X64: movq %rdx, %rbp +; X64: .seh_pushreg 5 ; X64: subq $32, %rsp +; X64: .seh_stackalloc 32 +; X64: leaq 32(%rdx), %rbp +; X64: .seh_endprologue ; X64: movl $2, %ecx ; X64: callq f ; X64: leaq [[contbb]](%rip), %rax diff --git a/test/CodeGen/X86/win-catchpad.ll b/test/CodeGen/X86/win-catchpad.ll index 5a700b444f2..2c05fbe22f1 100644 --- a/test/CodeGen/X86/win-catchpad.ll +++ b/test/CodeGen/X86/win-catchpad.ll @@ -87,8 +87,8 @@ catchendblock: ; preds = %catch, %catch.2, %c ; X86: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA": ; X86: LBB0_[[catch1bb]]: # %catch{{$}} ; X86: pushl %ebp -; X86: addl $12, %ebp ; X86: subl $8, %esp +; X86: addl $12, %ebp ; X86: movl -32(%ebp), %[[e_reg:[a-z]+]] ; X86: movl $1, -{{[0-9]+}}(%ebp) ; X86: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]] @@ -103,8 +103,8 @@ catchendblock: ; preds = %catch, %catch.2, %c ; X86: "?catch$[[catch2bb:[0-9]+]]@?0?try_catch_catch@4HA": ; X86: LBB0_[[catch2bb]]: # %catch.2{{$}} ; X86: pushl %ebp -; X86: addl $12, %ebp ; X86: subl $8, %esp +; X86: addl $12, %ebp ; X86: movl $1, -{{[0-9]+}}(%ebp) ; X86: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]] ; X86-DAG: movl %[[addr_reg]], 4(%esp) @@ -134,6 +134,7 @@ catchendblock: ; preds = %catch, %catch.2, %c ; X64: .seh_stackalloc 48 ; X64: leaq 48(%rsp), %rbp ; X64: .seh_setframe 5, 48 +; X64: .seh_endprologue ; X64: .Ltmp0 ; X64-DAG: leaq -[[local_offs:[0-9]+]](%rbp), %rdx ; X64-DAG: movl $1, %ecx @@ -147,8 +148,11 @@ catchendblock: ; preds = %catch, %catch.2, %c ; X64: LBB0_[[catch1bb]]: # %catch{{$}} ; X64: movq %rdx, 16(%rsp) ; X64: pushq %rbp -; X64: movq %rdx, %rbp +; X64: .seh_pushreg 5 ; X64: subq $32, %rsp +; X64: .seh_stackalloc 32 +; X64: leaq 48(%rdx), %rbp +; X64: .seh_endprologue ; X64-DAG: .Ltmp4 ; X64-DAG: leaq -[[local_offs]](%rbp), %rdx ; X64-DAG: movl [[e_addr:[-0-9]+]](%rbp), %ecx @@ -162,8 +166,11 @@ catchendblock: ; preds = %catch, %catch.2, %c ; X64: LBB0_[[catch2bb]]: # %catch.2{{$}} ; X64: movq %rdx, 16(%rsp) ; X64: pushq %rbp -; X64: movq %rdx, %rbp +; X64: .seh_pushreg 5 ; X64: subq $32, %rsp +; X64: .seh_stackalloc 32 +; X64: leaq 48(%rdx), %rbp +; X64: .seh_endprologue ; X64-DAG: leaq -[[local_offs]](%rbp), %rdx ; X64-DAG: movl $3, %ecx ; X64: callq f diff --git a/test/CodeGen/X86/win-cleanuppad.ll b/test/CodeGen/X86/win-cleanuppad.ll index 02ead25603f..4190b5c5181 100644 --- a/test/CodeGen/X86/win-cleanuppad.ll +++ b/test/CodeGen/X86/win-cleanuppad.ll @@ -99,18 +99,18 @@ cleanup.outer: ; preds = %invoke.cont.1, %c ; X64-LABEL: nested_cleanup: ; X64: .Lfunc_begin1: -; X64: .Ltmp14: +; X64: .Ltmp13: ; X64: movl $1, %ecx ; X64: callq f -; X64: .Ltmp16: +; X64: .Ltmp15: ; X64: movl $2, %ecx ; X64: callq f -; X64: .Ltmp17: +; X64: .Ltmp16: ; X64: callq "??1Dtor@@QAE@XZ" -; X64: .Ltmp18: +; X64: .Ltmp17: ; X64: movl $3, %ecx ; X64: callq f -; X64: .Ltmp19: +; X64: .Ltmp18: ; X64: "?dtor$[[cleanup_inner:[0-9]+]]@?0?nested_cleanup@4HA": ; X64: LBB1_[[cleanup_inner]]: # %cleanup.inner{{$}} @@ -155,13 +155,13 @@ cleanup.outer: ; preds = %invoke.cont.1, %c ; X64: $ip2state$nested_cleanup: ; X64-NEXT: .long .Lfunc_begin1@IMGREL ; X64-NEXT: .long -1 -; X64-NEXT: .long .Ltmp14@IMGREL +; X64-NEXT: .long .Ltmp13@IMGREL ; X64-NEXT: .long 0 -; X64-NEXT: .long .Ltmp16@IMGREL +; X64-NEXT: .long .Ltmp15@IMGREL ; X64-NEXT: .long 1 -; X64-NEXT: .long .Ltmp18@IMGREL +; X64-NEXT: .long .Ltmp17@IMGREL ; X64-NEXT: .long 0 -; X64-NEXT: .long .Ltmp19@IMGREL+1 +; X64-NEXT: .long .Ltmp18@IMGREL+1 ; X64-NEXT: .long -1 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/X86/win-funclet-cfi.ll b/test/CodeGen/X86/win-funclet-cfi.ll index 63a81d81551..63be4f26afa 100644 --- a/test/CodeGen/X86/win-funclet-cfi.ll +++ b/test/CodeGen/X86/win-funclet-cfi.ll @@ -51,9 +51,8 @@ declare i32 @__CxxFrameHandler3(...) ; CHECK: subq $32, %rsp ; CHECK: .seh_stackalloc 32 -; FIXME: This looks wrong... -; CHECK: leaq 32(%rsp), %rbp -; CHECK: .seh_setframe 5, 32 +; CHECK: leaq 48(%rdx), %rbp +; CHECK-NOT: .seh_setframe ; Prologue is done, emit the .seh_endprologue directive. ; CHECK: .seh_endprologue @@ -82,9 +81,8 @@ declare i32 @__CxxFrameHandler3(...) ; CHECK: subq $32, %rsp ; CHECK: .seh_stackalloc 32 -; FIXME: This looks wrong... -; CHECK: leaq 32(%rsp), %rbp -; CHECK: .seh_setframe 5, 32 +; CHECK: leaq 48(%rdx), %rbp +; CHECK-NOT: .seh_setframe ; Prologue is done, emit the .seh_endprologue directive. ; CHECK: .seh_endprologue