From: Reid Kleckner <rnk@google.com>
Date: Tue, 29 Sep 2015 23:32:01 +0000 (+0000)
Subject: [WinEH] Setup RBP correctly in Win64 funclet prologues
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=a4778f21f057b6202f40d810aa72c85b4a5649d9

[WinEH] Setup RBP correctly in Win64 funclet prologues

Previously local variable captures just didn't work in 64-bit. Now we
can access local variables more or less correctly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248857 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index b0bd6983654..3257daf34a5 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -623,6 +623,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
   uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
+  bool IsFunclet = MBB.isEHFuncletEntry();
   bool HasFP = hasFP(MF);
   bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv());
   bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
@@ -700,46 +701,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   uint64_t NumBytes = 0;
   int stackGrowth = -SlotSize;
 
-  if (MBB.isEHFuncletEntry()) {
-    assert(STI.isOSWindows() && "funclets only supported on Windows");
-
-    // Set up the FramePtr and BasePtr physical registers using the address
-    // passed as EBP or RDX by the MSVC EH runtime.
-    if (STI.is32Bit()) {
-      // PUSH32r %ebp
-      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
-          .addReg(MachineFramePtr, RegState::Kill)
-          .setMIFlag(MachineInstr::FrameSetup);
-      // Reset EBP / ESI to something good.
-      MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
-    } else {
-      // Immediately spill RDX into the home slot. The runtime cares about this.
-      unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX;
-      // MOV64mr %rdx, 16(%rsp)
-      unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
-      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)),
-                   StackPtr, true, 16)
+  unsigned RDX = Uses64BitFramePtr ? X86::RDX : X86::EDX;
+  if (IsWin64Prologue && IsFunclet) {
+    // Immediately spill RDX into the home slot. The runtime cares about this.
+    // MOV64mr %rdx, 16(%rsp)
+    unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
+    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
         .addReg(RDX)
         .setMIFlag(MachineInstr::FrameSetup);
-      // PUSH64r %rbp
-      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
-          .addReg(MachineFramePtr, RegState::Kill)
-          .setMIFlag(MachineInstr::FrameSetup);
-      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
-          .addImm(MachineFramePtr)
-          .setMIFlag(MachineInstr::FrameSetup);
-      // MOV64rr %rdx, %rbp
-      unsigned MOVrr = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
-      BuildMI(MBB, MBBI, DL, TII.get(MOVrr), FramePtr)
-          .addReg(RDX)
-          .setMIFlag(MachineInstr::FrameSetup);
-      assert(!TRI->hasBasePointer(MF) &&
-             "x64 funclets with base ptrs not yet implemented");
-    }
+  }
 
-    // For EH funclets, only allocate enough space for outgoing calls.
-    NumBytes = MFI->getMaxCallFrameSize();
-  } else if (HasFP) {
+  if (HasFP) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
     // If required, include space for extra hidden slot for stashing base pointer.
@@ -755,7 +727,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
     // Get the offset of the stack slot for the EBP register, which is
     // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
     // Update the frame offset adjustment.
-    MFI->setOffsetAdjustment(-NumBytes);
+    if (!IsFunclet)
+      MFI->setOffsetAdjustment(-NumBytes);
+    else
+      assert(MFI->getOffsetAdjustment() == -NumBytes &&
+             "should calculate same local variable offset for funclets");
 
     // Save EBP/RBP into the appropriate stack slot.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
@@ -781,30 +757,37 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
           .setMIFlag(MachineInstr::FrameSetup);
     }
 
-    if (!IsWin64Prologue) {
+    if (!IsWin64Prologue && !IsFunclet) {
       // Update EBP with the new base value.
       BuildMI(MBB, MBBI, DL,
               TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
               FramePtr)
           .addReg(StackPtr)
           .setMIFlag(MachineInstr::FrameSetup);
-    }
 
-    if (NeedsDwarfCFI) {
-      // Mark effective beginning of when frame pointer becomes valid.
-      // Define the current CFA to use the EBP/RBP register.
-      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
+      if (NeedsDwarfCFI) {
+        // Mark effective beginning of when frame pointer becomes valid.
+        // Define the current CFA to use the EBP/RBP register.
+        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+        BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister(
+                                    nullptr, DwarfFramePtr));
+      }
     }
 
     // Mark the FramePtr as live-in in every block.
     for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
       I->addLiveIn(MachineFramePtr);
   } else {
+    assert(!IsFunclet && "funclets without FPs not yet implemented");
     NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
   }
 
+  // For EH funclets, only allocate enough space for outgoing calls. Save the
+  // NumBytes value that we would've used for the parent frame.
+  unsigned ParentFrameNumBytes = NumBytes;
+  if (IsFunclet)
+    NumBytes = MFI->getMaxCallFrameSize();
+
   // Skip the callee-saved push instructions.
   bool PushedRegs = false;
   int StackOffset = 2 * stackGrowth;
@@ -835,7 +818,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // Realign stack after we pushed callee-saved registers (so that we'll be
   // able to calculate their offsets from the frame pointer).
   // Don't do this for Win64, it needs to realign the stack after the prologue.
-  if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) {
+  if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
     assert(HasFP && "There should be a frame pointer if stack is realigned.");
     BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
   }
@@ -856,7 +839,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // increments is necessary to ensure that the guard pages used by the OS
   // virtual memory manager are allocated in correct sequence.
   uint64_t AlignedNumBytes = NumBytes;
-  if (IsWin64Prologue && TRI->needsStackRealignment(MF))
+  if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
     AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
   if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
     // Check whether EAX is livein for this function.
@@ -927,18 +910,27 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
 
   int SEHFrameOffset = 0;
   if (IsWin64Prologue && HasFP) {
-    SEHFrameOffset = calculateSetFPREG(NumBytes);
+    // Set RBP to a small fixed offset from RSP. In the funclet case, we base
+    // this calculation on the incoming RDX, which holds the value of RSP from
+    // the parent frame at the end of the prologue.
+    unsigned SPOrRDX = !IsFunclet ? StackPtr : RDX;
+    SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
     if (SEHFrameOffset)
       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
-                   StackPtr, false, SEHFrameOffset);
+                   SPOrRDX, false, SEHFrameOffset);
     else
-      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);
+      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
+          .addReg(SPOrRDX);
 
-    if (NeedsWinCFI)
+    // If this is not a funclet, emit the CFI describing our frame pointer.
+    if (NeedsWinCFI && !IsFunclet)
       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
           .addImm(FramePtr)
           .addImm(SEHFrameOffset)
           .setMIFlag(MachineInstr::FrameSetup);
+  } else if (IsFunclet && STI.is32Bit()) {
+    // Reset EBP / ESI to something good for funclets.
+    MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
   }
 
   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
diff --git a/test/CodeGen/X86/win-catchpad-csrs.ll b/test/CodeGen/X86/win-catchpad-csrs.ll
index 4a745121cc4..b9f99e61669 100644
--- a/test/CodeGen/X86/win-catchpad-csrs.ll
+++ b/test/CodeGen/X86/win-catchpad-csrs.ll
@@ -75,8 +75,8 @@ catchendblock:                                    ; preds = %catch,
 ; X86: LBB0_[[catch1bb]]: # %catch{{$}}
 ; X86: pushl %ebp
 ; X86-NOT: pushl
-; X86: addl $12, %ebp
 ; X86: subl $16, %esp
+; X86: addl $12, %ebp
 ; X86: movl $1, -{{[0-9]+}}(%ebp)
 ; X86: movl $2, (%esp)
 ; X86: calll _f
@@ -105,6 +105,7 @@ catchendblock:                                    ; preds = %catch,
 ; X64: .seh_stackalloc 40
 ; X64: leaq 32(%rsp), %rbp
 ; X64: .seh_setframe 5, 32
+; X64: .seh_endprologue
 ; X64: callq getint
 ; X64: callq getint
 ; X64: callq getint
@@ -121,8 +122,11 @@ catchendblock:                                    ; preds = %catch,
 ; X64: LBB0_[[catch1bb]]: # %catch{{$}}
 ; X64: movq %rdx, 16(%rsp)
 ; X64: pushq %rbp
-; X64: movq %rdx, %rbp
+; X64: .seh_pushreg 5
 ; X64: subq $32, %rsp
+; X64: .seh_stackalloc 32
+; X64: leaq 32(%rdx), %rbp
+; X64: .seh_endprologue
 ; X64: movl $2, %ecx
 ; X64: callq f
 ; X64: leaq [[contbb]](%rip), %rax
diff --git a/test/CodeGen/X86/win-catchpad.ll b/test/CodeGen/X86/win-catchpad.ll
index 5a700b444f2..2c05fbe22f1 100644
--- a/test/CodeGen/X86/win-catchpad.ll
+++ b/test/CodeGen/X86/win-catchpad.ll
@@ -87,8 +87,8 @@ catchendblock:                                    ; preds = %catch, %catch.2, %c
 ; X86: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA":
 ; X86: LBB0_[[catch1bb]]: # %catch{{$}}
 ; X86: pushl %ebp
-; X86: addl $12, %ebp
 ; X86: subl $8, %esp
+; X86: addl $12, %ebp
 ; X86: movl -32(%ebp), %[[e_reg:[a-z]+]]
 ; X86: movl $1, -{{[0-9]+}}(%ebp)
 ; X86: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]]
@@ -103,8 +103,8 @@ catchendblock:                                    ; preds = %catch, %catch.2, %c
 ; X86: "?catch$[[catch2bb:[0-9]+]]@?0?try_catch_catch@4HA":
 ; X86: LBB0_[[catch2bb]]: # %catch.2{{$}}
 ; X86: pushl %ebp
-; X86: addl $12, %ebp
 ; X86: subl $8, %esp
+; X86: addl $12, %ebp
 ; X86: movl $1, -{{[0-9]+}}(%ebp)
 ; X86: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]]
 ; X86-DAG: movl %[[addr_reg]], 4(%esp)
@@ -134,6 +134,7 @@ catchendblock:                                    ; preds = %catch, %catch.2, %c
 ; X64: .seh_stackalloc 48
 ; X64: leaq 48(%rsp), %rbp
 ; X64: .seh_setframe 5, 48
+; X64: .seh_endprologue
 ; X64: .Ltmp0
 ; X64-DAG: leaq -[[local_offs:[0-9]+]](%rbp), %rdx
 ; X64-DAG: movl $1, %ecx
@@ -147,8 +148,11 @@ catchendblock:                                    ; preds = %catch, %catch.2, %c
 ; X64: LBB0_[[catch1bb]]: # %catch{{$}}
 ; X64: movq %rdx, 16(%rsp)
 ; X64: pushq %rbp
-; X64: movq %rdx, %rbp
+; X64: .seh_pushreg 5
 ; X64: subq $32, %rsp
+; X64: .seh_stackalloc 32
+; X64: leaq 48(%rdx), %rbp
+; X64: .seh_endprologue
 ; X64-DAG: .Ltmp4
 ; X64-DAG: leaq -[[local_offs]](%rbp), %rdx
 ; X64-DAG: movl [[e_addr:[-0-9]+]](%rbp), %ecx
@@ -162,8 +166,11 @@ catchendblock:                                    ; preds = %catch, %catch.2, %c
 ; X64: LBB0_[[catch2bb]]: # %catch.2{{$}}
 ; X64: movq %rdx, 16(%rsp)
 ; X64: pushq %rbp
-; X64: movq %rdx, %rbp
+; X64: .seh_pushreg 5
 ; X64: subq $32, %rsp
+; X64: .seh_stackalloc 32
+; X64: leaq 48(%rdx), %rbp
+; X64: .seh_endprologue
 ; X64-DAG: leaq -[[local_offs]](%rbp), %rdx
 ; X64-DAG: movl $3, %ecx
 ; X64: callq f
diff --git a/test/CodeGen/X86/win-cleanuppad.ll b/test/CodeGen/X86/win-cleanuppad.ll
index 02ead25603f..4190b5c5181 100644
--- a/test/CodeGen/X86/win-cleanuppad.ll
+++ b/test/CodeGen/X86/win-cleanuppad.ll
@@ -99,18 +99,18 @@ cleanup.outer:                                      ; preds = %invoke.cont.1, %c
 
 ; X64-LABEL: nested_cleanup:
 ; X64: .Lfunc_begin1:
-; X64: .Ltmp14:
+; X64: .Ltmp13:
 ; X64: movl    $1, %ecx
 ; X64: callq   f
-; X64: .Ltmp16:
+; X64: .Ltmp15:
 ; X64: movl    $2, %ecx
 ; X64: callq   f
-; X64: .Ltmp17:
+; X64: .Ltmp16:
 ; X64: callq   "??1Dtor@@QAE@XZ"
-; X64: .Ltmp18:
+; X64: .Ltmp17:
 ; X64: movl    $3, %ecx
 ; X64: callq   f
-; X64: .Ltmp19:
+; X64: .Ltmp18:
 
 ; X64: "?dtor$[[cleanup_inner:[0-9]+]]@?0?nested_cleanup@4HA":
 ; X64: LBB1_[[cleanup_inner]]: # %cleanup.inner{{$}}
@@ -155,13 +155,13 @@ cleanup.outer:                                      ; preds = %invoke.cont.1, %c
 ; X64: $ip2state$nested_cleanup:
 ; X64-NEXT: .long   .Lfunc_begin1@IMGREL
 ; X64-NEXT: .long   -1
-; X64-NEXT: .long   .Ltmp14@IMGREL
+; X64-NEXT: .long   .Ltmp13@IMGREL
 ; X64-NEXT: .long   0
-; X64-NEXT: .long   .Ltmp16@IMGREL
+; X64-NEXT: .long   .Ltmp15@IMGREL
 ; X64-NEXT: .long   1
-; X64-NEXT: .long   .Ltmp18@IMGREL
+; X64-NEXT: .long   .Ltmp17@IMGREL
 ; X64-NEXT: .long   0
-; X64-NEXT: .long   .Ltmp19@IMGREL+1
+; X64-NEXT: .long   .Ltmp18@IMGREL+1
 ; X64-NEXT: .long   -1
 
 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/win-funclet-cfi.ll b/test/CodeGen/X86/win-funclet-cfi.ll
index 63a81d81551..63be4f26afa 100644
--- a/test/CodeGen/X86/win-funclet-cfi.ll
+++ b/test/CodeGen/X86/win-funclet-cfi.ll
@@ -51,9 +51,8 @@ declare i32 @__CxxFrameHandler3(...)
 ; CHECK: subq    $32, %rsp
 ; CHECK: .seh_stackalloc 32
 
-; FIXME: This looks wrong...
-; CHECK: leaq    32(%rsp), %rbp
-; CHECK: .seh_setframe 5, 32
+; CHECK: leaq    48(%rdx), %rbp
+; CHECK-NOT: .seh_setframe
 
 ; Prologue is done, emit the .seh_endprologue directive.
 ; CHECK: .seh_endprologue
@@ -82,9 +81,8 @@ declare i32 @__CxxFrameHandler3(...)
 ; CHECK: subq    $32, %rsp
 ; CHECK: .seh_stackalloc 32
 
-; FIXME: This looks wrong...
-; CHECK: leaq    32(%rsp), %rbp
-; CHECK: .seh_setframe 5, 32
+; CHECK: leaq    48(%rdx), %rbp
+; CHECK-NOT: .seh_setframe
 
 ; Prologue is done, emit the .seh_endprologue directive.
 ; CHECK: .seh_endprologue