From 69114ee01626e83678a8c6e44a12895f7c637466 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 10 Feb 2015 00:57:42 +0000 Subject: [PATCH] X86: Emit an ABI compliant prologue and epilogue for Win64 Win64 has specific constraints on what valid prologues and epilogues look like. This constraint is borne from the flexibility and descriptiveness of Win64's unwind opcodes. Prologues previously emitted by LLVM could not be represented by the unwind opcodes, preventing operations powered by stack unwinding from working successfully. Differential Revision: http://reviews.llvm.org/D7520 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228641 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 211 +++++++++++++-------- test/CodeGen/X86/frameaddr.ll | 4 +- test/CodeGen/X86/gcc_except_table.ll | 2 +- test/CodeGen/X86/win64_alloca_dynalloca.ll | 14 +- test/CodeGen/X86/win64_alloca_dynalloca.s | 29 +++ test/CodeGen/X86/win64_eh.ll | 14 +- test/CodeGen/X86/win64_eh.s | 50 +++++ test/CodeGen/X86/win64_frame.ll | 121 ++++++++++++ 8 files changed, 348 insertions(+), 97 deletions(-) create mode 100644 test/CodeGen/X86/win64_alloca_dynalloca.s create mode 100644 test/CodeGen/X86/win64_eh.s create mode 100644 test/CodeGen/X86/win64_frame.ll diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 897defd6306..8b79a9f1ebd 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -484,6 +484,35 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, } } +static unsigned calculateSetFPREG(uint64_t SPAdjust) { + // Win64 ABI has a less restrictive limitation of 240; 128 works equally well + // and might require smaller successive adjustments. + const uint64_t Win64MaxSEHOffset = 128; + uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset); + // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. 
+ return static_cast(RoundUpToAlignment(SEHFrameOffset, 16)); +} + +// If we're forcing a stack realignment we can't rely on just the frame +// info, we need to know the ABI stack alignment as well in case we +// have a call out. Otherwise just make sure we have some alignment - we'll +// go with the minimum SlotSize. +static uint64_t calculateMaxStackAlign(const MachineFunction &MF) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + const X86Subtarget &STI = MF.getSubtarget(); + const X86RegisterInfo *RegInfo = STI.getRegisterInfo(); + unsigned SlotSize = RegInfo->getSlotSize(); + unsigned StackAlign = STI.getFrameLowering()->getStackAlignment(); + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else if (MaxAlign < SlotSize) + MaxAlign = SlotSize; + } + return MaxAlign; +} + /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to @@ -578,7 +607,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { const TargetInstrInfo &TII = *STI.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo(); - uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. 
bool HasFP = hasFP(MF); bool Is64Bit = STI.is64Bit(); @@ -591,7 +620,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { bool NeedsDwarfCFI = !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); bool UseLEA = STI.useLeaForSP(); - unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); const unsigned MachineFramePtr = @@ -602,19 +630,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned BasePtr = RegInfo->getBaseRegister(); DebugLoc DL; - // If we're forcing a stack realignment we can't rely on just the frame - // info, we need to know the ABI stack alignment as well in case we - // have a call out. Otherwise just make sure we have some alignment - we'll - // go with the minimum SlotSize. - if (ForceStackAlign) { - if (MFI->hasCalls()) - MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; - else if (MaxAlign < SlotSize) - MaxAlign = SlotSize; - } - // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta && IsWinEH) + report_fatal_error("Can't handle guaranteed tail call under win64 yet"); + if (TailCallReturnAddrDelta < 0) X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); @@ -689,7 +709,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Callee-saved registers are pushed on stack before the stack // is realigned. FrameSize -= X86FI->getCalleeSavedFrameSize(); - NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; + NumBytes = RoundUpToAlignment(FrameSize, MaxAlign); } else { NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); } @@ -728,11 +748,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .setMIFlag(MachineInstr::FrameSetup); } - // Update EBP with the new base value. - BuildMI(MBB, MBBI, DL, - TII.get(Uses64BitFramePtr ? 
X86::MOV64rr : X86::MOV32rr), FramePtr) - .addReg(StackPtr) - .setMIFlag(MachineInstr::FrameSetup); + if (!IsWinEH) { + // Update EBP with the new base value. + BuildMI(MBB, MBBI, DL, + TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), + FramePtr) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + } if (NeedsDwarfCFI) { // Mark effective beginning of when frame pointer becomes valid. @@ -781,15 +804,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). - if (RegInfo->needsStackRealignment(MF)) { + // Don't do this for Win64, it needs to realign the stack after the prologue. + if (!IsWinEH && RegInfo->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); uint64_t Val = -MaxAlign; MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr) - .addReg(StackPtr) - .addImm(Val) - .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), + StackPtr) + .addReg(StackPtr) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); @@ -867,50 +891,28 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { UseLEA, TII, *RegInfo); } + if (NeedsWinEH && NumBytes) + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + int SEHFrameOffset = 0; - if (NeedsWinEH) { - if (HasFP) { - // We need to set frame base offset low enough such that all saved - // register offsets would be positive relative to it, but we can't - // just use NumBytes, because .seh_setframe offset must be <=240. - // So we pretend to have only allocated enough space to spill the - // non-volatile registers. 
- // We don't care about the rest of stack allocation, because unwinder - // will restore SP to (BP - SEHFrameOffset) - for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { - int offset = MFI->getObjectOffset(Info.getFrameIdx()); - SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset)); - } - SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant - - // This only needs to account for XMM spill slots, GPR slots - // are covered by the .seh_pushreg's emitted above. - unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize(); - if (Size) { - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) - .addImm(Size) - .setMIFlag(MachineInstr::FrameSetup); - } + if (IsWinEH && HasFP) { + SEHFrameOffset = calculateSetFPREG(NumBytes); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), + StackPtr, false, SEHFrameOffset); + if (NeedsWinEH) BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) .addImm(FramePtr) .addImm(SEHFrameOffset) .setMIFlag(MachineInstr::FrameSetup); - } else { - // SP will be the base register for restoring XMMs - if (NumBytes) { - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) - .addImm(NumBytes) - .setMIFlag(MachineInstr::FrameSetup); - } - } } // Skip the rest of register spilling code while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) ++MBBI; - // Emit SEH info for non-GPRs if (NeedsWinEH) { for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { unsigned Reg = Info.getReg(); @@ -931,6 +933,23 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .setMIFlag(MachineInstr::FrameSetup); } + // Realign stack after we spilled callee-saved registers (so that we'll be + // able to calculate their offsets from the frame pointer). + // Win64 requires aligning the stack after the prologue. 
+ if (IsWinEH && RegInfo->needsStackRealignment(MF)) { + assert(HasFP && "There should be a frame pointer if stack is realigned."); + uint64_t Val = -MaxAlign; + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), + StackPtr) + .addReg(StackPtr) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); + + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); + } + // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer @@ -986,7 +1005,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); const bool Is64BitILP32 = STI.isTarget64BitILP32(); bool UseLEA = STI.useLeaForSP(); - unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned MachineFramePtr = @@ -1017,21 +1035,10 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); - uint64_t MaxAlign = MFI->getMaxAlignment(); + uint64_t MaxAlign = calculateMaxStackAlign(MF); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; - // If we're forcing a stack realignment we can't rely on just the frame - // info, we need to know the ABI stack alignment as well in case we - // have a call out. Otherwise just make sure we have some alignment - we'll - // go with the minimum. - if (ForceStackAlign) { - if (MFI->hasCalls()) - MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; - else - MaxAlign = MaxAlign ? MaxAlign : 4; - } - if (hasFP(MF)) { // Calculate required stack adjustment. 
uint64_t FrameSize = StackSize - SlotSize; @@ -1050,6 +1057,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else { NumBytes = StackSize - CSSize; } + uint64_t SEHStackAllocAmt = NumBytes; // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { @@ -1077,7 +1085,12 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { if (RegInfo->needsStackRealignment(MF)) MBBI = FirstCSPop; - if (CSSize != 0) { + if (IsWinEH) { + unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), StackPtr), + FramePtr, false, SEHStackAllocAmt - SEHFrameOffset); + --MBBI; + } else if (CSSize != 0) { unsigned Opc = getLEArOpcode(Uses64BitFramePtr); addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr, false, -CSSize); @@ -1195,14 +1208,53 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, const X86RegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo *MFI = MF.getFrameInfo(); + // Offset will hold the offset from the stack pointer at function entry to the + // object. + // We need to factor in additional offsets applied during the prologue to the + // frame, base, and stack pointer depending on which is used. int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); + const X86MachineFunctionInfo *X86FI = MF.getInfo(); + unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t StackSize = MFI->getStackSize(); + unsigned SlotSize = RegInfo->getSlotSize(); + bool HasFP = hasFP(MF); + bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + int64_t FPDelta = 0; + + if (IsWinEH) { + uint64_t NumBytes = 0; + // Calculate required stack adjustment. + uint64_t FrameSize = StackSize - SlotSize; + // If required, include space for extra hidden slot for stashing base pointer. 
+ if (X86FI->getRestoreBasePointer()) + FrameSize += SlotSize; + uint64_t SEHStackAllocAmt = StackSize; + if (RegInfo->needsStackRealignment(MF)) { + // Callee-saved registers are pushed on stack before the stack + // is realigned. + FrameSize -= CSSize; + + uint64_t MaxAlign = + calculateMaxStackAlign(MF); // Desired stack alignment. + NumBytes = RoundUpToAlignment(FrameSize, MaxAlign); + SEHStackAllocAmt = RoundUpToAlignment(SEHStackAllocAmt, 16); + } else { + NumBytes = FrameSize - CSSize; + } + uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes); + // FPDelta is the offset from the "traditional" FP location of the old base + // pointer followed by return address and the location required by the + // restricted Win64 prologue. + // Add FPDelta to all offsets below that go through the frame pointer. + FPDelta = SEHStackAllocAmt - SEHFrameOffset; + } + if (RegInfo->hasBasePointer(MF)) { - assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!"); + assert(HasFP && "VLAs and dynamic stack realign, but no FP?!"); if (FI < 0) { // Skip the saved EBP. - return Offset + RegInfo->getSlotSize(); + return Offset + SlotSize + FPDelta; } else { assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); return Offset + StackSize; @@ -1210,21 +1262,22 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, } else if (RegInfo->needsStackRealignment(MF)) { if (FI < 0) { // Skip the saved EBP. - return Offset + RegInfo->getSlotSize(); + return Offset + SlotSize + FPDelta; } else { assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); return Offset + StackSize; } // FIXME: Support tail calls } else { - if (!hasFP(MF)) + if (!HasFP) return Offset + StackSize; + if (IsWinEH) + return Offset + FPDelta; // Skip the saved EBP. 
- Offset += RegInfo->getSlotSize(); + Offset += SlotSize; // Skip the RETADDR move area - const X86MachineFunctionInfo *X86FI = MF.getInfo(); int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; @@ -1959,8 +2012,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned StackAlign = STI.getFrameLowering()->getStackAlignment(); - Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; + unsigned StackAlign = getStackAlignment(); + Amount = RoundUpToAlignment(Amount, StackAlign); MachineInstr *New = nullptr; diff --git a/test/CodeGen/X86/frameaddr.ll b/test/CodeGen/X86/frameaddr.ll index 3e0f8bc34d6..d795c5749a0 100644 --- a/test/CodeGen/X86/frameaddr.ll +++ b/test/CodeGen/X86/frameaddr.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 ; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-32 -; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 -; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=x86_64-unknown -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64 ; RUN: llc < %s -mtriple=x86_64-gnux32 | FileCheck %s --check-prefix=CHECK-X32ABI ; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-X32ABI ; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s --check-prefix=CHECK-NACL64 diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll index a732eb1efbd..abce13002db 100644 --- a/test/CodeGen/X86/gcc_except_table.ll +++ b/test/CodeGen/X86/gcc_except_table.ll @@ -15,7 
+15,7 @@ define i32 @main() uwtable optsize ssp { ; MINGW64: .seh_proc ; MINGW64: .seh_handler __gxx_personality_v0 -; MINGW64: .seh_setframe 5, 0 +; MINGW64: .seh_setframe 5, 32 ; MINGW64: callq _Unwind_Resume ; MINGW64: .seh_handlerdata ; MINGW64: GCC_except_table0: diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll index aab2eea7ce4..810b23593fd 100644 --- a/test/CodeGen/X86/win64_alloca_dynalloca.ll +++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll @@ -14,26 +14,24 @@ entry: %buf0 = alloca i8, i64 4096, align 1 ; ___chkstk_ms does not adjust %rsp. -; M64: movq %rsp, %rbp ; M64: $4096, %rax ; M64: callq ___chkstk_ms ; M64: subq %rax, %rsp +; M64: leaq 128(%rsp), %rbp ; __chkstk does not adjust %rsp. -; W64: movq %rsp, %rbp ; W64: $4096, %rax ; W64: callq __chkstk ; W64: subq %rax, %rsp +; W64: leaq 128(%rsp), %rbp ; Use %r11 for the large model. -; L64: movq %rsp, %rbp ; L64: $4096, %rax ; L64: movabsq $__chkstk, %r11 ; L64: callq *%r11 ; L64: subq %rax, %rsp ; Freestanding -; EFI: movq %rsp, %rbp ; EFI: $[[B0OFS:4096|4104]], %rsp ; EFI-NOT: call @@ -68,12 +66,12 @@ entry: ; M64: subq $48, %rsp ; M64: movq %rax, 32(%rsp) -; M64: leaq -4096(%rbp), %r9 +; M64: leaq -128(%rbp), %r9 ; M64: callq bar ; W64: subq $48, %rsp ; W64: movq %rax, 32(%rsp) -; W64: leaq -4096(%rbp), %r9 +; W64: leaq -128(%rbp), %r9 ; W64: callq bar ; EFI: subq $48, %rsp @@ -83,9 +81,9 @@ entry: ret i64 %r -; M64: movq %rbp, %rsp +; M64: leaq 3968(%rbp), %rsp -; W64: movq %rbp, %rsp +; W64: leaq 3968(%rbp), %rsp } diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.s b/test/CodeGen/X86/win64_alloca_dynalloca.s new file mode 100644 index 00000000000..d2c6af211f3 --- /dev/null +++ b/test/CodeGen/X86/win64_alloca_dynalloca.s @@ -0,0 +1,29 @@ + .text + .def unaligned; + .scl 2; + .type 32; + .endef + .globl unaligned + .align 16, 0x90 +unaligned: # @unaligned +# BB#0: # %entry + pushq %rbp + movabsq $4096, %rax # imm = 0x1000 + callq __chkstk + 
subq %rax, %rsp + leaq 128(%rsp), %rbp + leaq 15(%rcx), %rax + andq $-16, %rax + callq __chkstk + subq %rax, %rsp + movq %rsp, %rax + subq $48, %rsp + movq %rax, 32(%rsp) + leaq -128(%rbp), %r9 + movq %rcx, %r8 + callq bar + leaq 4016(%rbp), %rsp + popq %rbp + retq + + diff --git a/test/CodeGen/X86/win64_eh.ll b/test/CodeGen/X86/win64_eh.ll index 65b06176755..ed3a6e19ef1 100644 --- a/test/CodeGen/X86/win64_eh.ll +++ b/test/CodeGen/X86/win64_eh.ll @@ -146,23 +146,23 @@ entry: ; WIN64: .seh_proc foo5 ; WIN64: pushq %rbp ; WIN64: .seh_pushreg 5 -; WIN64: movq %rsp, %rbp ; WIN64: pushq %rdi ; WIN64: .seh_pushreg 7 ; WIN64: pushq %rbx ; WIN64: .seh_pushreg 3 -; WIN64: andq $-64, %rsp ; WIN64: subq $128, %rsp -; WIN64: .seh_stackalloc 48 -; WIN64: .seh_setframe 5, 64 +; WIN64: .seh_stackalloc 128 +; WIN64: leaq 128(%rsp), %rbp +; WIN64: .seh_setframe 5, 128 ; WIN64: movaps %xmm7, -32(%rbp) # 16-byte Spill ; WIN64: movaps %xmm6, -48(%rbp) # 16-byte Spill -; WIN64: .seh_savexmm 6, 16 -; WIN64: .seh_savexmm 7, 32 +; WIN64: .seh_savexmm 6, 80 +; WIN64: .seh_savexmm 7, 96 ; WIN64: .seh_endprologue +; WIN64: andq $-64, %rsp ; WIN64: movaps -48(%rbp), %xmm6 # 16-byte Reload ; WIN64: movaps -32(%rbp), %xmm7 # 16-byte Reload -; WIN64: leaq -16(%rbp), %rsp +; WIN64: leaq (%rbp), %rsp ; WIN64: popq %rbx ; WIN64: popq %rdi ; WIN64: popq %rbp diff --git a/test/CodeGen/X86/win64_eh.s b/test/CodeGen/X86/win64_eh.s new file mode 100644 index 00000000000..5e7ea3d9735 --- /dev/null +++ b/test/CodeGen/X86/win64_eh.s @@ -0,0 +1,50 @@ + .text + .def foo5; + .scl 2; + .type 32; + .endef + .globl foo5 + .align 16, 0x90 +foo5: # @foo5 +.Ltmp0: +.seh_proc foo5 +# BB#0: # %entry + pushq %rbp +.Ltmp1: + .seh_pushreg 5 + pushq %rdi +.Ltmp2: + .seh_pushreg 7 + pushq %rbx +.Ltmp3: + .seh_pushreg 3 + subq $384, %rsp # imm = 0x180 +.Ltmp4: + .seh_stackalloc 384 + leaq 128(%rsp), %rbp +.Ltmp5: + .seh_setframe 5, 128 + movaps %xmm7, -32(%rbp) # 16-byte Spill + movaps %xmm6, -48(%rbp) # 16-byte Spill 
+.Ltmp6: + .seh_savexmm 6, 80 +.Ltmp7: + .seh_savexmm 7, 96 +.Ltmp8: + .seh_endprologue + andq $-64, %rsp + #APP + #NO_APP + movl $42, (%rsp) + movaps -48(%rbp), %xmm6 # 16-byte Reload + movaps -32(%rbp), %xmm7 # 16-byte Reload + leaq 256(%rbp), %rsp + popq %rbx + popq %rdi + popq %rbp + retq +.Leh_func_end0: +.Ltmp9: + .seh_endproc + + diff --git a/test/CodeGen/X86/win64_frame.ll b/test/CodeGen/X86/win64_frame.ll new file mode 100644 index 00000000000..a450a83f56f --- /dev/null +++ b/test/CodeGen/X86/win64_frame.ll @@ -0,0 +1,121 @@ +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s + +define i32 @f1(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) "no-frame-pointer-elim"="true" { + ; CHECK-LABEL: f1: + ; CHECK: movl 48(%rbp), %eax + ret i32 %p5 +} + +define void @f2(i32 %p, ...) "no-frame-pointer-elim"="true" { + ; CHECK-LABEL: f2: + ; CHECK: .seh_stackalloc 8 + ; CHECK: leaq 16(%rsp), %rbp + ; CHECK: .seh_setframe 5, 16 + ; CHECK: movq %rdx, 16(%rbp) + ; CHECK: leaq 16(%rbp), %rax + %ap = alloca i8, align 8 + call void @llvm.va_start(i8* %ap) + ret void +} + +define i8* @f3() "no-frame-pointer-elim"="true" { + ; CHECK-LABEL: f3: + ; CHECK: leaq (%rsp), %rbp + ; CHECK: .seh_setframe 5, 0 + ; CHECK: movq 8(%rbp), %rax + %ra = call i8* @llvm.returnaddress(i32 0) + ret i8* %ra +} + +define i8* @f4() "no-frame-pointer-elim"="true" { + ; CHECK-LABEL: f4: + ; CHECK: pushq %rbp + ; CHECK: .seh_pushreg 5 + ; CHECK: subq $304, %rsp + ; CHECK: .seh_stackalloc 304 + ; CHECK: leaq 128(%rsp), %rbp + ; CHECK: .seh_setframe 5, 128 + ; CHECK: .seh_endprologue + ; CHECK: movq 184(%rbp), %rax + alloca [300 x i8] + %ra = call i8* @llvm.returnaddress(i32 0) + ret i8* %ra +} + +declare void @external(i8*) + +define void @f5() "no-frame-pointer-elim"="true" { + ; CHECK-LABEL: f5: + ; CHECK: subq $336, %rsp + ; CHECK: .seh_stackalloc 336 + ; CHECK: leaq 128(%rsp), %rbp + ; CHECK: .seh_setframe 5, 128 + ; CHECK: leaq -92(%rbp), %rcx + ; CHECK: callq external + %a = alloca [300 x i8] + 
%gep = getelementptr [300 x i8]* %a, i32 0, i32 0 + call void @external(i8* %gep) + ret void +} + +define void @f6(i32 %p, ...) "no-frame-pointer-elim"="true" { + ; CHECK-LABEL: f6: + ; CHECK: subq $336, %rsp + ; CHECK: .seh_stackalloc 336 + ; CHECK: leaq 128(%rsp), %rbp + ; CHECK: .seh_setframe 5, 128 + ; CHECK: leaq -92(%rbp), %rcx + ; CHECK: callq external + %a = alloca [300 x i8] + %gep = getelementptr [300 x i8]* %a, i32 0, i32 0 + call void @external(i8* %gep) + ret void +} + +define i32 @f7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" { + ; CHECK-LABEL: f7: + ; CHECK: pushq %rbp + ; CHECK: .seh_pushreg 5 + ; CHECK: subq $320, %rsp + ; CHECK: .seh_stackalloc 320 + ; CHECK: leaq 128(%rsp), %rbp + ; CHECK: .seh_setframe 5, 128 + ; CHECK: movl 240(%rbp), %eax + ; CHECK: leaq 192(%rbp), %rsp + alloca [300 x i8], align 64 + ret i32 %e +} + +define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" { + ; CHECK-LABEL: f8: + ; CHECK: subq $384, %rsp + ; CHECK: .seh_stackalloc 384 + ; CHECK: leaq 128(%rsp), %rbp + ; CHECK: .seh_setframe 5, 128 + + %alloca = alloca [300 x i8], align 64 + ; CHECK: andq $-64, %rsp + ; CHECK: movq %rsp, %rbx + + alloca i32, i32 %a + ; CHECK: movl %ecx, %eax + ; CHECK: leaq 15(,%rax,4), %rax + ; CHECK: andq $-16, %rax + ; CHECK: callq __chkstk + ; CHECK: subq %rax, %rsp + + %gep = getelementptr [300 x i8]* %alloca, i32 0, i32 0 + call void @external(i8* %gep) + ; CHECK: subq $32, %rsp + ; CHECK: leaq (%rbx), %rcx + ; CHECK: callq external + ; CHECK: addq $32, %rsp + + ret i32 %e + ; CHECK: movl %esi, %eax + ; CHECK: leaq 256(%rbp), %rsp +} + +declare i8* @llvm.returnaddress(i32) nounwind readnone + +declare void @llvm.va_start(i8*) nounwind -- 2.34.1