From 134207508213455c8969a2e07115798619989154 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 27 Dec 2015 06:07:26 +0000 Subject: [PATCH] [X86, Win64] Use a frame pointer if pushf is emitted A frame pointer must be used if stack pointer is modified after the prologue. LLVM will emit pushf/popf if we need to save/restore the FLAGS register, requiring us to have a frame pointer for the function. There is a small twist: this sequence might exist in user code via inline-assembly. For now, conservatively assume that such functions require a frame pointer. For real world justification, please see clang's implementation of __readeflags. This fixes PR25945. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256456 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 41 ++++++----- lib/Target/X86/X86InstrInfo.cpp | 4 +- test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll | 8 ++- test/CodeGen/X86/inline-sse.ll | 4 +- test/CodeGen/X86/pr11415.ll | 8 ++- test/CodeGen/X86/win64_frame.ll | 70 ++++++++++++++++++- test/CodeGen/X86/x86-win64-shrink-wrapping.ll | 4 ++ test/DebugInfo/COFF/asm.ll | 17 +++-- 8 files changed, 121 insertions(+), 35 deletions(-) diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index d6ba11def39..242d0333ef9 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -78,6 +78,27 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { MF.getInfo()->getHasPushSequences(); } +/// usesTheStack - This function checks if any of the users of EFLAGS +/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has +/// to use the stack, and if we don't adjust the stack we clobber the first +/// frame index. +/// See X86InstrInfo::copyPhysReg. +static bool usesTheStack(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Conservativley assume that inline assembly might use the stack. + if (MF.hasInlineAsm()) + return true; + + return any_of(MRI.reg_instructions(X86::EFLAGS), + [](const MachineInstr &RI) { return RI.isCopy(); }); +} + +static bool doesStackUseImplyFP(const MachineFunction &MF) { + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + return IsWin64Prologue && usesTheStack(MF); +} + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -91,7 +112,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || MF.getInfo()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() || - MFI->hasStackMap() || MFI->hasPatchPoint()); + MFI->hasStackMap() || MFI->hasPatchPoint() || + doesStackUseImplyFP(MF)); } static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { @@ -426,23 +448,6 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, } } -/// usesTheStack - This function checks if any of the users of EFLAGS -/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has -/// to use the stack, and if we don't adjust the stack we clobber the first -/// frame index. -/// See X86InstrInfo::copyPhysReg. -static bool usesTheStack(const MachineFunction &MF) { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - for (MachineRegisterInfo::reg_instr_iterator - ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end(); - ri != re; ++ri) - if (ri->isCopy()) - return true; - - return false; -} - MachineInstr *X86FrameLowering::emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 2c57c0ee295..0537b52d171 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -4421,7 +4421,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, "Not having LAHF/SAHF only happens on 64-bit."); // Moving EFLAGS to / from another register requires a push and a pop. // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - clobbersTheStack. + // first frame index. See X86FrameLowering.cpp - usesTheStack. if (FromEFLAGS) { BuildMI(MBB, MI, DL, get(PushF)); BuildMI(MBB, MI, DL, get(Pop), DestReg); @@ -4453,7 +4453,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // such as TF/IF/DF, which LLVM doesn't model. // // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - clobbersTheStack. + // first frame index. See X86FrameLowering.cpp - usesTheStack. bool AXDead = (Reg == AX) || diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll index dfb98bb1ab3..a74aa2dd462 100644 --- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll +++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll @@ -1,7 +1,9 @@ ; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s -; CHECK: subq $40, %rsp -; CHECK: movaps %xmm8, 16(%rsp) -; CHECK: movaps %xmm7, (%rsp) +; CHECK: pushq %rbp +; CHECK: subq $32, %rsp +; CHECK: leaq 32(%rsp), %rbp +; CHECK: movaps %xmm8, -16(%rbp) +; CHECK: movaps %xmm7, -32(%rbp) define i32 @a() nounwind { entry: diff --git a/test/CodeGen/X86/inline-sse.ll b/test/CodeGen/X86/inline-sse.ll index 08819b85829..78d6b762b5e 100644 --- a/test/CodeGen/X86/inline-sse.ll +++ b/test/CodeGen/X86/inline-sse.ll @@ -21,9 +21,11 @@ define void @nop() nounwind { ; ; X64-LABEL: nop: ; X64: # BB#0: +; X64-NEXT: subq $24, %rsp ; X64-NEXT: #APP ; X64-NEXT: #NO_APP -; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps %xmm0, (%rsp) +; X64-NEXT: addq $24, %rsp ; X64-NEXT: retq %1 = alloca <4 x float>, align 16 %2 = call <4 x float> asm "", "=x,~{dirflag},~{fpsr},~{flags}"() diff --git a/test/CodeGen/X86/pr11415.ll b/test/CodeGen/X86/pr11415.ll index 6c32a2206a7..73c49701411 100644 --- a/test/CodeGen/X86/pr11415.ll +++ b/test/CodeGen/X86/pr11415.ll @@ -4,15 +4,17 @@ ; defining %0 before it was read. This caused us to omit the ; movq -8(%rsp), %rdx +; CHECK: pushq %rax ; CHECK: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: movq %rax, -8(%rsp) -; CHECK-NEXT: movq -8(%rsp), %rdx +; CHECK-NEXT: movq %rax, (%rsp) +; CHECK-NEXT: movq (%rsp), %rdx ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: movq %rdx, -8(%rsp) +; CHECK-NEXT: movq %rdx, (%rsp) +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: ret define i64 @foo() { diff --git a/test/CodeGen/X86/win64_frame.ll b/test/CodeGen/X86/win64_frame.ll index 477b3144d9e..27d78dbe547 100644 --- a/test/CodeGen/X86/win64_frame.ll +++ b/test/CodeGen/X86/win64_frame.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=CHECK --check-prefix=PUSHF +; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+sahf | FileCheck %s --check-prefix=SAHF define i32 @f1(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) "no-frame-pointer-elim"="true" { ; CHECK-LABEL: f1: @@ -118,6 +119,73 @@ define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"=" ; CHECK: leaq 224(%rbp), %rsp } +define i64 @f9() { +entry: + ; CHECK-LABEL: f9: + ; CHECK: pushq %rbp + ; CHECK: .seh_pushreg 5 + ; CHECK-NEXT: movq %rsp, %rbp + ; CHECK: .seh_setframe 5, 0 + ; CHECK: .seh_endprologue + + %call = call i64 asm sideeffect "pushf\0A\09popq $0\0A", "=r,~{dirflag},~{fpsr},~{flags}"() + ; CHECK-NEXT: #APP + ; CHECK-NEXT: pushfq + ; CHECK-NEXT: popq %rax + ; CHECK: #NO_APP + + ret i64 %call + ; CHECK-NEXT: popq %rbp + ; CHECK-NEXT: retq +} + +declare i64 @dummy() + +define i64 @f10(i64* %foo, i64 %bar, i64 %baz) { + ; CHECK-LABEL: f10: + ; CHECK: pushq %rbp + ; CHECK: .seh_pushreg 5 + ; CHECK: pushq %rsi + ; CHECK: .seh_pushreg 6 + ; CHECK: pushq %rdi + ; CHECK: .seh_pushreg 7 + ; CHECK: subq $32, %rsp + ; CHECK: .seh_stackalloc 32 + ; CHECK: leaq 32(%rsp), %rbp + ; CHECK: .seh_setframe 5, 32 + ; CHECK: .seh_endprologue + + %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst + ; PUSHF: lock cmpxchgq + ; PUSHF-NEXT: pushfq + ; PUSHF-NEXT: popq %[[REG:.*]] + ; SAHF: lock cmpxchgq + ; SAHF-NEXT: seto %al + ; SAHF-NEXT: lahf + + %v = extractvalue { i64, i1 } %cx, 0 + %p = extractvalue { i64, i1 } %cx, 1 + + %call = call i64 @dummy() + ; PUSHF: callq dummy + ; PUSHF-NEXT: pushq %[[REG]] + ; PUSHF-NEXT: popfq + ; SAHF: callq dummy + ; SAHF-NEXT: pushq + ; SAHF: addb $127, %al + ; SAHF-NEXT: sahf + ; SAHF-NEXT: popq + + %sel = select i1 %p, i64 %call, i64 %bar + ; CHECK-NEXT: cmovneq + + ret i64 %sel + ; CHECK-NEXT: addq $32, %rsp + ; CHECK-NEXT: popq %rdi + ; CHECK-NEXT: popq %rsi + ; CHECK-NEXT: popq %rbp +} + declare i8* @llvm.returnaddress(i32) nounwind readnone declare void @llvm.va_start(i8*) nounwind diff --git a/test/CodeGen/X86/x86-win64-shrink-wrapping.ll b/test/CodeGen/X86/x86-win64-shrink-wrapping.ll index 5d9b2ba3267..395de686d2e 100644 --- a/test/CodeGen/X86/x86-win64-shrink-wrapping.ll +++ b/test/CodeGen/X86/x86-win64-shrink-wrapping.ll @@ -11,8 +11,10 @@ target triple = "x86_64--windows-gnu" ; etc.) prior to the return and this is forbidden for Win64. ; CHECK-LABEL: loopInfoSaveOutsideLoop: ; CHECK: push +; CHECK: push ; CHECK-NOT: popq ; CHECK: popq +; CHECK: popq ; CHECK-NOT: popq ; CHECK-NEXT: retq define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) #0 { @@ -55,6 +57,7 @@ if.end: ; preds = %if.else, %for.end ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: rbx. +; CHECK: pushq %rbp ; CHECK: pushq %rbx ; ; DISABLE: testl %ecx, %ecx @@ -76,6 +79,7 @@ if.end: ; preds = %if.else, %for.end ; DISABLE: jmp [[EPILOG_BB:.LBB[0-9_]+]] ; ; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: popq %rbp ; ENABLE-NEXT: retq ; ; CHECK: [[ELSE_LABEL]]: # %if.else diff --git a/test/DebugInfo/COFF/asm.ll b/test/DebugInfo/COFF/asm.ll index b67100c87fd..f3e52df54be 100644 --- a/test/DebugInfo/COFF/asm.ll +++ b/test/DebugInfo/COFF/asm.ll @@ -130,12 +130,15 @@ ; X64-NEXT: .L{{.*}}:{{$}} ; X64-NEXT: [[START:.*]]:{{$}} ; X64: # BB -; X64: subq $40, %rsp +; X64: pushq %rbp +; X64-NEXT: subq $32, %rsp +; X64-NEXT: leaq 32(%rsp), %rbp ; X64-NEXT: [[ASM_LINE:.*]]:{{$}} ; X64: [[CALL_LINE:.*]]:{{$}} ; X64: callq g ; X64-NEXT: [[EPILOG_AND_RET:.*]]: -; X64: addq $40, %rsp +; X64: addq $32, %rsp +; X64-NEXT: popq %rbp ; X64-NEXT: ret ; X64-NEXT: [[END_OF_F:.*]]: ; @@ -222,22 +225,22 @@ ; OBJ64: ProcStart { ; OBJ64-NEXT: DisplayName: f ; OBJ64-NEXT: Section: f -; OBJ64-NEXT: CodeSize: 0xE +; OBJ64-NEXT: CodeSize: 0x17 ; OBJ64-NEXT: } ; OBJ64-NEXT: ProcEnd ; OBJ64-NEXT: ] ; OBJ64: FunctionLineTable [ ; OBJ64-NEXT: Name: f ; OBJ64-NEXT: Flags: 0x1 -; OBJ64-NEXT: CodeSize: 0xE +; OBJ64-NEXT: CodeSize: 0x17 ; OBJ64-NEXT: FilenameSegment [ ; OBJ64-NEXT: Filename: D:\asm.c ; OBJ64-NEXT: +0x0: 3 ; FIXME: An empty __asm stmt creates an extra entry. ; See PR18679 for the details. -; OBJ64-NEXT: +0x4: 4 -; OBJ64-NEXT: +0x4: 5 -; OBJ64-NEXT: +0x9: 6 +; OBJ64-NEXT: +0xA: 4 +; OBJ64-NEXT: +0xC: 5 +; OBJ64-NEXT: +0x11: 6 ; OBJ64-NEXT: ColStart: 0 ; OBJ64-NEXT: ColEnd: 0 ; OBJ64-NEXT: ColStart: 0 -- 2.34.1