[X86] Generate .cfi_adjust_cfa_offset correctly when pushing arguments

author Michael Kuperstein <michael.m.kuperstein@intel.com>

Tue, 3 Nov 2015 08:17:25 +0000 (08:17 +0000)

committer Michael Kuperstein <michael.m.kuperstein@intel.com>

Tue, 3 Nov 2015 08:17:25 +0000 (08:17 +0000)
author Michael Kuperstein <michael.m.kuperstein@intel.com>
Tue, 3 Nov 2015 08:17:25 +0000 (08:17 +0000)
committer Michael Kuperstein <michael.m.kuperstein@intel.com>
Tue, 3 Nov 2015 08:17:25 +0000 (08:17 +0000)
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h

index 8cc5d6f242f0880fb27e5810a601a8e1a571ffeb..4df580f0dbae3da4032d9a854a6ad7ac2b084796 100644 (file)
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -245,6 +245,11 @@ public:
    bool hasDebugInfo() const { return DbgInfoAvailable; }
    void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
  
+  // Returns true if we need to generate precise CFI. Currently
+  // this is equivalent to hasDebugInfo(), but if we ever implement
+  // async EH, it will require precise CFI as well.
+  bool usePreciseUnwindInfo() const { return hasDebugInfo(); }
+
    bool callsEHReturn() const { return CallsEHReturn; }
    void setCallsEHReturn(bool b) { CallsEHReturn = b; }
  
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp

index 8efa03a88620e8fc7c8bcd87f65bbd2b0ecbc190..9ede04c4c1b758623876af4f2edf760537f7c4c2 100644 (file)
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -216,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
    case MCCFIInstruction::OpDefCfaOffset:
      OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
      break;
+  case MCCFIInstruction::OpAdjustCfaOffset:
+    OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset());
+    break;
    case MCCFIInstruction::OpDefCfa:
      OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
      break;
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp

index e3031b8946417744f75ffa3655c2d4164ad52ef7..23990b01ba1818206e5eb8d6ca045d5fabc41e44 100644 (file)
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -103,7 +103,8 @@ private:
    const char *getPassName() const override { return "X86 Optimize Call Frame"; }
  
    const TargetInstrInfo *TII;
-  const TargetFrameLowering *TFL;
+  const X86FrameLowering *TFL;
+  const X86Subtarget *STI;
    const MachineRegisterInfo *MRI;
    static char ID;
  };
@@ -127,13 +128,15 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
    // No point in running this in 64-bit mode, since some arguments are
    // passed in-register in all common calling conventions, so the pattern
    // we're looking for will never match.
-  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
-  if (STI.is64Bit())
+  if (STI->is64Bit())
      return false;
  
-  // We can't encode multiple DW_CFA_GNU_args_size in the compact
-  // unwind encoding that Darwin uses.
-  if (STI.isTargetDarwin() && !MF.getMMI().getLandingPads().empty())
+  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
+  // in the compact unwind encoding that Darwin uses. So, bail if there
+  // is a danger of that being generated.
+  if (STI->isTargetDarwin() && 
+     (!MF.getMMI().getLandingPads().empty() || 
+       (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF))))
      return false;
  
    // You would expect straight-line code between call-frame setup and
@@ -216,8 +219,9 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
  }
  
  bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getSubtarget().getInstrInfo();
-  TFL = MF.getSubtarget().getFrameLowering();
+  STI = &MF.getSubtarget<X86Subtarget>();
+  TII = STI->getInstrInfo();
+  TFL = STI->getFrameLowering();
    MRI = &MF.getRegInfo();
  
    if (!isLegal(MF))
@@ -312,7 +316,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
    // Check that this particular call sequence is amenable to the
    // transformation.
    const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
-                                       MF.getSubtarget().getRegisterInfo());
+                                       STI->getRegisterInfo());
    unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  
    // We expect to enter this at the beginning of a call sequence
@@ -455,6 +459,7 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
    for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
      MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
      MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
+    MachineBasicBlock::iterator Push = nullptr;
      if (MOV->getOpcode() == X86::MOV32mi) {
        unsigned PushOpcode = X86::PUSHi32;
        // If the operand is a small (8-bit) immediate, we can use a
@@ -466,21 +471,20 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
          if (isInt<8>(Val))
            PushOpcode = X86::PUSH32i8;
        }
-      BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
+      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
+          .addOperand(PushOp);
      } else {
        unsigned int Reg = PushOp.getReg();
  
        // If PUSHrmm is not slow on this target, try to fold the source of the
        // push into the instruction.
-      const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
-      bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
+      bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
  
        // Check that this is legal to fold. Right now, we're extremely
        // conservative about that.
        MachineInstr *DefMov = nullptr;
        if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
-        MachineInstr *Push =
-            BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
+        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
  
          unsigned NumOps = DefMov->getDesc().getNumOperands();
          for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@@ -488,12 +492,18 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
  
          DefMov->eraseFromParent();
        } else {
-        BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
+        Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
              .addReg(Reg)
              .getInstr();
        }
      }
  
+    // For debugging, when using SP-based CFA, we need to adjust the CFA
+    // offset after each push.
+    if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo())
+      TFL->BuildCFI(MBB, std::next(Push), DL, 
+                    MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));
+
      MBB.erase(MOV);
    }
  
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp

index 95cb76094eca7549c58fcd8680903d7bb08556f6..7b7f0daf12b3ae2b55e572d7f7eab0aae6e38f76 100644 (file)
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -2105,18 +2105,23 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
      unsigned StackAlign = getStackAlignment();
      Amount = RoundUpToAlignment(Amount, StackAlign);
  
+    MachineModuleInfo &MMI = MF.getMMI();
+    const Function *Fn = MF.getFunction();
+    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+    bool DwarfCFI = !WindowsCFI && 
+                    (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
+
      // If we have any exception handlers in this function, and we adjust
-    // the SP before calls, we may need to indicate this to the unwinder,
-    // using GNU_ARGS_SIZE. Note that this may be necessary
-    // even when Amount == 0, because the preceding function may have
-    // set a non-0 GNU_ARGS_SIZE.
+    // the SP before calls, we may need to indicate this to the unwinder
+    // using GNU_ARGS_SIZE. Note that this may be necessary even when
+    // Amount == 0, because the preceding function may have set a non-0
+    // GNU_ARGS_SIZE.
      // TODO: We don't need to reset this between subsequent functions,
      // if it didn't change.
-    bool HasDwarfEHHandlers =
-      !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
-      !MF.getMMI().getLandingPads().empty();
+    bool HasDwarfEHHandlers = !WindowsCFI &&
+                              !MF.getMMI().getLandingPads().empty();
  
-    if (HasDwarfEHHandlers && !isDestroy && 
+    if (HasDwarfEHHandlers && !isDestroy &&
          MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
        BuildCFI(MBB, I, DL,
                 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
@@ -2128,15 +2133,37 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
      // (Pushes of argument for frame setup, callee pops for frame destroy)
      Amount -= InternalAmt;
  
+    // If this is a callee-pop calling convention, and we're emitting precise
+    // SP-based CFI, emit a CFA adjust for the amount the callee popped.
+    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) && 
+        MMI.usePreciseUnwindInfo())
+      BuildCFI(MBB, I, DL, 
+               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
+
      if (Amount) {
        // Add Amount to SP to destroy a frame, and subtract to setup.
        int Offset = isDestroy ? Amount : -Amount;
  
-      if (!(MF.getFunction()->optForMinSize() && 
+      if (!(Fn->optForMinSize() && 
              adjustStackWithPops(MBB, I, DL, Offset)))
          BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
      }
  
+    if (DwarfCFI && !hasFP(MF)) {
+      // If we don't have FP, but need to generate unwind information,
+      // we need to set the correct CFA offset after the stack adjustment.
+      // How much we adjust the CFA offset depends on whether we're emitting
+      // CFI only for EH purposes or for debugging. EH only requires the CFA
+      // offset to be correct at each call site, while for debugging we want
+      // it to be more precise.
+      int CFAOffset = Amount;
+      if (!MMI.usePreciseUnwindInfo())
+        CFAOffset += InternalAmt;
+      CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
+      BuildCFI(MBB, I, DL, 
+               MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
+    }
+
      return;
    }
  
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h

index 59c6a062810bdf21fe4224ca365afa530205dda9..261eade9173c899c5bba71e045da3f88fe930695 100644 (file)
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -125,13 +125,13 @@ public:
    /// \p MBB will be correctly handled by the target.
    bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
  
-private:
-  uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
-
    /// Wraps up getting a CFI index and building a MachineInstr for it.
    void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  DebugLoc DL, MCCFIInstruction CFIInst) const;
  
+private:
+  uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
+
    /// Aligns the stack pointer by ANDing it with -MaxAlign.
    void BuildStackAlignAND(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, DebugLoc DL,
diff --git a/test/CodeGen/X86/debugloc-argsize.ll b/test/CodeGen/X86/debugloc-argsize.ll

index 7db7459e8fadafc3c4df2576244b5df64e74f038..56f67b72d980d4fc968b9797f7d9f9fb2da7fdbf 100644 (file)
--- a/test/CodeGen/X86/debugloc-argsize.ll
+++ b/test/CodeGen/X86/debugloc-argsize.ll
@@ -30,7 +30,7 @@ declare i8* @__cxa_begin_catch(i8*)
  
  declare void @__cxa_end_catch()
  
-attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { optsize }
  attributes #2 = { nounwind }
  
diff --git a/test/CodeGen/X86/fold-push.ll b/test/CodeGen/X86/fold-push.ll

index cb4e435e35c1b9778fa82dc31deb7bf8768ab928..eaf91351021fe3389e3b8c5ec8bed31aa50be263 100644 (file)
--- a/test/CodeGen/X86/fold-push.ll
+++ b/test/CodeGen/X86/fold-push.ll
@@ -3,7 +3,7 @@
  
  declare void @foo(i32 %r)
  
-define void @test(i32 %a, i32 %b) optsize {
+define void @test(i32 %a, i32 %b) optsize nounwind {
  ; CHECK-LABEL: test:
  ; CHECK: movl [[EAX:%e..]], (%esp)
  ; CHECK-NEXT: pushl [[EAX]]
@@ -22,7 +22,7 @@ define void @test(i32 %a, i32 %b) optsize {
    ret void
  }
  
-define void @test_min(i32 %a, i32 %b) minsize {
+define void @test_min(i32 %a, i32 %b) minsize nounwind {
  ; CHECK-LABEL: test_min:
  ; CHECK: movl [[EAX:%e..]], (%esp)
  ; CHECK-NEXT: pushl [[EAX]]
diff --git a/test/CodeGen/X86/pop-stack-cleanup.ll b/test/CodeGen/X86/pop-stack-cleanup.ll

index 3a22cc19100b166026a36d9fc5b016b1f92cb6ef..bcf7594065f396dc6c8c2b0f5426e14dfce7af5e 100644 (file)
--- a/test/CodeGen/X86/pop-stack-cleanup.ll
+++ b/test/CodeGen/X86/pop-stack-cleanup.ll
@@ -9,7 +9,7 @@ declare void @param3(i32 %a, i32 %b, i32 %c)
  declare void @param8(i64, i64, i64, i64, i64, i64, i64, i64)
  
  
-define void @test() minsize {
+define void @test() minsize nounwind {
  ; CHECK-LABEL: test:
  ; CHECK: calll _param1
  ; CHECK-NEXT: popl %eax
@@ -48,7 +48,7 @@ define void @negative(i32 %k) {
    ret void
  }
  
-define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
+define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize nounwind {
  ; CHECK-LABEL: spill:
  ; CHECK-DAG: movl %ecx,
  ; CHECK-DAG: movl %edx,
@@ -63,7 +63,7 @@ define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
    ret void
  }
  
-define void @test_linux64(i32 %size) minsize {
+define void @test_linux64(i32 %size) minsize nounwind {
  ; LINUX64-LABEL: test_linux64:
  ; LINUX64: pushq %rbp
  ; LINUX64: callq param8
diff --git a/test/CodeGen/X86/push-cfi-debug.ll b/test/CodeGen/X86/push-cfi-debug.ll

new file mode 100644 (file)

index 0000000..61110e5
--- /dev/null
+++ b/test/CodeGen/X86/push-cfi-debug.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+
+
+; Function Attrs: optsize
+declare void @foo(i32, i32) #0
+declare x86_stdcallcc void @stdfoo(i32, i32) #0
+
+; CHECK-LABEL: test1:
+; CHECK: subl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: pushl $2
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: pushl $1
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: calll foo
+; CHECK: addl $16, %esp
+; CHECK: .cfi_adjust_cfa_offset -16
+; CHECK: subl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: pushl $4
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: pushl $3
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: calll stdfoo
+; CHECK: .cfi_adjust_cfa_offset -8
+; CHECK: addl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset -8
+define void @test1() #0 {
+entry:
+  tail call void @foo(i32 1, i32 2) #1, !dbg !10
+  tail call x86_stdcallcc void @stdfoo(i32 3, i32 4) #1, !dbg !11
+  ret void, !dbg !12
+}
+
+attributes #0 = { nounwind optsize }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250289)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "foo.c", directory: "foo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "test1", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, function: void ()* @test1, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 250289)"}
+!10 = !DILocation(line: 4, column: 3, scope: !4)
+!11 = !DILocation(line: 5, column: 3, scope: !4)
+!12 = !DILocation(line: 6, column: 1, scope: !4)
diff --git a/test/CodeGen/X86/push-cfi-obj.ll b/test/CodeGen/X86/push-cfi-obj.ll

index 43ab34e744bf9812fe2324106fe66f71ddd6e2a8..bc01407f6f39576ec365755139bf28a9d1da3f2f 100644 (file)
--- a/test/CodeGen/X86/push-cfi-obj.ll
+++ b/test/CodeGen/X86/push-cfi-obj.ll
@@ -1,36 +1,36 @@
-; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=LINUX
  ; RUN: llc < %s -mtriple=i686-darwin-macosx10.7 -filetype=obj | llvm-readobj -sections | FileCheck -check-prefix=DARWIN %s
  
  ; On darwin, check that we manage to generate the compact unwind section
  ; DARWIN: Name: __compact_unwind
  ; DARWIN: Segment: __LD
  
-; CHECK:         Index: 8
-; CHECK-NEXT:    Name: .eh_frame (41)
-; CHECK-NEXT:    Type: SHT_PROGBITS (0x1)
-; CHECK-NEXT:    Flags [ (0x2)
-; CHECK-NEXT:      SHF_ALLOC (0x2)
-; CHECK-NEXT:    ]
-; CHECK-NEXT:    Address: 0x0
-; CHECK-NEXT:    Offset: 0x64
-; CHECK-NEXT:    Size: 60
-; CHECK-NEXT:    Link: 0
-; CHECK-NEXT:    Info: 0
-; CHECK-NEXT:    AddressAlignment: 4
-; CHECK-NEXT:    EntrySize: 0
-; CHECK-NEXT:    Relocations [
-; CHECK-NEXT:    ]
-; CHECK-NEXT:    SectionData (
-; CHECK-NEXT:      0000: 1C000000 00000000 017A504C 5200017C  |.........zPLR..||
-; CHECK-NEXT:      0010: 08070000 00000000 1B0C0404 88010000  |................|
-; CHECK-NEXT:      0020: 18000000 24000000 00000000 19000000  |....$...........|
-; CHECK-NEXT:      0030: 04000000 00430E10 2E100000           |.....C......|
-; CHECK-NEXT:    )
+; LINUX:         Index: 8
+; LINUX-NEXT:    Name: .eh_frame (41)
+; LINUX-NEXT:    Type: SHT_PROGBITS (0x1)
+; LINUX-NEXT:    Flags [ (0x2)
+; LINUX-NEXT:      SHF_ALLOC (0x2)
+; LINUX-NEXT:    ]
+; LINUX-NEXT:    Address: 0x0
+; LINUX-NEXT:    Offset: 0x68
+; LINUX-NEXT:    Size: 64
+; LINUX-NEXT:    Link: 0
+; LINUX-NEXT:    Info: 0
+; LINUX-NEXT:    AddressAlignment: 4
+; LINUX-NEXT:    EntrySize: 0
+; LINUX-NEXT:    Relocations [
+; LINUX-NEXT:    ]
+; LINUX-NEXT:    SectionData (
+; LINUX-NEXT:      0000: 1C000000 00000000 017A504C 5200017C  |.........zPLR..||
+; LINUX-NEXT:      0010: 08070000 00000000 1B0C0404 88010000  |................|
+; LINUX-NEXT:      0020: 1C000000 24000000 00000000 1D000000  |....$...........|
+; LINUX-NEXT:      0030: 04000000 00410E08 8502420D 05432E10  |.....A....B..C..|
+; LINUX-NEXT:    )
  
  declare i32 @__gxx_personality_v0(...)
  declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
  
-define void @test() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @test() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  entry:
    invoke void @good(i32 1, i32 2, i32 3, i32 4)
            to label %continue unwind label %cleanup
@@ -41,3 +41,5 @@ cleanup:
       cleanup
    ret void
  }
+
+attributes #0 = { optsize "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/X86/push-cfi.ll b/test/CodeGen/X86/push-cfi.ll

index 959522782665902da6fc2d6dac7bf6fd854c604f..4d07a1d8181bcf3a44c34e293a1a08a5d2dd30ac 100644 (file)
--- a/test/CodeGen/X86/push-cfi.ll
+++ b/test/CodeGen/X86/push-cfi.ll
@@ -1,21 +1,51 @@
-; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX -check-prefix=CHECK
+; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=DARWIN -check-prefix=CHECK
  
  declare i32 @__gxx_personality_v0(...)
  declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
  declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f)
  declare void @empty()
  
-; We use an invoke, and expect a .cfi_escape GNU_ARGS_SIZE with size 16
-; before the invocation
-; CHECK-LABEL: test1:
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $3
-; CHECK-NEXT: pushl   $2
-; CHECK-NEXT: pushl   $1
-; CHECK-NEXT: call
-; CHECK-NEXT: addl $16, %esp
-define void @test1() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; When we use an invoke, and have FP, we expect a .cfi_escape GNU_ARGS_SIZE
+; with size 16 before the invocation. Without FP, we expect .cfi_adjust_cfa_offset
+; before and after.
+; Darwin should not generate pushes in either circumstance.
+; CHECK-LABEL: test1_nofp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: .cfi_adjust_cfa_offset 16
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test1_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; CHECK-LABEL: test1_fp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; DARWIN: pushl %ebp
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test1_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  entry:
    invoke void @good(i32 1, i32 2, i32 3, i32 4)
            to label %continue unwind label %cleanup
@@ -28,27 +58,69 @@ cleanup:
  }
  
  ; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE,
-; even if it has an unwind table.
-; CHECK-LABEL: test2:
+; even if it has an unwind table. Without FP, we still need cfi_adjust_cfa_offset,
+; so darwin should not generate pushes.
+; CHECK-LABEL: test2_nofp:
+; LINUX-NOT: .cfi_escape
+; LINUX: .cfi_adjust_cfa_offset 16
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test2_fp:
  ; CHECK-NOT: .cfi_escape
+; CHECK-NOT: .cfi_adjust_cfa_offset
  ; CHECK: pushl   $4
  ; CHECK-NEXT: pushl   $3
  ; CHECK-NEXT: pushl   $2
  ; CHECK-NEXT: pushl   $1
  ; CHECK-NEXT: call
-; CHECK-NEXT: addl $16, %esp
-define void @test2() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-NEXT: addl $24, %esp
+define void @test2_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  entry:
    call void @good(i32 1, i32 2, i32 3, i32 4)
    ret void
  }
  
-; If we did not end up using any pushes, no need for GNU_ARGS_SIZE anywhere
-; CHECK-LABEL: test3:
-; CHECK-NOT: .cfi_escape
-; CHECK-NOT: pushl
-; CHECK: retl
-define void @test3() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
+; cfi_adjust_cfa_offset.
+; CHECK-LABEL: test3_nofp:
+; LINUX-NOT: .cfi_escape
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX-NOT: pushl
+; LINUX: retl
+define void @test3_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @empty()
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
+; cfi_adjust_cfa_offset.
+; CHECK-LABEL: test3_fp:
+; LINUX: pushl %ebp
+; LINUX-NOT: .cfi_escape
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX-NOT: pushl
+; LINUX: retl
+define void @test3_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  entry:
    invoke void @empty()
            to label %continue unwind label %cleanup
@@ -62,24 +134,24 @@ cleanup:
  
  ; Different sized stacks need different GNU_ARGS_SIZEs
  ; CHECK-LABEL: test4:
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $3
-; CHECK-NEXT: pushl   $2
-; CHECK-NEXT: pushl   $1
-; CHECK-NEXT: call
-; CHECK-NEXT: addl $16, %esp
-; CHECK: .cfi_escape 0x2e, 0x20
-; CHECK-NEXT: subl    $8, %esp
-; CHECK-NEXT: pushl   $11
-; CHECK-NEXT: pushl   $10
-; CHECK-NEXT: pushl   $9
-; CHECK-NEXT: pushl   $8
-; CHECK-NEXT: pushl   $7
-; CHECK-NEXT: pushl   $6
-; CHECK-NEXT: calll   large
-; CHECK-NEXT: addl $32, %esp
-define void @test4() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_escape 0x2e, 0x20
+; LINUX: subl    $8, %esp
+; LINUX-NEXT: pushl   $11
+; LINUX-NEXT: pushl   $10
+; LINUX-NEXT: pushl   $9
+; LINUX-NEXT: pushl   $8
+; LINUX-NEXT: pushl   $7
+; LINUX-NEXT: pushl   $6
+; LINUX-NEXT: calll   large
+; LINUX-NEXT: addl $32, %esp
+define void @test4() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  entry:
    invoke void @good(i32 1, i32 2, i32 3, i32 4)
            to label %continue1 unwind label %cleanup
@@ -95,18 +167,48 @@ cleanup:
  }
  
  ; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call
-; without parameters
-; CHECK-LABEL: test5:
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK-NEXT: pushl   $4
-; CHECK-NEXT: pushl   $3
-; CHECK-NEXT: pushl   $2
-; CHECK-NEXT: pushl   $1
-; CHECK-NEXT: call
-; CHECK-NEXT: addl $16, %esp
-; CHECK: .cfi_escape 0x2e, 0x00
-; CHECK-NEXT: call
-define void @test5() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; without parameters, but don't need to adjust the cfa offset
+; CHECK-LABEL: test5_nofp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: .cfi_adjust_cfa_offset 16
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: .cfi_escape 0x2e, 0x00
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: call
+define void @test5_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue1 unwind label %cleanup
+continue1:
+  invoke void @empty()
+          to label %continue2 unwind label %cleanup
+continue2:
+  ret void          
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; CHECK-LABEL: test5_fp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_escape 0x2e, 0x00
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: call
+define void @test5_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  entry:
    invoke void @good(i32 1, i32 2, i32 3, i32 4)
            to label %continue1 unwind label %cleanup
@@ -121,13 +223,13 @@ cleanup:
    ret void
  }
  
-; This is actually inefficient - we don't need to repeat the .cfi_escape twice.
+; FIXME: This is actually inefficient - we don't need to repeat the .cfi_escape twice.
  ; CHECK-LABEL: test6:
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK: call
-; CHECK: .cfi_escape 0x2e, 0x10
-; CHECK: call
-define void @test6() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: call
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: call
+define void @test6() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  entry:
    invoke void @good(i32 1, i32 2, i32 3, i32 4)
            to label %continue1 unwind label %cleanup
@@ -141,3 +243,41 @@ cleanup:
       cleanup
    ret void
  }
+
+; Darwin should generate pushes in the presence of FP and an unwind table,
+; but not FP and invoke.
+; CHECK-LABEL: test7:
+; DARWIN: pushl %ebp
+; DARWIN: movl %esp, %ebp
+; DARWIN: .cfi_def_cfa_register %ebp
+; DARWIN-NOT: .cfi_adjust_cfa_offset
+; DARWIN: pushl   $4
+; DARWIN-NEXT: pushl   $3
+; DARWIN-NEXT: pushl   $2
+; DARWIN-NEXT: pushl   $1
+; DARWIN-NEXT: call
+define void @test7() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; CHECK-LABEL: test8:
+; DARWIN: pushl %ebp
+; DARWIN: movl %esp, %ebp
+; DARWIN-NOT: .cfi_adjust_cfa_offset
+; DARWIN-NOT: pushl
+define void @test8() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+attributes #0 = { optsize }
+attributes #1 = { optsize "no-frame-pointer-elim"="true" }
author	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Tue, 3 Nov 2015 08:17:25 +0000 (08:17 +0000)
committer	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Tue, 3 Nov 2015 08:17:25 +0000 (08:17 +0000)
include/llvm/CodeGen/MachineModuleInfo.h		patch \| blob \| history
lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp		patch \| blob \| history
lib/Target/X86/X86CallFrameOptimization.cpp		patch \| blob \| history
lib/Target/X86/X86FrameLowering.cpp		patch \| blob \| history
lib/Target/X86/X86FrameLowering.h		patch \| blob \| history
test/CodeGen/X86/debugloc-argsize.ll		patch \| blob \| history
test/CodeGen/X86/fold-push.ll		patch \| blob \| history
test/CodeGen/X86/pop-stack-cleanup.ll		patch \| blob \| history
test/CodeGen/X86/push-cfi-debug.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/push-cfi-obj.ll		patch \| blob \| history
test/CodeGen/X86/push-cfi.ll		patch \| blob \| history