ARM/Dwarf: correctly align stack before callee-saved DPRs

author Tim Northover <tnorthover@apple.com>

Wed, 5 Nov 2014 00:27:13 +0000 (00:27 +0000)

committer Tim Northover <tnorthover@apple.com>

Wed, 5 Nov 2014 00:27:13 +0000 (00:27 +0000)
author Tim Northover <tnorthover@apple.com>
Wed, 5 Nov 2014 00:27:13 +0000 (00:27 +0000)
committer Tim Northover <tnorthover@apple.com>
Wed, 5 Nov 2014 00:27:13 +0000 (00:27 +0000)
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp

index 74f686507df5fb8257f1bf41abad9e77b37b271c..4589799933b8b5630d30f0cf6b1b7130dd8bb7da 100644 (file)
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -260,10 +260,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
  
    // Determine starting offsets of spill areas.
    bool HasFP = hasFP(MF);
-  unsigned DPRCSOffset  = NumBytes - (ArgRegsSaveSize + GPRCS1Size
-                                      + GPRCS2Size + DPRCSSize);
-  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
-  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
+  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
+  unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
+  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
+  unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
    int FramePtrOffsetInPush = 0;
    if (HasFP) {
      FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI)
@@ -279,6 +280,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
    if (GPRCS2Size > 0)
      GPRCS2Push = LastPush = MBBI++;
  
+  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
+  // .cfi_offset operations will reflect that.
+  if (DPRGapSize) {
+    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
+    if (!tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize))
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
+                   MachineInstr::FrameSetup);
+  }
+
    // Move past area 3.
    if (DPRCSSize > 0) {
      DPRCSPush = MBBI;
@@ -508,6 +518,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
  
    AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
    AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
    AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
  
    // If we need dynamic stack realignment, do it here. Be paranoid and make
@@ -613,6 +624,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
      NumBytes -= (ArgRegsSaveSize +
                   AFI->getGPRCalleeSavedArea1Size() +
                   AFI->getGPRCalleeSavedArea2Size() +
+                 AFI->getDPRCalleeSavedGapSize() +
                   AFI->getDPRCalleeSavedAreaSize());
  
      // Reset SP based on frame pointer only if the stack frame extends beyond
@@ -661,6 +673,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
        while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
          MBBI++;
      }
+    if (AFI->getDPRCalleeSavedGapSize()) {
+      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
+             "unexpected DPR alignment gap");
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
+    }
+
      if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
      if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
    }
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h

index 4f9ea7abb0d2b16174cde13b43ef923ee90d0870..4e67fa13638e4cec3544b611b935c21fbc57b282 100644 (file)
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -86,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
    /// areas.
    unsigned GPRCS1Size;
    unsigned GPRCS2Size;
+  unsigned DPRCSAlignGapSize;
    unsigned DPRCSSize;
  
    /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
@@ -134,7 +135,7 @@ public:
      RestoreSPFromFP(false),
      LRSpilledForFarJump(false),
      FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
-    GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+    GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
      NumAlignedDPRCS2Regs(0),
      JumpTableUId(0), PICLabelUId(0),
      VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -183,10 +184,12 @@ public:
  
    unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
    unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+  unsigned getDPRCalleeSavedGapSize() const   { return DPRCSAlignGapSize; }
    unsigned getDPRCalleeSavedAreaSize()  const { return DPRCSSize; }
  
    void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
    void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+  void setDPRCalleeSavedGapSize(unsigned s)   { DPRCSAlignGapSize = s; }
    void setDPRCalleeSavedAreaSize(unsigned s)  { DPRCSSize = s; }
  
    unsigned getArgumentStackSize() const { return ArgumentStackSize; }
diff --git a/test/CodeGen/ARM/dwarf-unwind.ll b/test/CodeGen/ARM/dwarf-unwind.ll

new file mode 100644 (file)

index 0000000..58f486d
--- /dev/null
+++ b/test/CodeGen/ARM/dwarf-unwind.ll
@@ -0,0 +1,68 @@
+; RUN: llc -mtriple=thumbv7-netbsd-eabi -o - %s | FileCheck %s
+declare void @bar()
+
+; ARM's frame lowering attempts to tack another callee-saved register onto the
+; list when it detects a potentially misaligned VFP store. However, if there are
+; none available it used to just vpush anyway and misreport the location of the
+; registers in unwind info. Since there are benefits to aligned stores, it's
+; better to correct the code than the .cfi_offset directive.
+
+define void @test_dpr_align(i8 %l, i8 %r) {
+; CHECK-LABEL: test_dpr_align:
+; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK: sub sp, #4
+; CHECK: vpush {d8}
+; CHECK: .cfi_offset d8, -48
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: vpop {d8}
+; CHECK: add sp, #4
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
+  call void @bar()
+  ret void
+}
+
+; The prologue (but not the epilogue) can be made more space efficient by
+; chucking an argument register into the list. Not worth it in general though:
+; "sub sp, #4" is likely faster.
+define void @test_dpr_align_tiny(i8 %l, i8 %r) minsize {
+; CHECK-LABEL: test_dpr_align_tiny:
+; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp
+; CHECK: vpush {d8}
+; CHECK: .cfi_offset d8, -48
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: vpop {d8}
+; CHECK: add sp, #4
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
+  call void @bar()
+  ret void
+}
+
+
+; However, we shouldn't do a 2-step align/adjust if there are no DPRs to be
+; saved.
+define void @test_nodpr_noalign(i8 %l, i8 %r) {
+; CHECK-LABEL: test_nodpr_noalign:
+; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp
+; CHECK: sub sp, #12
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: add sp, #12
+; CHECK-NOT: add sp
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  alloca i64
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"()
+  call void @bar()
+  ret void
+}
author	Tim Northover <tnorthover@apple.com>
	Wed, 5 Nov 2014 00:27:13 +0000 (00:27 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Wed, 5 Nov 2014 00:27:13 +0000 (00:27 +0000)
lib/Target/ARM/ARMFrameLowering.cpp		patch \| blob \| history
lib/Target/ARM/ARMMachineFunctionInfo.h		patch \| blob \| history
test/CodeGen/ARM/dwarf-unwind.ll	[new file with mode: 0644]	patch \| blob