return false;
}
+
+/// GetScratchRegister - Get a register for performing work in the segmented
+/// stack prologue. Depending on the platform and the properties of the
+/// function, either one or two registers will be needed. Set Primary to true
+/// to get the first register, false to get the second.
static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
if (Is64Bit) {
- return X86::R11;
+ return Primary ? X86::R11 : X86::R12;
} else {
CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
bool IsNested = HasNestArgument(&MF);
"nested function.");
return -1;
} else {
- return X86::EAX;
+ return Primary ? X86::EAX : X86::ECX;
}
} else {
if (IsNested)
- return X86::EDX;
+ return Primary ? X86::EDX : X86::EAX;
else
- return X86::ECX;
+ return Primary ? X86::ECX : X86::EAX;
}
}
}
DebugLoc DL;
const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
- unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux())
- report_fatal_error("Segmented stacks supported only on linux.");
+ if (!ST->isTargetLinux() && !ST->isTargetDarwin())
+ report_fatal_error("Segmented stacks supported only on linux and darwin.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
// prologue.
StackSize = MFI->getStackSize();
+ // When the frame size is less than 256 we just compare the stack
+ // boundary directly to the value of the stack pointer, per gcc.
+ bool CompareStackPointer = StackSize < kSplitStackAvailable;
+
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- TlsReg = X86::FS;
- TlsOffset = 0x70;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+ }
- if (StackSize < kSplitStackAvailable)
+ if (CompareStackPointer)
ScratchReg = X86::RSP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
TlsReg = X86::GS;
- TlsOffset = 0x30;
- if (StackSize < kSplitStackAvailable)
+ if (CompareStackPointer)
ScratchReg = X86::ESP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ if (ST->isTargetLinux()) {
+ TlsOffset = 0x30;
+
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else if (ST->isTargetDarwin()) {
+ TlsOffset = 0x48 + 90*4;
+
+ // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
+ unsigned ScratchReg2;
+ bool SaveScratch2;
+ if (CompareStackPointer) {
+ // The primary scratch register is available for holding the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ SaveScratch2 = false;
+ } else {
+ // Need to use a second register to hold the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+
+ // Unfortunately, with fastcc the second scratch register may hold an arg
+ SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
+ }
+
+ // If Scratch2 is live-in then it needs to be saved
+ assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
+ "Scratch register is live-in and not saved");
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
+ .addReg(ScratchReg2, RegState::Kill);
+
+ BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
+ .addImm(TlsOffset);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(ScratchReg2).addImm(1).addReg(0)
+ .addImm(0)
+ .addReg(TlsReg);
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
+ }
}
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
-; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
; We used to crash with filetype=obj
; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
call void @dummy_use (i32* %mem, i32 10)
ret void
-; X32: test_basic:
+; X32-Linux: test_basic:
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB0_2
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB0_2
-; X32: pushl $0
-; X32-NEXT: pushl $60
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $60
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
-; X64: test_basic:
+; X64-Linux: test_basic:
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB0_2
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB0_2
-; X64: movabsq $40, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Linux: movabsq $40, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: test_basic:
+
+; X32-Darwin: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
+; X32-Darwin-NEXT: ja LBB0_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: test_basic:
+
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB0_2
+
+; X64-Darwin: movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
%result = add i32 %other, %addend
ret i32 %result
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB1_2
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB1_2
+
+; X32-Linux: pushl $4
+; X32-Linux-NEXT: pushl $0
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB1_2
+
+; X64-Linux: movq %r10, %rax
+; X64-Linux-NEXT: movabsq $0, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+; X64-Linux-NEXT: movq %rax, %r10
-; X32: pushl $4
-; X32-NEXT: pushl $0
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: movl $432, %edx
+; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
+; X32-Darwin-NEXT: ja LBB1_2
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB1_2
+; X32-Darwin: pushl $4
+; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movq %r10, %rax
-; X64-NEXT: movabsq $0, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
-; X64-NEXT: movq %rax, %r10
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB1_2
+
+; X64-Darwin: movq %r10, %rax
+; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+; X64-Darwin-NEXT: movq %rax, %r10
}
call void @dummy_use (i32* %mem, i32 0)
ret void
-; X32: leal -40012(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
-; X32-NEXT: ja .LBB2_2
+; X32-Linux: leal -40012(%esp), %ecx
+; X32-Linux-NEXT: cmpl %gs:48, %ecx
+; X32-Linux-NEXT: ja .LBB2_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $40012
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: leaq -40008(%rsp), %r11
+; X64-Linux-NEXT: cmpq %fs:112, %r11
+; X64-Linux-NEXT: ja .LBB2_2
+
+; X64-Linux: movabsq $40008, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: leal -40012(%esp), %ecx
+; X32-Darwin-NEXT: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
+; X32-Darwin-NEXT: ja LBB2_2
-; X32: pushl $0
-; X32-NEXT: pushl $40012
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: leaq -40008(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
-; X64-NEXT: ja .LBB2_2
+; X64-Darwin: leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja LBB2_2
-; X64: movabsq $40008, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin: movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
call void @dummy_use (i32* %mem, i32 10)
ret void
-; X32: test_fastcc:
+; X32-Linux: test_fastcc:
+
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB3_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $60
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: test_fastcc:
+
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB3_2
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB3_2
+; X64-Linux: movabsq $40, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
-; X32: pushl $0
-; X32-NEXT: pushl $60
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: test_fastcc:
-; X64: test_fastcc:
+; X32-Darwin: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
+; X32-Darwin-NEXT: ja LBB3_2
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB3_2
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movabsq $40, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin: test_fastcc:
+
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB3_2
+
+; X64-Darwin: movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
call void @dummy_use (i32* %mem, i32 0)
ret void
-; X32: test_fastcc_large:
+; X32-Linux: test_fastcc_large:
+
+; X32-Linux: leal -40012(%esp), %eax
+; X32-Linux-NEXT: cmpl %gs:48, %eax
+; X32-Linux-NEXT: ja .LBB4_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $40012
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: test_fastcc_large:
+
+; X64-Linux: leaq -40008(%rsp), %r11
+; X64-Linux-NEXT: cmpq %fs:112, %r11
+; X64-Linux-NEXT: ja .LBB4_2
+
+; X64-Linux: movabsq $40008, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: test_fastcc_large:
+
+; X32-Darwin: leal -40012(%esp), %eax
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: ja LBB4_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: test_fastcc_large:
+
+; X64-Darwin: leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja LBB4_2
+
+; X64-Darwin: movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+}
+
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+ %mem = alloca i32, i32 10000
+ call void @dummy_use (i32* %mem, i32 %a)
+ ret void
-; X32: leal -40012(%esp), %eax
-; X32-NEXT: cmpl %gs:48, %eax
-; X32-NEXT: ja .LBB4_2
+; This is testing that the Mac implementation preserves ecx
-; X32: pushl $0
-; X32-NEXT: pushl $40012
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: test_fastcc_large_with_ecx_arg:
-; X64: test_fastcc_large:
+; X32-Darwin: leal -40012(%esp), %eax
+; X32-Darwin-NEXT: pushl %ecx
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: popl %ecx
+; X32-Darwin-NEXT: ja LBB5_2
-; X64: leaq -40008(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
-; X64-NEXT: ja .LBB4_2
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movabsq $40008, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
}