From: Rafael Espindola
Date: Tue, 30 Aug 2011 19:39:58 +0000 (+0000)
Subject: Emit segmented-stack specific code into function prologues for X86
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=76927d758657b3a511c73467ec5a7288795c1513;ds=sidebyside

Emit segmented-stack specific code into function prologues for X86. Modify
the pass added in the previous patch to call this new code. The new
prologues generated will call a libgcc routine (__morestack) to allocate
more stack space from the heap when required.

Patch by Sanjoy Das.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138812 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
index bec84e5d16d..4c759b2ccb9 100644
--- a/include/llvm/Target/TargetFrameLowering.h
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -114,6 +114,10 @@ public:
   virtual void emitEpilogue(MachineFunction &MF,
                             MachineBasicBlock &MBB) const = 0;
 
+  /// Adjust the prologue to have the function use segmented stacks. This works
+  /// by adding a check even before the "normal" function prologue.
+  virtual void adjustForSegmentedStacks(MachineFunction &MF) const { }
+
   /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
   /// saved registers and returns true if it isn't possible / profitable to do
   /// so by issuing a series of store instructions via
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 7e5d804f913..ec5fe25704a 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -699,6 +700,13 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
     if (!I->empty() && I->back().getDesc().isReturn())
       TFI.emitEpilogue(Fn, *I);
   }
+
+  // Emit additional code that is required to support segmented stacks, if
+  // we've been asked for it. This, when linked with a runtime with support
+  // for segmented stacks (libgcc is one), will result in allocating stack
+  // space in small chunks instead of one large contiguous block.
+  if (EnableSegmentedStacks)
+    TFI.adjustForSegmentedStacks(Fn);
 }
 
 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
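The base-class hook is deliberately a no-op, so targets without segmented-stack
support are unaffected and PEI can call it unconditionally. A minimal
standalone sketch of that pattern (hypothetical names, not part of the patch):

    #include <cstdio>

    // Base class supplies a default no-op, mirroring
    // TargetFrameLowering::adjustForSegmentedStacks above.
    struct FrameLoweringSketch {
      virtual ~FrameLoweringSketch() {}
      virtual void adjustForSegmentedStacks() const {} // default: do nothing
    };

    // Only a target that implements segmented stacks overrides the hook.
    struct X86FrameLoweringSketch : FrameLoweringSketch {
      void adjustForSegmentedStacks() const override {
        std::puts("emit stacklet-limit check and __morestack call");
      }
    };

    // Stand-in for PEI::insertPrologEpilogCode: gated only on the option;
    // targets without support fall through to the empty default.
    static void insertPrologEpilogCode(const FrameLoweringSketch &TFI,
                                       bool EnableSegmentedStacks) {
      if (EnableSegmentedStacks)
        TFI.adjustForSegmentedStacks();
    }

    int main() {
      X86FrameLoweringSketch X86;
      insertPrologEpilogCode(X86, true);
    }
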
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index d4d7d0dcc9c..c399ec3fa2e 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -15,6 +15,7 @@
 #include "X86InstrBuilder.h"
 #include "X86InstrInfo.h"
 #include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -645,7 +646,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
       !RegInfo->needsStackRealignment(MF) &&
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
       !MFI->adjustsStack() &&                      // No calls.
-      !IsWin64) {                                  // Win64 has no Red Zone
+      !IsWin64 &&                                  // Win64 has no Red Zone
+      !EnableSegmentedStacks) {                    // Regular stack
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (HasFP) MinSize += SlotSize;
     StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -1275,3 +1277,160 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
       FrameIdx = 0;
     }
   }
 }
+
+static bool
+HasNestArgument(const MachineFunction *MF) {
+  const Function *F = MF->getFunction();
+  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; I++) {
+    if (I->hasNestAttr())
+      return true;
+  }
+  return false;
+}
+
+static unsigned
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
+  if (Is64Bit) {
+    return X86::R11;
+  } else {
+    CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+    bool IsNested = HasNestArgument(&MF);
+
+    if (CallingConvention == CallingConv::X86_FastCall) {
+      if (IsNested) {
+        report_fatal_error("Segmented stacks do not support fastcall with "
+                           "nested functions.");
+        return -1;
+      } else {
+        return X86::EAX;
+      }
+    } else {
+      if (IsNested)
+        return X86::EDX;
+      else
+        return X86::ECX;
+    }
+  }
+}
+
+void
+X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+  MachineBasicBlock &prologueMBB = MF.front();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const X86InstrInfo &TII = *TM.getInstrInfo();
+  uint64_t StackSize;
+  bool Is64Bit = STI.is64Bit();
+  unsigned TlsReg, TlsOffset;
+  DebugLoc DL;
+  const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
+
+  unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+         "Scratch register is live-in");
+
+  if (MF.getFunction()->isVarArg())
+    report_fatal_error("Segmented stacks do not support vararg functions.");
+  if (!ST->isTargetLinux())
+    report_fatal_error("Segmented stacks supported only on Linux.");
+
+  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  bool IsNested = false;
+
+  // We need to know if the function has a nest argument only in 64 bit mode.
+  if (Is64Bit)
+    IsNested = HasNestArgument(&MF);
+
+  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+         e = prologueMBB.livein_end(); i != e; i++) {
+    allocMBB->addLiveIn(*i);
+    checkMBB->addLiveIn(*i);
+  }
+
+  if (IsNested)
+    allocMBB->addLiveIn(X86::R10);
+
+  MF.push_front(allocMBB);
+  MF.push_front(checkMBB);
+
+  // Eventually StackSize will be calculated by a link-time pass, which will
+  // also decide whether checking code needs to be injected into this
+  // particular prologue.
+  StackSize = MFI->getStackSize();
+
+  // Read the limit of the current stacklet from the stack_guard location.
+  if (Is64Bit) {
+    TlsReg = X86::FS;
+    TlsOffset = 0x70;
+
+    BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+      .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+    BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+      .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+  } else {
+    TlsReg = X86::GS;
+    TlsOffset = 0x30;
+
+    BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+      .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+    BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+      .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+  }
+
+  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
+  // It jumps to normal execution of the function body.
+  BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB);
+
+  // On 32 bit we first push the arguments size and then the frame size. On 64
+  // bit, we pass the stack frame size in r10 and the argument size in r11.
+  if (Is64Bit) {
+    // Functions with nested arguments use R10, so it needs to be saved across
+    // the call to __morestack.
+
+    if (IsNested)
+      BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+
+    BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+      .addImm(StackSize);
+    BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+      .addImm(X86FI->getArgumentStackSize());
+    MF.getRegInfo().setPhysRegUsed(X86::R10);
+    MF.getRegInfo().setPhysRegUsed(X86::R11);
+  } else {
+    // Since we'll call __morestack, stack alignment needs to be preserved.
+    BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
+      .addImm(8);
+    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+      .addImm(X86FI->getArgumentStackSize());
+    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+      .addImm(StackSize);
+  }
+
+  // __morestack is in libgcc.
+  if (Is64Bit)
+    BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
+      .addExternalSymbol("__morestack");
+  else
+    BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
+      .addExternalSymbol("__morestack");
+
+  // __morestack only seems to remove 8 bytes from the stack. Add back the
+  // additional 8 bytes we added before pushing the arguments.
+  if (!Is64Bit)
+    BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
+      .addImm(8);
+  BuildMI(allocMBB, DL, TII.get(X86::RET));
+
+  if (Is64Bit && IsNested)
+    BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::R10).addReg(X86::RAX);
+
+  allocMBB->addSuccessor(&prologueMBB);
+  checkMBB->addSuccessor(allocMBB);
+  checkMBB->addSuccessor(&prologueMBB);
+
+#ifndef NDEBUG
+  MF.verify();
+#endif
+}
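To make the control flow concrete: checkMBB computes the would-be stack
pointer (SP minus the frame size), compares it against the stacklet limit held
in TLS, and only falls through into allocMBB when the new frame would not fit.
A minimal simulation of that comparison (hypothetical names; in the emitted
code the limit lives at %fs:0x70 on x86-64 and %gs:0x30 on x86-32):

    #include <cstdint>
    #include <cstdio>

    // Stand-in for the TLS stack_guard slot the emitted CMP reads.
    static uintptr_t StackletLimit = 0x10000;

    // Stand-in for the __morestack runtime routine, which would allocate a
    // fresh stacklet; here it only reports the request.
    static void Morestack(uint64_t FrameSize, uint64_t ArgSize) {
      std::printf("__morestack: frame=%llu args=%llu\n",
                  (unsigned long long)FrameSize, (unsigned long long)ArgSize);
    }

    // Mirrors checkMBB/allocMBB: LEA computes SP - StackSize, then CMP + JG
    // skips the allocation block when the new SP is still above the limit.
    static void SegmentedPrologue(uintptr_t SP, uint64_t StackSize,
                                  uint64_t ArgSize) {
      uintptr_t NewSP = SP - StackSize;   // LEA scratch, [SP - StackSize]
      if (NewSP > StackletLimit) {        // JG taken: straight to the body
        std::puts("enough room: run the normal prologue");
        return;
      }
      Morestack(StackSize, ArgSize);      // allocMBB path
    }

    int main() {
      SegmentedPrologue(0x20000, 0x100, 16); // fits in the current stacklet
      SegmentedPrologue(0x10080, 0x100, 16); // would cross the limit
    }
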
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 7aa94b29dcc..6f490640b4e 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -41,6 +41,8 @@ public:
   void emitPrologue(MachineFunction &MF) const;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 
+  void adjustForSegmentedStacks(MachineFunction &MF) const;
+
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS = NULL) const;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bd89bf999d0..2041c3c9625 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1931,6 +1931,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
     FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
   }
 
+  FuncInfo->setArgumentStackSize(StackSize);
+
   return Chain;
 }
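The new ArgumentStackSize field (added below) is the channel between these two
pieces: LowerFormalArguments records how many argument bytes live on the
stack, and adjustForSegmentedStacks later forwards that count to __morestack.
A minimal model of that handoff (a sketch with hypothetical names, not the
actual classes):

    #include <cstdio>

    // Minimal model of the per-function bookkeeping added by this patch:
    // one recorded value, one producer, one consumer.
    class FunctionInfoSketch {
      unsigned ArgumentStackSize; // bytes of arguments passed on the stack
    public:
      FunctionInfoSketch() : ArgumentStackSize(0) {}
      unsigned getArgumentStackSize() const { return ArgumentStackSize; }
      void setArgumentStackSize(unsigned Size) { ArgumentStackSize = Size; }
    };

    // Producer: stands in for LowerFormalArguments, which knows the byte
    // count once argument lowering has assigned stack locations.
    static void lowerFormalArguments(FunctionInfoSketch &FI,
                                     unsigned StackBytes) {
      FI.setArgumentStackSize(StackBytes);
    }

    // Consumer: stands in for adjustForSegmentedStacks, which hands the
    // count to __morestack along with the frame size.
    static void adjustForSegmentedStacks(const FunctionInfoSketch &FI) {
      std::printf("pass %u argument bytes to __morestack\n",
                  FI.getArgumentStackSize());
    }

    int main() {
      FunctionInfoSketch FI;
      lowerFormalArguments(FI, 24); // e.g. six 4-byte stack args (assumed)
      adjustForSegmentedStacks(FI);
    }
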
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 06043ecd3f3..97e88b57cb5 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -65,6 +65,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   unsigned VarArgsGPOffset;
   /// VarArgsFPOffset - X86-64 vararg func fp reg offset.
   unsigned VarArgsFPOffset;
+  /// ArgumentStackSize - The number of bytes on stack consumed by the
+  /// arguments being passed on the stack.
+  unsigned ArgumentStackSize;
 
 public:
   X86MachineFunctionInfo() : ForceFramePointer(false),
@@ -77,7 +80,8 @@ public:
                              VarArgsFrameIndex(0),
                              RegSaveFrameIndex(0),
                              VarArgsGPOffset(0),
-                             VarArgsFPOffset(0) {}
+                             VarArgsFPOffset(0),
+                             ArgumentStackSize(0) {}
 
   explicit X86MachineFunctionInfo(MachineFunction &MF)
     : ForceFramePointer(false),
@@ -91,7 +95,8 @@ public:
       VarArgsFrameIndex(0),
       RegSaveFrameIndex(0),
       VarArgsGPOffset(0),
-      VarArgsFPOffset(0) {}
+      VarArgsFPOffset(0),
+      ArgumentStackSize(0) {}
 
   bool getForceFramePointer() const { return ForceFramePointer;}
   void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@@ -128,6 +133,9 @@ public:
   unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; }
   void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; }
 
+  unsigned getArgumentStackSize() const { return ArgumentStackSize; }
+  void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
 };
 
 } // End llvm namespace
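One detail worth spelling out from the 32-bit path above: the SUB32ri of 8
bytes before the two 4-byte pushes makes the total adjustment 16 bytes, so
whatever 16-byte alignment ESP had is preserved at the call to __morestack;
afterwards the ADD32ri drops the padding, and per the comment in the patch
__morestack itself removes the 8 pushed bytes. The arithmetic, as a model
(not emitted code):

    #include <cassert>

    int main() {
      unsigned esp = 0xFFF0;        // any value; only alignment matters here
      unsigned before = esp % 16;
      esp -= 8;                     // SUB32ri: padding
      esp -= 4;                     // PUSHi32: argument stack size
      esp -= 4;                     // PUSHi32: frame size
      assert(esp % 16 == before);   // total adjustment is 16 bytes
      // call __morestack: per the comment above, it removes the 8 pushed
      // bytes, and the ADD32ri then drops the 8 bytes of padding.
      esp += 8;
      esp += 8;
      assert(esp == 0xFFF0);        // ESP is back where it started
      return 0;
    }
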