X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86RegisterInfo.cpp;h=afb07405b709ae97d3f2a503ded7c48fdb415137;hp=f1574cb23ae5e1ef83a3ed1b4824e65c6bb2199b;hb=1f74590e9d1b9cf0f1f81a156efea73f76546e05;hpb=2e9919a5e5fe76f4b1e3290103c4bfd149ebba9c diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index f1574cb23ae..afb07405b70 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -37,10 +37,16 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt +ForceStackAlign("force-align-stack", + cl::desc("Force align the stack to the minimum alignment" + " needed for the function."), + cl::init(false), cl::Hidden); + X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo(tm.getSubtarget().is64Bit() ? @@ -128,23 +134,78 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: return RegNo-X86::ST0; - case X86::XMM0: case X86::XMM8: case X86::MM0: + case X86::XMM0: case X86::XMM8: + case X86::YMM0: case X86::YMM8: case X86::MM0: + return 0; + case X86::XMM1: case X86::XMM9: + case X86::YMM1: case X86::YMM9: case X86::MM1: + return 1; + case X86::XMM2: case X86::XMM10: + case X86::YMM2: case X86::YMM10: case X86::MM2: + return 2; + case X86::XMM3: case X86::XMM11: + case X86::YMM3: case X86::YMM11: case X86::MM3: + return 3; + case X86::XMM4: case X86::XMM12: + case X86::YMM4: case X86::YMM12: case X86::MM4: + return 4; + case X86::XMM5: case X86::XMM13: + case X86::YMM5: case X86::YMM13: case X86::MM5: + return 5; + case X86::XMM6: case X86::XMM14: + case X86::YMM6: case X86::YMM14: case X86::MM6: + return 6; + case X86::XMM7: case X86::XMM15: + case X86::YMM7: case X86::YMM15: case X86::MM7: + return 7; + + case X86::ES: + return 0; + case X86::CS: + return 1; + case X86::SS: + return 2; + case X86::DS: + return 3; + case X86::FS: + return 4; + case X86::GS: + return 5; + + case X86::CR0: + return 0; + case X86::CR1: + return 1; + case X86::CR2: + return 2; + case X86::CR3: + return 3; + case X86::CR4: + return 4; + + case X86::DR0: return 0; - case X86::XMM1: case X86::XMM9: case X86::MM1: + case X86::DR1: return 1; - case X86::XMM2: case X86::XMM10: case X86::MM2: + case X86::DR2: return 2; - case X86::XMM3: case X86::XMM11: case X86::MM3: + case X86::DR3: return 3; - case X86::XMM4: case X86::XMM12: case X86::MM4: + case X86::DR4: return 4; - case X86::XMM5: case X86::XMM13: case X86::MM5: + case X86::DR5: return 5; - case X86::XMM6: case X86::XMM14: case X86::MM6: + case X86::DR6: return 6; - case X86::XMM7: case X86::XMM15: case X86::MM7: + case X86::DR7: return 7; + // Pseudo index registers are equivalent to a "none" + // scaled index (See Intel Manual 2A, table 2-3) + case X86::EIZ: + case X86::RIZ: + return 4; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -158,8 +219,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, unsigned SubIdx) const { switch (SubIdx) { default: return 0; - case 1: - // 8-bit + case X86::sub_8bit: if (B == &X86::GR8RegClass) { if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8) return A; @@ -191,12 +251,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR16_NOREXRegClass; else if (A == &X86::GR16_ABCDRegClass) return &X86::GR16_ABCDRegClass; - } else if (B == &X86::FR32RegClass) { - return A; } break; - case 2: - // 8-bit hi + case X86::sub_8bit_hi: if (B == &X86::GR8_ABCD_HRegClass) { if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || A == &X86::GR64_NOREXRegClass || @@ -209,12 +266,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || A == &X86::GR16_NOREXRegClass) return &X86::GR16_ABCDRegClass; - } else if (B == &X86::FR64RegClass) { - return A; } break; - case 3: - // 16-bit + case X86::sub_16bit: if (B == &X86::GR16RegClass) { if (A->getSize() == 4 || A->getSize() == 8) return A; @@ -238,12 +292,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR32_NOREXRegClass; else if (A == &X86::GR32_ABCDRegClass) return &X86::GR64_ABCDRegClass; - } else if (B == &X86::VR128RegClass) { - return A; } break; - case 4: - // 32-bit + case X86::sub_32bit: if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { if (A->getSize() == 8) return A; @@ -261,6 +312,18 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR64_ABCDRegClass; } break; + case X86::sub_ss: + if (B == &X86::FR32RegClass) + return A; + break; + case X86::sub_sd: + if (B == &X86::FR64RegClass) + return A; + break; + case X86::sub_xmm: + if (B == &X86::VR128RegClass) + return A; + break; } return 0; } @@ -297,9 +360,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool ghcCall = false; if (MF) { - const MachineFrameInfo *MFI = MF->getFrameInfo(); - const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - callsEHReturn = (MMI ? MMI->callsEHReturn() : false); + callsEHReturn = MF->getMMI().callsEHReturn(); const Function *F = MF->getFunction(); ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); } @@ -345,60 +406,6 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } } -const TargetRegisterClass* const* -X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - bool callsEHReturn = false; - - if (MF) { - const MachineFrameInfo *MFI = MF->getFrameInfo(); - const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - callsEHReturn = (MMI ? MMI->callsEHReturn() : false); - } - - static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = { - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = { - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, 0 - }; - - if (Is64Bit) { - if (IsWin64) - return CalleeSavedRegClassesWin64; - else - return (callsEHReturn ? - CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit); - } else { - return (callsEHReturn ? - CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit); - } -} - BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // Set the stack-pointer register and its aliases as reserved. @@ -443,14 +450,14 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { /// or if frame pointer elimination is disabled. bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + const MachineModuleInfo &MMI = MF.getMMI(); - return (NoFramePointerElim || + return (DisableFramePointerElim(MF) || needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MF.getInfo()->getForceFramePointer() || - (MMI && MMI->callsUnwindInit())); + MMI.callsUnwindInit()); } bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { @@ -462,26 +469,29 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - bool requiresRealignment = - RealignStack && ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); // FIXME: Currently we don't support stack realignment for functions with // variable-sized allocas. - // FIXME: Temporary disable the error - it seems to be too conservative. + // FIXME: It's more complicated than this... if (0 && requiresRealignment && MFI->hasVarSizedObjects()) - llvm_report_error( + report_fatal_error( "Stack realignment in presense of dynamic allocas is not supported"); - - return (requiresRealignment && !MFI->hasVarSizedObjects()); + + // If we've requested that we force align the stack do so now. + if (ForceStackAlign) + return canRealignStack(MF); + + return requiresRealignment && canRealignStack(MF); } -bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { +bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } -bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, - int &FrameIdx) const { +bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, + unsigned Reg, int &FrameIdx) const { if (Reg == FramePtr && hasFP(MF)) { FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); return true; @@ -524,6 +534,30 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { return Offset; } +static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::SUB64ri8; + return X86::SUB64ri32; + } else { + if (isInt<8>(Imm)) + return X86::SUB32ri8; + return X86::SUB32ri; + } +} + +static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::ADD64ri8; + return X86::ADD64ri32; + } else { + if (isInt<8>(Imm)) + return X86::ADD32ri8; + return X86::ADD32ri; + } +} + void X86RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -543,7 +577,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineInstr *New = 0; if (Old->getOpcode() == getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), - TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), + TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr) .addReg(StackPtr) .addImm(Amount); @@ -555,9 +589,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount -= CalleeAmt; if (Amount) { - unsigned Opc = (Amount < 128) ? - (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : - (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); + unsigned Opc = getADDriOpcode(Is64Bit, Amount); New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(Amount); @@ -577,9 +609,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { - unsigned Opc = (CalleeAmt < 128) ? - (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : - (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); + unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), @@ -614,8 +644,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned BasePtr; + unsigned Opc = MI.getOpcode(); + bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm; if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); + else if (AfterFPPop) + BasePtr = StackPtr; else BasePtr = (hasFP(MF) ? FramePtr : StackPtr); @@ -624,16 +658,22 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(BasePtr, false); // Now add the frame object offset to the offset from EBP. + int FIOffset; + if (AfterFPPop) { + // Tail call jmp happens after FP is popped. + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea(); + } else + FIOffset = getFrameIndexOffset(MF, FrameIndex); + if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. - int Offset = getFrameIndexOffset(MF, FrameIndex) + - (int)(MI.getOperand(i + 3).getImm()); - + int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm()); MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. - uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + - (uint64_t)MI.getOperand(i+3).getOffset(); + uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } return 0; @@ -658,8 +698,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // } // [EBP] MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta, - true, false); + (-1U*SlotSize)+TailCallReturnAddrDelta, true); } if (hasFP(MF)) { @@ -672,7 +711,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, -(int)SlotSize + TFI.getOffsetOfLocalArea() + TailCallReturnAddrDelta, - true, false); + true); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; @@ -687,13 +726,9 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const TargetInstrInfo &TII) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; - unsigned Opc = isSub - ? ((Offset < 128) ? - (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : - (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri)) - : ((Offset < 128) ? - (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : - (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri)); + unsigned Opc = isSub ? + getSUBriOpcode(Is64Bit, Offset) : + getADDriOpcode(Is64Bit, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -800,14 +835,13 @@ void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label, unsigned FramePtr) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - if (!MMI) return; + MachineModuleInfo &MMI = MF.getMMI(); // Add callee saved registers to move list. const std::vector &CSI = MFI->getCalleeSavedInfo(); if (CSI.empty()) return; - std::vector &Moves = MMI->getFrameMoves(); + std::vector &Moves = MMI.getFrameMoves(); const TargetData *TD = MF.getTarget().getTargetData(); bool HasFP = hasFP(MF); @@ -874,15 +908,26 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget(); - MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo(); - bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || + bool needsFrameMoves = MMI.hasDebugInfo() || !Fn->doesNotThrow() || UnwindTablesMandatory; uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. bool HasFP = hasFP(MF); DebugLoc DL; + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum SlotSize. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else if (MaxAlign < SlotSize) + MaxAlign = SlotSize; + } + // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) @@ -896,7 +941,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls() && // No calls. + !MFI->adjustsStack() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; @@ -914,7 +959,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // size is bigger than the callers. if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), + BuildMI(MBB, MBBI, DL, + TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta); @@ -935,7 +981,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // REG < 64 => DW_CFA_offset + Reg // ELSE => DW_CFA_offset_extended - std::vector &Moves = MMI->getFrameMoves(); + std::vector &Moves = MMI.getFrameMoves(); const TargetData *TD = MF.getTarget().getTargetData(); uint64_t NumBytes = 0; int stackGrowth = -TD->getPointerSize(); @@ -959,8 +1005,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. - MCSymbol *FrameLabel = MMI->getLabelSym(MMI->NextLabelID()); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel); + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); // Define the current CFA rule to use the provided offset. if (StackSize) { @@ -987,8 +1033,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. - MCSymbol *FrameLabel = MMI->getLabelSym(MMI->NextLabelID()); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel); + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); // Define the current CFA to use the EBP/RBP register. MachineLocation FPDst(FramePtr); @@ -1027,8 +1073,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (!HasFP && needsFrameMoves) { // Mark callee-saved push instruction. - MCSymbol *Label = MMI->getLabelSym(MMI->NextLabelID()); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label); + MCSymbol *Label = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); // Define the current CFA rule to use the provided offset. unsigned Ptr = StackSize ? @@ -1099,8 +1145,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if ((NumBytes || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. - MCSymbol *Label = MMI->getLabelSym(MMI->NextLabelID()); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label); + MCSymbol *Label = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -1153,6 +1199,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else + MaxAlign = MaxAlign ? MaxAlign : 4; + } + if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; @@ -1206,8 +1263,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, if (CSSize) { unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; MachineInstr *MI = - addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); + addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); MBB.insert(MBBI, MI); } else { BuildMI(MBB, MBBI, DL, @@ -1267,9 +1324,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, for (unsigned i = 0; i != 5; ++i) MIB.addOperand(MBBI->getOperand(i)); } else if (RetOpcode == X86::TCRETURNri64) { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). + addReg(JumpTarget.getReg(), RegState::Kill); } else { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); @@ -1304,7 +1363,7 @@ X86RegisterInfo::getInitialFrameState(std::vector &Moves) const { // Calculate amount of bytes used for return address storing int stackGrowth = (Is64Bit ? -8 : -4); - // Initial state of the frame pointer is esp+4. + // Initial state of the frame pointer is esp+stackGrowth. MachineLocation Dst(MachineLocation::VirtualFP); MachineLocation Src(StackPtr, stackGrowth); Moves.push_back(MachineMove(0, Dst, Src)); @@ -1494,3 +1553,46 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } #include "X86GenRegisterInfo.inc" + +namespace { + struct MSAH : public MachineFunctionPass { + static char ID; + MSAH() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + const X86TargetMachine *TM = + static_cast(&MF.getTarget()); + const X86RegisterInfo *X86RI = TM->getRegisterInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo(); + unsigned StackAlignment = X86RI->getStackAlignment(); + + // Be over-conservative: scan over all vreg defs and find whether vector + // registers are used. If yes, there is a possibility that vector register + // will be spilled and thus require dynamic stack realignment. + for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) { + FuncInfo->setReserveFP(true); + return true; + } + + // Nothing to do + return false; + } + + virtual const char *getPassName() const { + return "X86 Maximal Stack Alignment Check"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MSAH::ID = 0; +} + +FunctionPass* +llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }