X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMBaseRegisterInfo.cpp;h=8744f1c622173681d8f289357c7147f9722d6162;hb=341a7e245e7d503457ef6c294a021e8223268a4e;hp=7d712bec3a0278eaa6511600c23301355237cb49;hpb=831737d329a727f53a1fb0572f7b7a8127208881;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7d712bec3a0..8744f1c6221 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -27,10 +27,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -38,40 +38,63 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#define DEBUG_TYPE "arm-register-info" + #define GET_REGINFO_TARGET_DESC #include "ARMGenRegisterInfo.inc" using namespace llvm; -ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti), - FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), - BasePtr(ARM::R6) { +ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) { + if (STI.isTargetMachO()) { + if (STI.isTargetDarwin() || STI.isThumb1Only()) + FramePtr = ARM::R7; + else + FramePtr = ARM::R11; + } else if (STI.isTargetWindows()) + FramePtr = ARM::R11; + else // ARM EABI + FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11; } -const uint16_t* +const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - bool ghcCall = false; - - if (MF) { - const Function *F = MF->getFunction(); - ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); - } - - if (ghcCall) { - return CSR_GHC_SaveList; - } - else { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + const MCPhysReg *RegList = + STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + + if (!MF) return RegList; + + const Function *F = MF->getFunction(); + if (F->getCallingConv() == CallingConv::GHC) { + // GHC set of callee saved regs is empty as all those regs are + // used for passing STG regs around + return CSR_NoRegs_SaveList; + } else if (F->hasFnAttribute("interrupt")) { + if (STI.isMClass()) { + // M-class CPUs have hardware which saves the registers needed to allow a + // function conforming to the AAPCS to function as a handler. + return CSR_AAPCS_SaveList; + } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") { + // Fast interrupt mode gives the handler a private copy of R8-R14, so less + // need to be saved to restore user-mode state. + return CSR_FIQ_SaveList; + } else { + // Generally only R13-R14 (i.e. SP, LR) are automatically preserved by + // exception handling. + return CSR_GenericInt_SaveList; + } } + + return RegList; } const uint32_t* -ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; +ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::GHC) + // This is academic becase all GHC calls are (supposed to be) tail calls + return CSR_NoRegs_RegMask; + return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } const uint32_t* @@ -79,15 +102,33 @@ ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } +const uint32_t* +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { + // This should return a register mask that is the same as that returned by + // getCallPreservedMask but that additionally preserves the register used for + // the first i32 argument (which must also be the register used to return a + // single i32 return value) + // + // In case that the calling convention does not use the same register for + // both or otherwise does not want to enable this optimization, the function + // should return NULL + if (CC == CallingConv::GHC) + // This is academic becase all GHC calls are (supposed to be) tail calls + return nullptr; + return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask + : CSR_AAPCS_ThisReturn_RegMask; +} + BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); Reserved.set(ARM::FPSCR); + Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) @@ -139,14 +180,14 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return 0; // Can't copy CCR registers. + return &ARM::rGPRRegClass; // Can't copy CCR registers. return RC; } unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); switch (RC->getID()) { default: @@ -205,7 +246,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, } // First prefer the paired physreg. - if (PairedPhys) + if (PairedPhys && + std::find(Order.begin(), Order.end(), PairedPhys) != Order.end()) Hints.push_back(PairedPhys); // Then prefer even or odd registers. @@ -267,7 +309,7 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -302,7 +344,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - if (!MF.getTarget().Options.RealignStack) + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) return false; if (AFI->isThumb1OnlyFunction()) return false; @@ -312,7 +354,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. @@ -323,7 +368,10 @@ bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -343,21 +391,13 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (TFI->hasFP(MF)) return FramePtr; return ARM::SP; } -unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { - llvm_unreachable("What is the exception register"); -} - -unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { - llvm_unreachable("What is the exception handler register"); -} - /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. void ARMBaseRegisterInfo:: @@ -368,6 +408,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); @@ -400,64 +441,6 @@ requiresVirtualBaseRegisters(const MachineFunction &MF) const { return true; } -static void -emitSPUpdate(bool isARM, - MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { - if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); -} - - -void ARMBaseRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - ARMFunctionInfo *AFI = MF.getInfo(); - assert(!AFI->isThumb1OnlyFunction() && - "This eliminateCallFramePseudoInstr does not support Thumb1!"); - bool isARM = !AFI->isThumbFunction(); - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - int PIdx = Old->findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old->getOperand(2).getReg(); - emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg); - } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = Old->getOperand(3).getReg(); - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg); - } - } - } - MBB.erase(I); -} - int64_t ARMBaseRegisterInfo:: getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { const MCInstrDesc &Desc = MI->getDesc(); @@ -547,7 +530,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -600,31 +583,32 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, int64_t Offset) const { ARMFunctionInfo *AFI = MBB->getParent()->getInfo(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : - (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri); MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - const MCInstrDesc &MCID = TII.get(ADDriOpc); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); - MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset)); + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(MIB); + AddDefaultCC(AddDefaultPred(MIB)); } -void -ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, - unsigned BaseReg, int64_t Offset) const { - MachineInstr &MI = *I; +void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo(); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -717,23 +701,19 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, void ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { - unsigned i = 0; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const ARMFrameLowering *TFI = - static_cast(MF.getTarget().getFrameLowering()); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + const ARMFrameLowering *TFI = static_cast( + MF.getSubtarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned FrameReg; int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); @@ -743,7 +723,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // means the stack pointer cannot be used to access the emergency spill slot // when !hasReservedCallFrame(). #ifndef NDEBUG - if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ assert(TFI->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); @@ -753,20 +733,15 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } #endif // NDEBUG - // Special handling of dbg_value instructions. - if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); - return; - } + assert(!MI.isDebugValue() && "DBG_VALUEs should be handled in target-independent code"); // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; if (!AFI->isThumbFunction()) - Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII); + Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); else { assert(AFI->isThumb2Function()); - Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); + Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); } if (Done) return; @@ -786,7 +761,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); if (Offset == 0) // Must be addrmode4/6. - MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass); if (!AFI->isThumbFunction()) @@ -798,6 +773,63 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } // Update the original instruction to use the scratch register. - MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } + +bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const { + auto MBB = MI->getParent(); + auto MF = MBB->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + // If not copying into a sub-register this should be ok because we shouldn't + // need to split the reg. + if (!DstSubReg) + return true; + // Small registers don't frequently cause a problem, so we can coalesce them. + if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + return true; + + auto NewRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(NewRC); + auto SrcRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(SrcRC); + auto DstRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(DstRC); + // If the source register class is more expensive than the destination, the + // coalescing is probably profitable. + if (SrcRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + if (DstRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + + // If the register allocator isn't constrained, we can always allow coalescing + // unfortunately we don't know yet if we will be constrained. + // The goal of this heuristic is to restrict how many expensive registers + // we allow to coalesce in a given basic block. + auto AFI = MF->getInfo(); + auto It = AFI->getCoalescedWeight(MBB); + + DEBUG(dbgs() << "\tARM::shouldCoalesce - Coalesced Weight: " + << It->second << "\n"); + DEBUG(dbgs() << "\tARM::shouldCoalesce - Reg Weight: " + << NewRCWeight.RegWeight << "\n"); + + // This number is the largest round number that which meets the criteria: + // (1) addresses PR18825 + // (2) generates better code in some test cases (like vldm-shed-a9.ll) + // (3) Doesn't regress any test cases (in-tree, test-suite, and SPEC) + // In practice the SizeMultiplier will only factor in for straight line code + // that uses a lot of NEON vectors, which isn't terribly common. + unsigned SizeMultiplier = MBB->size()/100; + SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1; + if (It->second < NewRCWeight.WeightLimit * SizeMultiplier) { + It->second += NewRCWeight.RegWeight; + return true; + } + return false; +}