X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMBaseRegisterInfo.cpp;h=8744f1c622173681d8f289357c7147f9722d6162;hb=341a7e245e7d503457ef6c294a021e8223268a4e;hp=cdd91c7a7036e07f98c9df1f5a143cf1c4c2da23;hpb=645c5b94e270db6648919efc140152bbac85bcd9;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index cdd91c7a703..8744f1c6221 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,6 +38,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#define DEBUG_TYPE "arm-register-info" + #define GET_REGINFO_TARGET_DESC #include "ARMGenRegisterInfo.inc" @@ -58,9 +60,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList - : CSR_AAPCS_SaveList; + const MCPhysReg *RegList = + STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; if (!MF) return RegList; @@ -93,8 +94,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; + return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } const uint32_t* @@ -115,13 +115,13 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return nullptr; - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; + return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask + : CSR_AAPCS_ThisReturn_RegMask; } BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); @@ -180,14 +180,14 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return nullptr; // Can't copy CCR registers. + return &ARM::rGPRRegClass; // Can't copy CCR registers. return RC; } unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); switch (RC->getID()) { default: @@ -309,7 +309,7 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -354,7 +354,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. @@ -365,7 +368,10 @@ bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -385,7 +391,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (TFI->hasFP(MF)) return FramePtr; @@ -402,7 +408,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); @@ -415,11 +421,6 @@ emitLoadConstPool(MachineBasicBlock &MBB, .setMIFlags(MIFlags); } -bool ARMBaseRegisterInfo::mayOverrideLocalAssignment() const { - // The native linux build hits a downstream codegen bug when this is enabled. - return STI.isTargetDarwin(); -} - bool ARMBaseRegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { return true; @@ -529,7 +530,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -582,7 +583,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, int64_t Offset) const { ARMFunctionInfo *AFI = MBB->getParent()->getInfo(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : - (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri); MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" @@ -591,15 +592,15 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, const MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); - MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset)); + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(MIB); + AddDefaultCC(AddDefaultPred(MIB)); } void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, @@ -607,7 +608,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); + *static_cast(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo(); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -706,9 +707,9 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - const ARMFrameLowering *TFI = - static_cast(MF.getTarget().getFrameLowering()); + *static_cast(MF.getSubtarget().getInstrInfo()); + const ARMFrameLowering *TFI = static_cast( + MF.getSubtarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); @@ -775,3 +776,60 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } + +bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const { + auto MBB = MI->getParent(); + auto MF = MBB->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + // If not copying into a sub-register this should be ok because we shouldn't + // need to split the reg. + if (!DstSubReg) + return true; + // Small registers don't frequently cause a problem, so we can coalesce them. + if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + return true; + + auto NewRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(NewRC); + auto SrcRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(SrcRC); + auto DstRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(DstRC); + // If the source register class is more expensive than the destination, the + // coalescing is probably profitable. + if (SrcRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + if (DstRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + + // If the register allocator isn't constrained, we can always allow coalescing + // unfortunately we don't know yet if we will be constrained. + // The goal of this heuristic is to restrict how many expensive registers + // we allow to coalesce in a given basic block. + auto AFI = MF->getInfo(); + auto It = AFI->getCoalescedWeight(MBB); + + DEBUG(dbgs() << "\tARM::shouldCoalesce - Coalesced Weight: " + << It->second << "\n"); + DEBUG(dbgs() << "\tARM::shouldCoalesce - Reg Weight: " + << NewRCWeight.RegWeight << "\n"); + + // This number is the largest round number that which meets the criteria: + // (1) addresses PR18825 + // (2) generates better code in some test cases (like vldm-shed-a9.ll) + // (3) Doesn't regress any test cases (in-tree, test-suite, and SPEC) + // In practice the SizeMultiplier will only factor in for straight line code + // that uses a lot of NEON vectors, which isn't terribly common. + unsigned SizeMultiplier = MBB->size()/100; + SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1; + if (It->second < NewRCWeight.WeightLimit * SizeMultiplier) { + It->second += NewRCWeight.RegWeight; + return true; + } + return false; +}