From f19c6f576367a368cf729cd0019d16d691163d72 Mon Sep 17 00:00:00 2001 From: Robert Lytton Date: Mon, 2 Dec 2013 11:05:28 +0000 Subject: [PATCH] XCore target: Make handling of large frames not dependent upon an FP. eliminateFrameIndex() has been reworked to handle both small & large frames with either a FP or SP. An additional Slot is required for Scavenging spills when not using FP for large frames. Reworked the handling of Register Scavenging. Whether we are using an FP or not, whether it is a large frame or not, and whether we are using a large code model or not are now independent. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreFrameLowering.cpp | 62 ++-- lib/Target/XCore/XCoreFrameLowering.h | 3 + lib/Target/XCore/XCoreMachineFunctionInfo.cpp | 16 + lib/Target/XCore/XCoreMachineFunctionInfo.h | 9 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 285 +++++++++++------- lib/Target/XCore/XCoreRegisterInfo.h | 13 - test/CodeGen/XCore/epilogue_prologue.ll | 125 ++++++-- test/CodeGen/XCore/epilogue_prologue_fp.ll | 42 --- test/CodeGen/XCore/scavenging.ll | 70 ++++- 9 files changed, 409 insertions(+), 216 deletions(-) delete mode 100644 test/CodeGen/XCore/epilogue_prologue_fp.ll diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 06eab7a4cea..ea25e71961b 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -302,10 +302,11 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, } // else Don't erase the return instruction. 
} -bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { +bool XCoreFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return true; @@ -337,10 +338,11 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const{ +bool XCoreFrameLowering:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const{ MachineFunction *MF = MBB.getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); @@ -420,11 +422,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void -XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void XCoreFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR); const TargetRegisterClass *RC = &XCore::GRRegsRegClass; XCoreFunctionInfo *XFI = MF.getInfo(); @@ -434,7 +435,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, bool isVarArg = MF.getFunction()->isVarArg(); int FrameIdx; if (! isVarArg) { - // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. + // A fixed offset of 0 allows us to save/restore LR using entsp/retsp. 
FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true); } else { FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), @@ -443,17 +444,32 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, XFI->setUsesLR(FrameIdx); XFI->setLRSpillSlot(FrameIdx); } - if (RegInfo->requiresRegisterScavenging(MF)) { - // Reserve a slot close to SP or frame pointer. - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } - if (hasFP(MF)) { - // A callee save register is used to hold the FP. - // This needs saving / restoring in the epilogue / prologue. + + // A callee save register is used to hold the FP. + // This needs saving / restoring in the epilogue / prologue. + if (hasFP(MF)) XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); - } +} + +void XCoreFrameLowering:: +processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { + assert(RS && "requiresRegisterScavenging failed"); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + XCoreFunctionInfo *XFI = MF.getInfo(); + // Reserve slots close to SP or frame pointer for Scavenging spills. + // When using SP for small frames, we don't need any scratch registers. + // When using SP for large frames, we may need 2 scratch registers. + // When using FP, for large or small frames, we may need 1 scratch register. 
+ if (XFI->isLargeFrame(MF) || hasFP(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + if (XFI->isLargeFrame(MF) && !hasFP(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); } diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index ebad62f2fa5..6cd90c96e7f 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -48,6 +48,9 @@ namespace llvm { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + //! Stack slot size (4 bytes) static int stackSlotSize() { return 4; diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index 7ca06729120..91b29760080 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -12,3 +12,19 @@ using namespace llvm; void XCoreFunctionInfo::anchor() { } + +bool XCoreFunctionInfo::isLargeFrame(const MachineFunction &MF) const { + if (CachedEStackSize == -1) { + CachedEStackSize = MF.getFrameInfo()->estimateStackSize(MF); + } + // isLargeFrame() is used when deciding if spill slots should be added to + // allow eliminateFrameIndex() to scavenge registers. + // This is only required when there is no FP and offsets are greater than + // ~256KB (~64Kwords). Thus only for code run on the emulator! + // + // The arbitrary value of 0xf000 is compared with a size in bytes, so spill + // slots for eliminateFrameIndex() register scavenging are added once the + // estimated frame exceeds ~60KB (~240KB would be 0xf000 words). This still + // leaves more than 16KB below the ~256KB limit for function arguments. 
+ return CachedEStackSize > 0xf000; +} diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h index 69d5de3e03a..4fa4ee5d9c0 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -31,6 +31,7 @@ class XCoreFunctionInfo : public MachineFunctionInfo { int LRSpillSlot; int FPSpillSlot; int VarArgsFrameIndex; + mutable int CachedEStackSize; std::vector > SpillLabels; public: @@ -38,13 +39,15 @@ public: UsesLR(false), LRSpillSlot(0), FPSpillSlot(0), - VarArgsFrameIndex(0) {} + VarArgsFrameIndex(0), + CachedEStackSize(-1) {} explicit XCoreFunctionInfo(MachineFunction &MF) : UsesLR(false), LRSpillSlot(0), FPSpillSlot(0), - VarArgsFrameIndex(0) {} + VarArgsFrameIndex(0), + CachedEStackSize(-1) {} ~XCoreFunctionInfo() {} @@ -60,6 +63,8 @@ public: void setFPSpillSlot(int off) { FPSpillSlot = off; } int getFPSpillSlot() const { return FPSpillSlot; } + bool isLargeFrame(const MachineFunction &MF) const; + std::vector > &getSpillLabels() { return SpillLabels; } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 607e512b3ec..7c2d842e774 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -57,6 +57,165 @@ static inline bool isImmU16(unsigned val) { return val < (1 << 16); } +static void loadConstant(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned DstReg, int64_t Value) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + if (isMask_32(Value)) { + int N = Log2_32(Value) + 1; + BuildMI(MBB, II, dl, TII.get(XCore::MKMSK_rus), DstReg).addImm(N); + } else if (isImmU16(Value)) { + int Opcode = isImmU6(Value) ? 
XCore::LDC_ru6 : XCore::LDC_lru6; + BuildMI(MBB, II, dl, TII.get(Opcode), DstReg).addImm(Value); + } else { + MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool(); + const Constant *C = ConstantInt::get( + Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Value); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + BuildMI(MBB, II, dl, TII.get(XCore::LDWCP_lru6), DstReg) + .addConstantPoolIndex(Idx); + } +} + +static void InsertFPImmInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, unsigned FrameReg, int Offset ) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + switch (MI.getOpcode()) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg) + .addReg(FrameReg) + .addImm(Offset); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addReg(FrameReg) + .addImm(Offset); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) + .addReg(FrameReg) + .addImm(Offset); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertFPConstInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, unsigned FrameReg, + int Offset, RegScavenger *RS ) { + assert(RS && "requiresRegisterScavenging failed"); + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchOffset); + loadConstant(II, TII, ScratchOffset, Offset); + + switch (MI.getOpcode()) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) + .addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + 
.addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) + .addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertSPImmInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, int Offset) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + bool isU6 = isImmU6(Offset); + switch (MI.getOpcode()) { + int NewOpcode; + case XCore::LDWFI: + NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) + .addImm(Offset); + break; + case XCore::STWFI: + NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addImm(Offset); + break; + case XCore::LDAWFI: + NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) + .addImm(Offset); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertSPConstInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, int Offset, RegScavenger *RS ) { + assert(RS && "requiresRegisterScavenging failed"); + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + unsigned OpCode = MI.getOpcode(); + + unsigned ScratchBase; + if (OpCode==XCore::STWFI) { + ScratchBase = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchBase); + } else + ScratchBase = Reg; + BuildMI(MBB, II, dl, TII.get(XCore::LDAWSP_ru6), ScratchBase).addImm(0); + unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchOffset); + loadConstant(II, TII, ScratchOffset, Offset); + + switch (OpCode) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, 
TII.get(XCore::LDW_3r), Reg) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { return MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); @@ -88,15 +247,12 @@ BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - // TODO can we estimate stack size? 
- return TFI->hasFP(MF); + return true; } bool XCoreRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); + return true; } bool @@ -110,7 +266,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; - DebugLoc dl = MI.getDebugLoc(); MachineOperand &FrameOp = MI.getOperand(FIOperandNum); int FrameIndex = FrameOp.getIndex(); @@ -146,124 +301,28 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); assert(Offset%4 == 0 && "Misaligned stack offset"); - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - Offset/=4; - bool FP = TFI->hasFP(MF); - unsigned Reg = MI.getOperand(0).getReg(); - bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill(); - assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand"); - - MachineBasicBlock &MBB = *MI.getParent(); - - if (FP) { - bool isUs = isImmUs(Offset); - - if (!isUs) { - if (!RS) - report_fatal_error("eliminateFrameIndex Frame size too big: " + - Twine(Offset)); - unsigned ScratchReg = RS->scavengeRegister(&XCore::GRRegsRegClass, II, - SPAdj); - loadConstant(MBB, II, ScratchReg, Offset, dl); - switch (MI.getOpcode()) { - case XCore::LDWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - case XCore::STWFI: - BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) - .addReg(Reg, getKillRegState(isKill)) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - case XCore::LDAWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } - } else { - switch (MI.getOpcode()) { - case XCore::LDWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), 
Reg) - .addReg(FrameReg) - .addImm(Offset); - break; - case XCore::STWFI: - BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) - .addReg(Reg, getKillRegState(isKill)) - .addReg(FrameReg) - .addImm(Offset); - break; - case XCore::LDAWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) - .addReg(FrameReg) - .addImm(Offset); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } - } + + if (TFI->hasFP(MF)) { + if (isImmUs(Offset)) + InsertFPImmInst(II, TII, Reg, FrameReg, Offset); + else + InsertFPConstInst(II, TII, Reg, FrameReg, Offset, RS); } else { - bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) - report_fatal_error("eliminateFrameIndex Frame size too big: " + - Twine(Offset)); - - switch (MI.getOpcode()) { - int NewOpcode; - case XCore::LDWFI: - NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) - .addImm(Offset); - break; - case XCore::STWFI: - NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode)) - .addReg(Reg, getKillRegState(isKill)) - .addImm(Offset); - break; - case XCore::LDAWFI: - NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) - .addImm(Offset); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } + if (isImmU16(Offset)) + InsertSPImmInst(II, TII, Reg, Offset); + else + InsertSPConstInst(II, TII, Reg, Offset, RS); } // Erase old instruction. + MachineBasicBlock &MBB = *MI.getParent(); MBB.erase(II); } -void XCoreRegisterInfo:: -loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DstReg, int64_t Value, DebugLoc dl) const { - const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo(); - if (isMask_32(Value)) { - int N = Log2_32(Value) + 1; - BuildMI(MBB, I, dl, TII.get(XCore::MKMSK_rus), DstReg).addImm(N); - } else if (isImmU16(Value)) { - int Opcode = isImmU6(Value) ? 
XCore::LDC_ru6 : XCore::LDC_lru6; - BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); - return; - } else { - MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool(); - const Constant *C = ConstantInt::get( - Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Value); - unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); - BuildMI(MBB, I, dl, TII.get(XCore::LDWCP_lru6), DstReg) - .addConstantPoolIndex(Idx); - } -} unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 2370c6280f2..36ba7b46e5e 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -24,19 +24,6 @@ namespace llvm { class TargetInstrInfo; struct XCoreRegisterInfo : public XCoreGenRegisterInfo { -private: - void loadConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, int64_t Value, DebugLoc dl) const; - - void storeToStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned SrcReg, int Offset, DebugLoc dl) const; - - void loadFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, int Offset, DebugLoc dl) const; - public: XCoreRegisterInfo(); diff --git a/test/CodeGen/XCore/epilogue_prologue.ll b/test/CodeGen/XCore/epilogue_prologue.ll index ffbe7a1571f..2898ae5dc82 100644 --- a/test/CodeGen/XCore/epilogue_prologue.ll +++ b/test/CodeGen/XCore/epilogue_prologue.ll @@ -1,7 +1,11 @@ ; RUN: llc < %s -march=xcore | FileCheck %s ; RUN: llc < %s -march=xcore -disable-fp-elim | FileCheck %s -check-prefix=CHECKFP +; When using SP for small frames, we don't need any scratch registers (SR). +; When using SP for large frames, we may need two scratch registers. +; When using FP, for large or small frames, we may need one scratch register. 
+; FP + small frame: spill FP+SR = entsp 2 ; CHECKFP-LABEL: f1 ; CHECKFP: entsp 2 ; CHECKFP-NEXT: stw r10, sp[1] @@ -10,6 +14,7 @@ ; CHECKFP-NEXT: ldw r10, sp[1] ; CHECKFP-NEXT: retsp 2 ; +; !FP + small frame: no spills = no stack adjustment needed ; CHECK-LABEL: f1 ; CHECK: stw lr, sp[0] ; CHECK: ldw lr, sp[0] @@ -21,6 +26,7 @@ entry: } +; FP + small frame: spill FP+SR+R0+LR = entsp 3 + extsp 1 ; CHECKFP-LABEL:f3 ; CHECKFP: entsp 3 ; CHECKFP-NEXT: stw r10, sp[1] @@ -36,14 +42,15 @@ entry: ; CHECKFP-NEXT: ldw r10, sp[1] ; CHECKFP-NEXT: retsp 3 ; +; !FP + small frame: spill R0+LR = entsp 2 ; CHECK-LABEL: f3 ; CHECK: entsp 2 -; CHECK: stw [[REG:r[4-9]+]], sp[1] -; CHECK: mov [[REG]], r0 -; CHECK: bl f2 -; CHECK: mov r0, [[REG]] -; CHECK: ldw [[REG]], sp[1] -; CHECK: retsp 2 +; CHECK-NEXT: stw [[REG:r[4-9]+]], sp[1] +; CHECK-NEXT: mov [[REG]], r0 +; CHECK-NEXT: bl f2 +; CHECK-NEXT: mov r0, [[REG]] +; CHECK-NEXT: ldw [[REG]], sp[1] +; CHECK-NEXT: retsp 2 declare void @f2() define i32 @f3(i32 %i) nounwind { entry: @@ -52,6 +59,7 @@ entry: } +; FP + large frame: spill FP+SR = entsp 2 + 100000 ; CHECKFP-LABEL: f4 ; CHECKFP: extsp 65535 ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} @@ -71,23 +79,32 @@ entry: ; CHECKFP-NEXT: ldaw sp, sp[34467] ; CHECKFP-NEXT: retsp 0 ; +; !FP + large frame: spill SR+SR = entsp 2 + 100000 ; CHECK-LABEL: f4 ; CHECK: extsp 65535 ; CHECK-NEXT: .Ltmp{{[0-9]+}} ; CHECK-NEXT: .cfi_def_cfa_offset 262140 -; CHECK-NEXT: extsp 34465 +; CHECK-NEXT: extsp 34467 ; CHECK-NEXT: .Ltmp{{[0-9]+}} -; CHECK-NEXT: .cfi_def_cfa_offset 400000 +; CHECK-NEXT: .cfi_def_cfa_offset 400008 ; CHECK-NEXT: ldaw sp, sp[65535] -; CHECK-NEXT: ldaw sp, sp[34465] +; CHECK-NEXT: ldaw sp, sp[34467] ; CHECK-NEXT: retsp 0 define void @f4() { entry: - %0 = alloca [100000 x i32], align 4 + %0 = alloca [100000 x i32] ret void } +; FP + large frame: spill FP+SR+R4+LR = entsp 3 + 200000 + extsp 1 +; CHECKFP: .section .cp.rodata.cst4,"aMc",@progbits,4 +; CHECKFP-NEXT: .align 4 +; CHECKFP-NEXT: 
.LCPI[[CNST0:[0-9_]+]]: +; CHECKFP-NEXT: .long 200002 +; CHECKFP-NEXT: .LCPI[[CNST1:[0-9_]+]]: +; CHECKFP-NEXT: .long 200001 +; CHECKFP-NEXT: .text ; CHECKFP-LABEL: f6 ; CHECKFP: entsp 65535 ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} @@ -100,26 +117,47 @@ entry: ; CHECKFP-NEXT: extsp 65535 ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} ; CHECKFP-NEXT: .cfi_def_cfa_offset 786420 -; CHECKFP-NEXT: extsp 3396 +; CHECKFP-NEXT: extsp 3398 ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} -; CHECKFP-NEXT: .cfi_def_cfa_offset 800004 +; CHECKFP-NEXT: .cfi_def_cfa_offset 800012 ; CHECKFP-NEXT: stw r10, sp[1] ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} -; CHECKFP-NEXT: .cfi_offset 10, -800000 +; CHECKFP-NEXT: .cfi_offset 10, -800008 ; CHECKFP-NEXT: ldaw r10, sp[0] ; CHECKFP-NEXT: .Ltmp{{[0-9]+}} ; CHECKFP-NEXT: .cfi_def_cfa_register 10 +; CHECKFP-NEXT: ldw r1, cp[.LCPI[[CNST0]]] +; CHECKFP-NEXT: stw [[REG:r[4-9]+]], r10[r1] +; CHECKFP-NEXT: .Ltmp{{[0-9]+}} +; CHECKFP-NEXT: .cfi_offset 4, -4 +; CHECKFP-NEXT: mov [[REG]], r0 ; CHECKFP-NEXT: extsp 1 ; CHECKFP-NEXT: ldaw r0, r10[2] ; CHECKFP-NEXT: bl f5 ; CHECKFP-NEXT: ldaw sp, sp[1] +; CHECKFP-NEXT: ldw r1, cp[.LCPI3_1] +; CHECKFP-NEXT: ldaw r0, r10[r1] +; CHECKFP-NEXT: extsp 1 +; CHECKFP-NEXT: bl f5 +; CHECKFP-NEXT: ldaw sp, sp[1] +; CHECKFP-NEXT: mov r0, [[REG]] +; CHECKFP-NEXT: ldw r1, cp[.LCPI[[CNST0]]] +; CHECKFP-NEXT: ldw [[REG]], r10[r1] ; CHECKFP-NEXT: set sp, r10 ; CHECKFP-NEXT: ldw r10, sp[1] ; CHECKFP-NEXT: ldaw sp, sp[65535] ; CHECKFP-NEXT: ldaw sp, sp[65535] ; CHECKFP-NEXT: ldaw sp, sp[65535] -; CHECKFP-NEXT: retsp 3396 +; CHECKFP-NEXT: retsp 3398 ; +; !FP + large frame: spill SR+SR+R4+LR = entsp 4 + 200000 +; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4 +; CHECK-NEXT: .align 4 +; CHECK-NEXT: .LCPI[[CNST0:[0-9_]+]]: +; CHECK-NEXT: .long 200003 +; CHECK-NEXT: .LCPI[[CNST1:[0-9_]+]]: +; CHECK-NEXT: .long 200002 +; CHECK-NEXT: .text ; CHECK-LABEL: f6 ; CHECK: entsp 65535 ; CHECK-NEXT: .Ltmp{{[0-9]+}} @@ -132,20 +170,65 @@ entry: ; CHECK-NEXT: extsp 65535 ; CHECK-NEXT: 
.Ltmp{{[0-9]+}} ; CHECK-NEXT: .cfi_def_cfa_offset 786420 -; CHECK-NEXT: extsp 3395 +; CHECK-NEXT: extsp 3399 ; CHECK-NEXT: .Ltmp{{[0-9]+}} -; CHECK-NEXT: .cfi_def_cfa_offset 800000 -; CHECK-NEXT: ldaw r0, sp[1] +; CHECK-NEXT: .cfi_def_cfa_offset 800016 +; CHECK-NEXT: ldaw r1, sp[0] +; CHECK-NEXT: ldw r2, cp[.LCPI[[CNST0]]] +; CHECK-NEXT: stw [[REG:r[4-9]+]], r1[r2] +; CHECK-NEXT: .Ltmp{{[0-9]+}} +; CHECK-NEXT: .cfi_offset 4, -4 +; CHECK-NEXT: mov [[REG]], r0 +; CHECK-NEXT: ldaw r0, sp[3] +; CHECK-NEXT: bl f5 +; CHECK-NEXT: ldaw r0, sp[0] +; CHECK-NEXT: ldw r1, cp[.LCPI[[CNST1]]] +; CHECK-NEXT: ldaw r0, r0[r1] ; CHECK-NEXT: bl f5 +; CHECK-NEXT: mov r0, [[REG]] +; CHECK-NEXT: ldaw [[REG]], sp[0] +; CHECK-NEXT: ldw r1, cp[.LCPI[[CNST0]]] +; CHECK-NEXT: ldw [[REG]], [[REG]][r1] ; CHECK-NEXT: ldaw sp, sp[65535] ; CHECK-NEXT: ldaw sp, sp[65535] ; CHECK-NEXT: ldaw sp, sp[65535] -; CHECK-NEXT: retsp 3395 +; CHECK-NEXT: retsp 3399 declare void @f5(i32*) -define void @f6() { +define i32 @f6(i32 %i) { +entry: + %0 = alloca [200000 x i32] + %1 = getelementptr inbounds [200000 x i32]* %0, i32 0, i32 0 + call void @f5(i32* %1) + %2 = getelementptr inbounds [200000 x i32]* %0, i32 0, i32 199999 + call void @f5(i32* %2) + ret i32 %i +} + + +; FP + large frame: spill FP+SR+LR = entsp 2 + 32768 + extsp 1 +; CHECKFP-LABEL:f8 +; CHECKFP: entsp 32770 +; CHECKFP-NEXT: stw r10, sp[1] +; CHECKFP-NEXT: ldaw r10, sp[0] +; CHECKFP-NEXT: mkmsk r1, 15 +; CHECKFP-NEXT: ldaw r0, r10[r1] +; CHECKFP-NEXT: extsp 1 +; CHECKFP-NEXT: bl f5 +; CHECKFP-NEXT: ldaw sp, sp[1] +; CHECKFP-NEXT: set sp, r10 +; CHECKFP-NEXT: ldw r10, sp[1] +; CHECKFP-NEXT: retsp 32770 +; +; !FP + large frame: spill SR+SR+LR = entsp 3 + 32768 +; CHECK-LABEL:f8 +; CHECK: entsp 32771 +; CHECK-NEXT: ldaw r0, sp[32768] +; CHECK-NEXT: bl f5 +; CHECK-NEXT: retsp 32771 +define void @f8() nounwind { entry: - %0 = alloca [199999 x i32], align 4 - %1 = getelementptr inbounds [199999 x i32]* %0, i32 0, i32 0 + %0 = alloca [32768 x i32] + 
%1 = getelementptr inbounds [32768 x i32]* %0, i32 0, i32 32765 call void @f5(i32* %1) ret void } diff --git a/test/CodeGen/XCore/epilogue_prologue_fp.ll b/test/CodeGen/XCore/epilogue_prologue_fp.ll deleted file mode 100644 index 9b9837c90d4..00000000000 --- a/test/CodeGen/XCore/epilogue_prologue_fp.ll +++ /dev/null @@ -1,42 +0,0 @@ -; Functions with frames > 256K bytes require a frame pointer to access the stack. -; At present, functions must be compiled using '-fno-omit-frame-pointer'. -; RUN: llc < %s -march=xcore -disable-fp-elim | FileCheck %s - -declare void @f0(i32*) - -; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4 -; CHECK: .LCPI[[NUM:[0-9_]+]]: -; CHECK: .long 99999 -; CHECK: .text -; CHECK-LABEL:f1 -; CHECK: entsp 65535 -; CHECK-NEXT: extsp 34465 -; CHECK-NEXT: stw r10, sp[1] -; CHECK-NEXT: ldaw r10, sp[0] -; CHECK-NEXT: ldw r1, cp[.LCPI[[NUM]]] -; CHECK-NEXT: ldaw r0, r10[r1] -; CHECK-NEXT: extsp 1 -; CHECK-NEXT: bl f0 -; CHECK-NEXT: ldaw sp, sp[1] -; CHECK-NEXT: set sp, r10 -; CHECK-NEXT: ldw r10, sp[1] -; CHECK-NEXT: ldaw sp, sp[65535] -; CHECK-NEXT: retsp 34465 -define void @f1() nounwind { -entry: - %0 = alloca [99998 x i32] - %1 = getelementptr inbounds [99998 x i32]* %0, i32 0, i32 99997 - call void @f0(i32* %1) - ret void -} - -; CHECK-LABEL:f2 -; CHECK: mkmsk [[REG:r[0-9]+]], 15 -; CHECK-NEXT: ldaw r0, r10{{\[}}[[REG]]{{\]}} -define void @f2() nounwind { -entry: - %0 = alloca [32768 x i32] - %1 = getelementptr inbounds [32768 x i32]* %0, i32 0, i32 32765 - call void @f0(i32* %1) - ret void -} diff --git a/test/CodeGen/XCore/scavenging.ll b/test/CodeGen/XCore/scavenging.ll index 5b612d0f9b5..f96ecd3fc21 100644 --- a/test/CodeGen/XCore/scavenging.ll +++ b/test/CodeGen/XCore/scavenging.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=xcore +; RUN: llc < %s -march=xcore | FileCheck %s + @size = global i32 0 ; [#uses=1] @g0 = external global i32 ; [#uses=2] @g1 = external global i32 ; [#uses=2] @@ -48,5 +49,70 @@ entry: call void @g(i32* %x1, i32* %1) 
nounwind ret void } - declare void @g(i32*, i32*) + + +; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4 +; CHECK: .align 4 +; CHECK: [[ARG5:.LCPI[0-9_]+]]: +; CHECK: .long 100003 +; CHECK: [[INDEX0:.LCPI[0-9_]+]]: +; CHECK: .long 80002 +; CHECK: [[INDEX1:.LCPI[0-9_]+]]: +; CHECK: .long 81002 +; CHECK: [[INDEX2:.LCPI[0-9_]+]]: +; CHECK: .long 82002 +; CHECK: [[INDEX3:.LCPI[0-9_]+]]: +; CHECK: .long 83002 +; CHECK: [[INDEX4:.LCPI[0-9_]+]]: +; CHECK: .long 84002 +; CHECK: .text +; !FP + large frame: spill SR+SR = entsp 2 + 100000 +; CHECK-LABEL: ScavengeSlots: +; CHECK: extsp 65535 +; CHECK: extsp 34467 +; scavenge r11 +; CHECK: ldaw r11, sp[0] +; scavenge r4 using SR spill slot +; CHECK: stw r4, sp[1] +; CHECK: ldw r4, cp{{\[}}[[ARG5]]{{\]}} +; r11 used to load 5th argument +; CHECK: ldw r11, r11[r4] +; CHECK: ldaw r4, sp[0] +; scavenge r5 using SR spill slot +; CHECK: stw r5, sp[0] +; CHECK: ldw r5, cp{{\[}}[[INDEX0]]{{\]}} +; r4 & r5 used by InsertSPConstInst() to emit STW_l3r instruction. 
+; CHECK: stw r0, r4[r5] +; CHECK: ldaw r0, sp[0] +; CHECK: ldw r5, cp{{\[}}[[INDEX1]]{{\]}} +; CHECK: stw r1, r0[r5] +; CHECK: ldaw r0, sp[0] +; CHECK: ldw r1, cp{{\[}}[[INDEX2]]{{\]}} +; CHECK: stw r2, r0[r1] +; CHECK: ldaw r0, sp[0] +; CHECK: ldw r1, cp{{\[}}[[INDEX3]]{{\]}} +; CHECK: stw r3, r0[r1] +; CHECK: ldaw r0, sp[0] +; CHECK: ldw r1, cp{{\[}}[[INDEX4]]{{\]}} +; CHECK: stw r11, r0[r1] +; CHECK: ldaw sp, sp[65535] +; CHECK: ldaw sp, sp[34467] +; CHECK: ldw r4, sp[1] +; CHECK: ldw r5, sp[0] +; CHECK: retsp 0 +define void @ScavengeSlots(i32 %r0, i32 %r1, i32 %r2, i32 %r3, i32 %r4) nounwind { +entry: + %Data = alloca [100000 x i32] + %i0 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 80000 + store volatile i32 %r0, i32* %i0 + %i1 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 81000 + store volatile i32 %r1, i32* %i1 + %i2 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 82000 + store volatile i32 %r2, i32* %i2 + %i3 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 83000 + store volatile i32 %r3, i32* %i3 + %i4 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 84000 + store volatile i32 %r4, i32* %i4 + ret void +} -- 2.34.1