From: Chris Lattner Date: Mon, 29 Mar 2010 17:38:47 +0000 (+0000) Subject: From Kalle Raiskila: X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=e27e02b1a48a715be93952a7cef49cdd19e9593b;p=oota-llvm.git From Kalle Raiskila: "the bigstack patch for SPU, with testcase. It is essentially the patch committed as 97091, and reverted as 97099, but with the following additions: -in vararg handling, registers are marked to be live, to not confuse the register scavenger -function prologue and epilogue are not emitted, if the stack size is 16. 16 means it is empty - there is only the register scavenger emergency spill slot, which is not used as there is no stack." git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99819 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index c96097494ea..c6208121902 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -66,9 +66,6 @@ namespace llvm { //! Predicate test for an unsigned 10-bit value /*! \param Value The input value to be tested - - This predicate tests for an unsigned 10-bit value, returning the 10-bit value - as a short if true. */ inline bool isU10Constant(short Value) { return (Value == (Value & 0x3ff)); @@ -90,6 +87,70 @@ namespace llvm { return (Value == (Value & 0x3ff)); } + //! Predicate test for a signed 14-bit value + /*! 
+ \param Value The input value to be tested + */ + template<typename T> + inline bool isS14Constant(T Value); + + template<> + inline bool isS14Constant(short Value) { + return (Value >= -(1 << 13) && Value <= (1 << 13) - 1); + } + + template<> + inline bool isS14Constant(int Value) { + return (Value >= -(1 << 13) && Value <= (1 << 13) - 1); + } + + template<> + inline bool isS14Constant(uint32_t Value) { + return (Value <= ((1 << 13) - 1)); + } + + template<> + inline bool isS14Constant(int64_t Value) { + return (Value >= -(1 << 13) && Value <= (1 << 13) - 1); + } + + template<> + inline bool isS14Constant(uint64_t Value) { + return (Value <= ((1 << 13) - 1)); + } + + //! Predicate test for a signed 16-bit value + /*! + \param Value The input value to be tested + */ + template<typename T> + inline bool isS16Constant(T Value); + + template<> + inline bool isS16Constant(short Value) { + return true; + } + + template<> + inline bool isS16Constant(int Value) { + return (Value >= -(1 << 15) && Value <= (1 << 15) - 1); + } + + template<> + inline bool isS16Constant(uint32_t Value) { + return (Value <= ((1 << 15) - 1)); + } + + template<> + inline bool isS16Constant(int64_t Value) { + return (Value >= -(1 << 15) && Value <= (1 << 15) - 1); + } + + template<> + inline bool isS16Constant(uint64_t Value) { + return (Value <= ((1 << 15) - 1)); + } + extern Target TheCellSPUTarget; } diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index e863ee31e41..e7c788170cd 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1107,7 +1107,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); + unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); + SDValue ArgVal = DAG.getRegister(VReg, 
MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, false, false, 0); Chain = Store.getOperand(0); diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 8c78bab37b6..916ebf0b6cf 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -336,6 +337,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); + DebugLoc dl = II->getDebugLoc(); while (!MI.getOperand(i).isFI()) { ++i; @@ -364,11 +366,22 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // Replace the FrameIndex with base register with $sp (aka $r1) SPOp.ChangeToRegister(SPU::R1, false); - if (Offset > SPUFrameInfo::maxFrameOffset() - || Offset < SPUFrameInfo::minFrameOffset()) { - errs() << "Large stack adjustment (" - << Offset - << ") in SPURegisterInfo::eliminateFrameIndex."; + + // if 'Offset' doesn't fit to the D-form instruction's + // immediate, convert the instruction to X-form + // if the instruction is not an AI (which takes a s10 immediate), assume + // it is a load/store that can take a s14 immediate + if ( (MI.getOpcode() == SPU::AIr32 && !isS10Constant(Offset)) + || !isS14Constant(Offset) ) { + int newOpcode = convertDFormToXForm(MI.getOpcode()); + unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj); + BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg ) + .addImm(Offset); + BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg()) + .addReg(tmpReg, RegState::Kill) 
+ .addReg(SPU::R1); + // remove the replaced D-form instruction + MBB.erase(II); } else { MO.ChangeToImmediate(Offset); } @@ -423,6 +436,14 @@ void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MF.getRegInfo().setPhysRegUnused(SPU::R0); MF.getRegInfo().setPhysRegUnused(SPU::R1); MF.getRegInfo().setPhysRegUnused(SPU::R2); + + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &SPU::R32CRegClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + } void SPURegisterInfo::emitPrologue(MachineFunction &MF) const @@ -448,7 +469,8 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const assert((FrameSize & 0xf) == 0 && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); - if (FrameSize > 0 || MFI->hasCalls()) { + // the "empty" frame size is 16 - just the register scavenger spill slot + if (FrameSize > 16 || MFI->hasCalls()) { FrameSize = -(FrameSize + SPUFrameInfo::minStackSize()); if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. 
@@ -467,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const // Adjust $sp by required amout BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1) .addImm(FrameSize); - } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { + } else if (isS16Constant(FrameSize)) { // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use // $r2 to adjust $sp: BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) @@ -475,7 +497,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1) + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1) .addReg(SPU::R2) .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) @@ -549,7 +571,9 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const "Can only insert epilog into returning blocks"); assert((FrameSize & 0xf) == 0 && "SPURegisterInfo::emitEpilogue: FrameSize not aligned"); - if (FrameSize > 0 || MFI->hasCalls()) { + + // the "empty" frame size is 16 - just the register scavenger spill slot + if (FrameSize > 16 || MFI->hasCalls()) { FrameSize = FrameSize + SPUFrameInfo::minStackSize(); if (isS10Constant(FrameSize + LinkSlotOffset)) { // Reload $lr, adjust $sp by required amount @@ -574,7 +598,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const .addReg(SPU::R2); BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) .addImm(16) - .addReg(SPU::R2); + .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2). 
addReg(SPU::R2) .addImm(16); @@ -618,4 +642,43 @@ SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } +int +SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const +{ + switch(dFormOpcode) + { + case SPU::AIr32: return SPU::Ar32; + case SPU::LQDr32: return SPU::LQXr32; + case SPU::LQDr128: return SPU::LQXr128; + case SPU::LQDv16i8: return SPU::LQXv16i8; + case SPU::LQDv4f32: return SPU::LQXv4f32; + case SPU::STQDr32: return SPU::STQXr32; + case SPU::STQDr128: return SPU::STQXr128; + case SPU::STQDv16i8: return SPU::STQXv16i8; + case SPU::STQDv4i32: return SPU::STQXv4i32; + case SPU::STQDv4f32: return SPU::STQXv4f32; + + default: assert( false && "Unhandled D to X-form conversion"); + } + // default will assert, but need to return something to keep the + // compiler happy. + return dFormOpcode; +} + +// TODO this is already copied from PPC. Could this convenience function +// be moved to the RegScavenger class? +unsigned +SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, + RegScavenger *RS, + const TargetRegisterClass *RC, + int SPAdj) const +{ + assert(RS && "Register scavenging must be on"); + unsigned Reg = RS->FindUnusedReg(RC); + if (Reg == 0) + Reg = RS->scavengeRegister(RC, II, SPAdj); + assert( Reg && "Register scavenger failed"); + return Reg; +} + #include "SPUGenRegisterInfo.inc" diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 48feb5c452a..0a703183569 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -53,6 +53,10 @@ namespace llvm { virtual const TargetRegisterClass* const * getCalleeSavedRegClasses(const MachineFunction *MF) const; + //! Allow for scavenging, so we can get scratch registers when needed. + virtual bool requiresRegisterScavenging(const MachineFunction &MF) const + { return true; } + //! 
Return the reserved registers BitVector getReservedRegs(const MachineFunction &MF) const; @@ -97,6 +101,21 @@ namespace llvm { //! Get DWARF debugging register number int getDwarfRegNum(unsigned RegNum, bool isEH) const; + + //! Convert D-form load/store to X-form load/store + /*! + Converts a register displacement load/store into a register-indexed + load/store for large stack frames, when the stack frame exceeds the + range of a s10 displacement. + */ + int convertDFormToXForm(int dFormOpcode) const; + + //! Acquire an unused register in an emergency. + unsigned findScratchRegister(MachineBasicBlock::iterator II, + RegScavenger *RS, + const TargetRegisterClass *RC, + int SPAdj) const; + }; } // end namespace llvm diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll new file mode 100644 index 00000000000..5483f463732 --- /dev/null +++ b/test/CodeGen/CellSPU/bigstack.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=cellspu -o %t1.s +; RUN: grep lqx %t1.s | count 4 +; RUN: grep il %t1.s | grep -v file | count 7 +; RUN: grep stqx %t1.s | count 2 + +define i32 @bigstack() nounwind { +entry: + %avar = alloca i32 + %big_data = alloca [2048 x i32] + store i32 3840, i32* %avar, align 4 + br label %return + +return: + %retval = load i32* %avar + ret i32 %retval +} +