X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMLoadStoreOptimizer.cpp;h=dc8cafa2e32fe0f6df30652248d89d89f9f3f4af;hb=431bdfc4c118cd8bc39b0052abe6ceb0ea40de66;hp=098faaab2423702cd250c786ee91b298a82cdc9e;hpb=d11898db4c8d9233af7539bc09d26bdf707e03cc;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 098faaab242..dc8cafa2e32 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -12,12 +12,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb1RegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -36,12 +38,13 @@ #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "arm-ldst-opt" + STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); @@ -65,9 +68,10 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; + const TargetLowering *TL; ARMFunctionInfo *AFI; RegScavenger *RS; - bool isThumb2; + bool isThumb1, isThumb2; bool runOnMachineFunction(MachineFunction &Fn) override; @@ -93,7 +97,6 @@ namespace { void findUsesOfImpDef(SmallVectorImpl &UsesOfImpDefs, const MemOpQueue &MemOps, unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd); - bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, @@ -119,7 +122,6 @@ namespace { ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVectorImpl &Merges); - void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); @@ -159,6 +161,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } + case ARM::tLDRi: + // tLDMIA is writeback-only - unless the base register is in the input + // reglist. + ++NumLDMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tLDMIA; + } + case ARM::tSTRi: + // There is no non-writeback tSTMIA either. + ++NumSTMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tSTMIA_UPD; + } case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -217,6 +234,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::LDMIA_UPD: case ARM::STMIA: case ARM::STMIA_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA_RET: case ARM::t2LDMIA: case ARM::t2LDMIA_UPD: @@ -263,12 +283,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { } // end namespace ARM_AM } // end namespace llvm +static bool isT1i32Load(unsigned Opc) { + return Opc == ARM::tLDRi; +} + static bool isT2i32Load(unsigned Opc) { return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; } static bool isi32Load(unsigned Opc) { - return Opc == ARM::LDRi12 || isT2i32Load(Opc); + return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; +} + +static bool isT1i32Store(unsigned Opc) { + return Opc == ARM::tSTRi; } static bool isT2i32Store(unsigned Opc) { @@ -276,7 +304,7 @@ static bool isT2i32Store(unsigned Opc) { } static bool isi32Store(unsigned Opc) { - return Opc == ARM::STRi12 || isT2i32Store(Opc); + return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -296,18 +324,19 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - // VFP and Thumb2 do not support IB or DA modes. + // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - bool haveIBAndDA = isNotVFP && !isThumb2; - if (Offset == 4 && haveIBAndDA) + bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; + + if (Offset == 4 && haveIBAndDA) { Mode = ARM_AM::ib; - else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) + } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { Mode = ARM_AM::da; - else if (Offset == -4 * (int)NumRegs && isNotVFP) + } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - else if (Offset != 0) { - // Check if this is a supported opcode before we insert instructions to + } else if (Offset != 0) { + // Check if this is a supported opcode before inserting instructions to // calculate a new base register. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; @@ -318,41 +347,100 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; unsigned NewBase; - if (isi32Load(Opcode)) + if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to // use as the new base. NewBase = Regs[NumRegs-1].first; - else { + } else { // Use the scratch register to use as a new base. NewBase = Scratch; if (NewBase == 0) return false; } - int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; + + int BaseOpc = + isThumb2 ? ARM::t2ADDri : + isThumb1 ? ARM::tADDi8 : ARM::ADDri; + if (Offset < 0) { - BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; + BaseOpc = + isThumb2 ? ARM::t2SUBri : + isThumb1 ? ARM::tSUBi8 : ARM::SUBri; Offset = - Offset; } - int ImmedOffset = isThumb2 - ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); - if (ImmedOffset == -1) - // FIXME: Try t2ADDri12 or t2SUBri12? - return false; // Probably not worth it then. - - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) - .addImm(Pred).addReg(PredReg).addReg(0); + + if (!TL->isLegalAddImmediate(Offset)) + // FIXME: Try add with register operand? + return false; // Probably not worth it then. + + if (isThumb1) { + if (Base != NewBase) { + // Need to insert a MOV to the new base first. + // FIXME: If the immediate fits in 3 bits, use ADD instead. + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + } + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) + .addReg(NewBase, getKillRegState(true)).addImm(Offset) + .addImm(Pred).addReg(PredReg); + } else { + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg).addReg(0); + } + Base = NewBase; - BaseKill = true; // New base is always killed right its use. + BaseKill = true; // New base is always killed straight away. } bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); + + // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with + // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. Check for this. + if (Opcode == ARM::tLDMIA && isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + Writeback = false; + break; + } + + // If the merged instruction has writeback and the base register is not killed + // it's not safe to do the merge on Thumb1. This is because resetting the base + // register writeback by inserting a SUBS sets the condition flags. + // FIXME: Try something clever here to see if resetting the base register can + // be avoided, e.g. by updating a later ADD/SUB of the base register with the + // writeback. + if (isThumb1 && Writeback && !BaseKill) return false; + + MachineInstrBuilder MIB; + + if (Writeback) { + if (Opcode == ARM::tLDMIA) + // Update tLDMIA with writeback if necessary. + Opcode = ARM::tLDMIA_UPD; + + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + + // Thumb1: we might need to set base writeback when building the MI. + MIB.addReg(Base, getDefRegState(true)) + .addReg(Base, getKillRegState(BaseKill)); + } else { + // No writeback, simply build the MachineInstr. + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB.addReg(Base, getKillRegState(BaseKill)); + } + + MIB.addImm(Pred).addReg(PredReg); + for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -492,7 +580,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // affected uses. for (SmallVectorImpl::iterator I = UsesOfImpDefs.begin(), E = UsesOfImpDefs.end(); - I != E; ++I) + I != E; ++I) (*I)->setIsUndef(); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { @@ -536,7 +624,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; - + bool BaseKill = false; // vldm / vstm limit are 32 for S variants, 16 for D variants. switch (Opcode) { @@ -575,21 +663,26 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ++Count; } else { // Can't merge this in. Try merge the earlier ones first. - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, - Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); + // We need to compute BaseKill here because the MemOps may have been + // reordered. + BaseKill = Loc->killsRegister(Base); + + MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, + BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps, Merges); return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) + if (MemOps[i].Position > MemOps[insertAfter].Position) { insertAfter = i; + Loc = MemOps[i].MBBI; + } } - bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; + BaseKill = Loc->killsRegister(Base); MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); - return; } static bool definesCPSR(MachineInstr *MI) { @@ -616,6 +709,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tSUBi8: case ARM::t2SUBri: case ARM::SUBri: CheckCPSRDef = true; @@ -628,10 +722,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || + MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -649,6 +744,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tADDi8: case ARM::t2ADDri: case ARM::ADDri: CheckCPSRDef = true; @@ -661,10 +757,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, // Make sure the offset fits in 8 bits. return false; - unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tADDspi || + MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -677,6 +774,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { default: return 0; case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -695,6 +794,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STMDA: case ARM::STMDB: case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA: case ARM::t2LDMDB: case ARM::t2STMIA: @@ -791,6 +893,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 is already using updating loads/stores. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(0).getReg(); bool BaseKill = MI->getOperand(0).isKill(); @@ -927,6 +1032,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, const TargetInstrInfo *TII, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 doesn't have updating LDR/STR. + // FIXME: Use LDM/STM with single register instead. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(1).getReg(); bool BaseKill = MI->getOperand(1).isKill(); @@ -1002,7 +1111,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; if (isAM5) { - // VLDM[SD}_UPD, VSTM[SD]_UPD + // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the // updating load/store-multiple instructions can be used with only one // register.) @@ -1100,6 +1209,8 @@ static bool isMemoryOp(const MachineInstr *MI) { return MI->getOperand(1).isReg(); case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1137,6 +1248,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) { Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) return OffField; + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; if (isAM3) { @@ -1408,16 +1523,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (MBBI == E) // Reach the end of the block, try merging the memory instructions. TryMerge = true; - } else + } else { TryMerge = true; + } if (TryMerge) { if (NumMemOps > 1) { // Try to find a free register to use as a new base in case it's needed. // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); + // Find a scratch register. - unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass); + unsigned Scratch = + RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); + // Process the load / store instructions. RS->forward(std::prev(MBBI)); @@ -1483,6 +1602,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// => /// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // Thumb1 LDM doesn't allow high registers. + if (isThumb1) return false; if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -1513,12 +1634,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); + TL = TM.getSubtargetImpl()->getTargetLowering(); AFI = Fn.getInfo(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); + TII = TM.getSubtargetImpl()->getInstrInfo(); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); STI = &TM.getSubtarget(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); + isThumb1 = AFI->isThumbFunction() && !isThumb2; bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; @@ -1573,9 +1696,9 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getDataLayout(); - TII = Fn.getTarget().getInstrInfo(); - TRI = Fn.getTarget().getRegisterInfo(); + TD = Fn.getSubtarget().getDataLayout(); + TII = Fn.getSubtarget().getInstrInfo(); + TRI = Fn.getSubtarget().getRegisterInfo(); STI = &Fn.getTarget().getSubtarget(); MRI = &Fn.getRegInfo(); MF = &Fn; @@ -1591,7 +1714,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, - SmallPtrSet &MemOps, + SmallPtrSetImpl &MemOps, SmallSet &MemRegs, const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? @@ -1666,11 +1789,11 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); - if (Opcode == ARM::LDRi12) + if (Opcode == ARM::LDRi12) { NewOpc = ARM::LDRD; - else if (Opcode == ARM::STRi12) + } else if (Opcode == ARM::STRi12) { NewOpc = ARM::STRD; - else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { + } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { NewOpc = ARM::t2LDRDi8; Scale = 4; isT2 = true; @@ -1678,8 +1801,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, NewOpc = ARM::t2STRDi8; Scale = 4; isT2 = true; - } else + } else { return false; + } // Make sure the base address satisfies i64 ld / st alignment requirement. // At the moment, we ignore the memoryoperand's value. @@ -1746,8 +1870,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, while (Ops.size() > 1) { unsigned FirstLoc = ~0U; unsigned LastLoc = 0; - MachineInstr *FirstOp = 0; - MachineInstr *LastOp = 0; + MachineInstr *FirstOp = nullptr; + MachineInstr *LastOp = nullptr; int LastOffset = 0; unsigned LastOpcode = 0; unsigned LastBytes = 0;