X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMLoadStoreOptimizer.cpp;h=46ff326ba630c807c8f6ef6caf3b05bd2a981956;hp=0f6dc0479e00da5f2c2d96aeec1c5c411456b4f3;hb=cd52a7a381a73c53ec4ef517ad87f19808cb1a28;hpb=31d157ae1ac2cd9c787dc3c1d28e64c682803844 diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 0f6dc0479e0..46ff326ba63 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -7,19 +7,25 @@ // //===----------------------------------------------------------------------===// // -// This file contains a pass that performs load / store related peephole -// optimizations. This pass should be run after register allocation. +/// \file This file contains a pass that performs load / store related peephole +/// optimizations. This pass should be run after register allocation. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "ThumbRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -27,21 +33,19 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; +#define DEBUG_TYPE "arm-ldst-opt" + STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); @@ -54,10 +58,9 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); -/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine -/// load / store instructions to form ldm / stm instructions. - namespace { + /// Post- register allocation pass the combine load / store instructions to + /// form ldm / stm instructions. struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; ARMLoadStoreOpt() : MachineFunctionPass(ID) {} @@ -65,13 +68,14 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; + const TargetLowering *TL; ARMFunctionInfo *AFI; RegScavenger *RS; - bool isThumb2; + bool isThumb1, isThumb2; - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM load / store optimization pass"; } @@ -90,10 +94,19 @@ namespace { typedef SmallVector MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; + void findUsesOfImpDef(SmallVectorImpl &UsesOfImpDefs, + const MemOpQueue &MemOps, unsigned DefReg, + unsigned RangeBegin, unsigned RangeEnd); + void UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg); bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int Offset, unsigned Base, bool BaseKill, int Opcode, + int Offset, unsigned Base, bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - DebugLoc dl, SmallVector, 8> &Regs); + DebugLoc dl, + ArrayRef > Regs, + ArrayRef ImpDefs); void MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &MemOps, unsigned memOpsBegin, @@ -102,18 +115,17 @@ namespace { int Offset, unsigned Base, bool BaseKill, - int Opcode, + unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector &Merges); + SmallVectorImpl &Merges); void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, - int Opcode, unsigned Size, + unsigned Opcode, unsigned Size, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, - SmallVector &Merges); - + SmallVectorImpl &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); @@ -132,7 +144,48 @@ namespace { char ARMLoadStoreOpt::ID = 0; } -static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { +static bool definesCPSR(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) + // If the instruction has live CPSR def, then it's not safe to fold it + // into load / store. + return true; + } + + return false; +} + +static int getMemoryOpOffset(const MachineInstr *MI) { + unsigned Opcode = MI->getOpcode(); + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; + unsigned NumOperands = MI->getDesc().getNumOperands(); + unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + + if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || + Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || + Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) + return OffField; + + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || + Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) + return OffField * 4; + + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4; + ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) + : ARM_AM::getAM5Op(OffField); + + if (Op == ARM_AM::sub) + return -Offset; + + return Offset; +} + +static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); case ARM::LDRi12: @@ -153,6 +206,23 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } + case ARM::tLDRi: + case ARM::tLDRspi: + // tLDMIA is writeback-only - unless the base register is in the input + // reglist. + ++NumLDMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tLDMIA; + } + case ARM::tSTRi: + case ARM::tSTRspi: + // There is no non-writeback tSTMIA either. + ++NumSTMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tSTMIA_UPD; + } case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -200,10 +270,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { } } -namespace llvm { - namespace ARM_AM { - -AMSubMode getLoadStoreMultipleSubMode(int Opcode) { +static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); case ARM::LDMIA_RET: @@ -211,6 +278,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::LDMIA_UPD: case ARM::STMIA: case ARM::STMIA_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA_RET: case ARM::t2LDMIA: case ARM::t2LDMIA_UPD: @@ -254,15 +324,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { } } - } // end namespace ARM_AM -} // end namespace llvm +static bool isT1i32Load(unsigned Opc) { + return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; +} static bool isT2i32Load(unsigned Opc) { return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; } static bool isi32Load(unsigned Opc) { - return Opc == ARM::LDRi12 || isT2i32Load(Opc); + return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; +} + +static bool isT1i32Store(unsigned Opc) { + return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; } static bool isT2i32Store(unsigned Opc) { @@ -270,37 +345,176 @@ static bool isT2i32Store(unsigned Opc) { } static bool isi32Store(unsigned Opc) { - return Opc == ARM::STRi12 || isT2i32Store(Opc); + return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); +} + +static unsigned getImmScale(unsigned Opc) { + switch (Opc) { + default: llvm_unreachable("Unhandled opcode!"); + case ARM::tLDRi: + case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: + return 1; + case ARM::tLDRHi: + case ARM::tSTRHi: + return 2; + case ARM::tLDRBi: + case ARM::tSTRBi: + return 4; + } +} + +/// Update future uses of the base register with the offset introduced +/// due to writeback. This function only works on Thumb1. +void +ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, + unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg) { + assert(isThumb1 && "Can only update base register uses for Thumb1!"); + // Start updating any instructions with immediate offsets. Insert a SUB before + // the first non-updateable instruction (if any). + for (; MBBI != MBB.end(); ++MBBI) { + bool InsertSub = false; + unsigned Opc = MBBI->getOpcode(); + + if (MBBI->readsRegister(Base)) { + int Offset; + bool IsLoad = + Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; + bool IsStore = + Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; + + if (IsLoad || IsStore) { + // Loads and stores with immediate offsets can be updated, but only if + // the new offset isn't negative. + // The MachineOperand containing the offset immediate is the last one + // before predicates. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + // The offsets are scaled by 1, 2 or 4 depending on the Opcode. + Offset = MO.getImm() - WordOffset * getImmScale(Opc); + + // If storing the base register, it needs to be reset first. + unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); + + if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) + MO.setImm(Offset); + else + InsertSub = true; + + } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && + !definesCPSR(MBBI)) { + // SUBS/ADDS using this register, with a dead def of the CPSR. + // Merge it with the update; if the merged offset is too large, + // insert a new sub instead. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + Offset = (Opc == ARM::tSUBi8) ? + MO.getImm() + WordOffset * 4 : + MO.getImm() - WordOffset * 4 ; + if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { + // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if + // Offset == 0. + MO.setImm(Offset); + // The base register has now been reset, so exit early. + return; + } else { + InsertSub = true; + } + + } else { + // Can't update the instruction. + InsertSub = true; + } + + } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { + // Since SUBS sets the condition flags, we can't place the base reset + // after an instruction that has a live CPSR def. + // The base register might also contain an argument for a function call. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); + return; + } + + if (MBBI->killsRegister(Base)) + // Register got killed. Stop updating. + return; + } + + // End of block was reached. + if (MBB.succ_size() > 0) { + // FIXME: Because of a bug, live registers are sometimes missing from + // the successor blocks' live-in sets. This means we can't trust that + // information and *always* have to reset at the end of a block. + // See PR21029. + if (MBBI != MBB.end()) --MBBI; + AddDefaultT1CC( + BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); + } } -/// MergeOps - Create and insert a LDM or STM with Base as base register and -/// registers in Regs as the register operands that would be loaded / stored. -/// It returns true if the transformation is done. +/// Create and insert a LDM or STM with Base as base register and registers in +/// Regs as the register operands that would be loaded / stored. It returns +/// true if the transformation is done. bool ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, - int Opcode, ARMCC::CondCodes Pred, + unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector, 8> &Regs) { + ArrayRef > Regs, + ArrayRef ImpDefs) { // Only a single register to load / store. Don't bother. unsigned NumRegs = Regs.size(); if (NumRegs <= 1) return false; + // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. + // Compute liveness information for that register to make the decision. + bool SafeToClobberCPSR = !isThumb1 || + (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == + MachineBasicBlock::LQR_Dead); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. + // It's also not possible to merge an STR of the base register in Thumb1. + if (isThumb1) + for (const std::pair &R : Regs) + if (Base == R.first) { + assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); + if (Opcode == ARM::tLDRi) { + Writeback = false; + break; + } else if (Opcode == ARM::tSTRi) { + return false; + } + } + ARM_AM::AMSubMode Mode = ARM_AM::ia; - // VFP and Thumb2 do not support IB or DA modes. + // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - bool haveIBAndDA = isNotVFP && !isThumb2; - if (Offset == 4 && haveIBAndDA) + bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; + + if (Offset == 4 && haveIBAndDA) { Mode = ARM_AM::ib; - else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) + } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { Mode = ARM_AM::da; - else if (Offset == -4 * (int)NumRegs && isNotVFP) + } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - else if (Offset != 0) { - // Check if this is a supported opcode before we insert instructions to + } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { + // Check if this is a supported opcode before inserting instructions to // calculate a new base register. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; @@ -310,61 +524,207 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 2) return false; + // On Thumb1, it's not worth materializing a new base register without + // clobbering the CPSR (i.e. not using ADDS/SUBS). + if (!SafeToClobberCPSR) + return false; + unsigned NewBase; - if (isi32Load(Opcode)) + if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to // use as the new base. NewBase = Regs[NumRegs-1].first; - else { + } else { // Use the scratch register to use as a new base. NewBase = Scratch; if (NewBase == 0) return false; } - int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; + + int BaseOpc = + isThumb2 ? ARM::t2ADDri : + (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : + (isThumb1 && Offset < 8) ? ARM::tADDi3 : + isThumb1 ? ARM::tADDi8 : ARM::ADDri; + if (Offset < 0) { - BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; Offset = - Offset; + BaseOpc = + isThumb2 ? ARM::t2SUBri : + (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : + isThumb1 ? ARM::tSUBi8 : ARM::SUBri; + } + + if (!TL->isLegalAddImmediate(Offset)) + // FIXME: Try add with register operand? + return false; // Probably not worth it then. + + if (isThumb1) { + // Thumb1: depending on immediate size, use either + // ADDS NewBase, Base, #imm3 + // or + // MOV NewBase, Base + // ADDS NewBase, #imm8. + if (Base != NewBase && + (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { + // Need to insert a MOV to the new base first. + if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && + !STI->hasV6Ops()) { + // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr + if (Pred != ARMCC::AL) + return false; + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) + .addReg(Base, getKillRegState(BaseKill)); + } else + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + Base = NewBase; + BaseKill = false; + } + if (BaseOpc == ARM::tADDrSPi) { + assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) + .addImm(Pred).addReg(PredReg); + } else + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg); + } else { + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg).addReg(0); } - int ImmedOffset = isThumb2 - ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); - if (ImmedOffset == -1) - // FIXME: Try t2ADDri12 or t2SUBri12? - return false; // Probably not worth it then. - - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) - .addImm(Pred).addReg(PredReg).addReg(0); Base = NewBase; - BaseKill = true; // New base is always killed right its use. + BaseKill = true; // New base is always killed straight away. } bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); + + // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with + // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); - for (unsigned i = 0; i != NumRegs; ++i) - MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) - | getKillRegState(Regs[i].second)); + + // Check if a Thumb1 LDM/STM merge is safe. This is the case if: + // - There is no writeback (LDM of base register), + // - the base register is killed by the merged instruction, + // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS + // to reset the base register. + // Otherwise, don't merge. + // It's safe to return here since the code to materialize a new base register + // above is also conditional on SafeToClobberCPSR. + if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) + return false; + + MachineInstrBuilder MIB; + + if (Writeback) { + if (Opcode == ARM::tLDMIA) + // Update tLDMIA with writeback if necessary. + Opcode = ARM::tLDMIA_UPD; + + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + + // Thumb1: we might need to set base writeback when building the MI. + MIB.addReg(Base, getDefRegState(true)) + .addReg(Base, getKillRegState(BaseKill)); + + // The base isn't dead after a merged instruction with writeback. + // Insert a sub instruction after the newly formed instruction to reset. + if (!BaseKill) + UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + + } else { + // No writeback, simply build the MachineInstr. + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB.addReg(Base, getKillRegState(BaseKill)); + } + + MIB.addImm(Pred).addReg(PredReg); + + for (const std::pair &R : Regs) + MIB = MIB.addReg(R.first, getDefRegState(isDef) + | getKillRegState(R.second)); + + // Add implicit defs for super-registers. + for (unsigned ImpDef : ImpDefs) + MIB.addReg(ImpDef, RegState::ImplicitDefine); return true; } -// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on -// success. +/// Find all instructions using a given imp-def within a range. +/// +/// We are trying to combine a range of instructions, one of which (located at +/// position RangeBegin) implicitly defines a register. The final LDM/STM will +/// be placed at RangeEnd, and so any uses of this definition between RangeStart +/// and RangeEnd must be modified to use an undefined value. +/// +/// The live range continues until we find a second definition or one of the +/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so +/// we must consider all uses and decide which are relevant in a second pass. +void ARMLoadStoreOpt::findUsesOfImpDef( + SmallVectorImpl &UsesOfImpDefs, const MemOpQueue &MemOps, + unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { + std::map Uses; + unsigned LastLivePos = RangeEnd; + + // First we find all uses of this register with Position between RangeBegin + // and RangeEnd, any or all of these could be uses of a definition at + // RangeBegin. We also record the latest position a definition at RangeBegin + // would be considered live. + for (unsigned i = 0; i < MemOps.size(); ++i) { + MachineInstr &MI = *MemOps[i].MBBI; + unsigned MIPosition = MemOps[i].Position; + if (MIPosition <= RangeBegin || MIPosition > RangeEnd) + continue; + + // If this instruction defines the register, then any later use will be of + // that definition rather than ours. + if (MI.definesRegister(DefReg)) + LastLivePos = std::min(LastLivePos, MIPosition); + + MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); + if (!UseOp) + continue; + + // If this instruction kills the register then (assuming liveness is + // correct when we start) we don't need to think about anything after here. + if (UseOp->isKill()) + LastLivePos = std::min(LastLivePos, MIPosition); + + Uses[MIPosition] = UseOp; + } + + // Now we traverse the list of all uses, and append the ones that actually use + // our definition to the requested list. + for (std::map::iterator I = Uses.begin(), + E = Uses.end(); + I != E; ++I) { + // List is sorted by position so once we've found one out of range there + // will be no more to consider. + if (I->first > LastLivePos) + break; + UsesOfImpDefs.push_back(I->second); + } +} + +/// Call MergeOps and update MemOps and merges accordingly on success. void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &memOps, unsigned memOpsBegin, unsigned memOpsEnd, unsigned insertAfter, int Offset, unsigned Base, bool BaseKill, - int Opcode, + unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector &Merges) { + SmallVectorImpl &Merges) { // First calculate which of the registers should be killed by the merged // instruction. const unsigned insertPos = memOps[insertAfter].Position; @@ -384,23 +744,50 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, } SmallVector, 8> Regs; + SmallVector ImpDefs; + SmallVector UsesOfImpDefs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; // If we are inserting the merged operation after an operation that // uses the same register, make sure to transfer any kill flag. bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); + + // Collect any implicit defs of super-registers. They must be preserved. + for (const MachineOperand &MO : memOps[i].MBBI->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead()) + continue; + unsigned DefReg = MO.getReg(); + if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) + ImpDefs.push_back(DefReg); + + // There may be other uses of the definition between this instruction and + // the eventual LDM/STM position. These should be marked undef if the + // merge takes place. + findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, + insertPos); + } } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, - Pred, PredReg, Scratch, dl, Regs)) + Pred, PredReg, Scratch, dl, Regs, ImpDefs)) return; // Merge succeeded, update records. - Merges.push_back(prior(Loc)); + Merges.push_back(std::prev(Loc)); + + // In gathering loads together, we may have moved the imp-def of a register + // past one of its uses. This is OK, since we know better than the rest of + // LLVM what's OK with ARM loads and stores; but we still have to adjust the + // affected uses. + for (SmallVectorImpl::iterator I = UsesOfImpDefs.begin(), + E = UsesOfImpDefs.end(); + I != E; ++I) + (*I)->setIsUndef(); + for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { // Remove kill flags from any memops that come before insertPos. if (Regs[i-memOpsBegin].second) { @@ -421,16 +808,21 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, memOps[i].MBBI = Merges.back(); memOps[i].Position = insertPos; } + + // Update memOps offsets, since they may have been modified by MergeOps. + for (auto &MemOp : memOps) { + MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); + } } -/// MergeLDR_STR - Merge a number of load / store instructions into one or more -/// load / store multiple instructions. +/// Merge a number of load / store instructions into one or more load / store +/// multiple instructions. void ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, - unsigned Base, int Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVector &Merges) { + unsigned Base, unsigned Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps, + SmallVectorImpl &Merges) { bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); int Offset = MemOps[SIndex].Offset; int SOffset = Offset; @@ -439,11 +831,10 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, DebugLoc dl = Loc->getDebugLoc(); const MachineOperand &PMO = Loc->getOperand(0); unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(PReg); + unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; - + bool BaseKill = false; // vldm / vstm limit are 32 for S variants, 16 for D variants. switch (Opcode) { @@ -466,49 +857,42 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, int NewOffset = MemOps[i].Offset; const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); unsigned Reg = MO.getReg(); - unsigned RegNum = MO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(Reg); + unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); // Register numbers must be in ascending order. For VFP / NEON load and // store multiples, the registers must also be consecutive and within the // limit on the number of registers per instruction. if (Reg != ARM::SP && NewOffset == Offset + (int)Size && ((isNotVFP && RegNum > PRegNum) || - ((Count < Limit) && RegNum == PRegNum+1))) { + ((Count < Limit) && RegNum == PRegNum+1)) && + // On Swift we don't want vldm/vstm to start with a odd register num + // because Q register unaligned vldm/vstm need more uops. + (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { Offset += Size; PRegNum = RegNum; ++Count; } else { // Can't merge this in. Try merge the earlier ones first. - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, - Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); + // We need to compute BaseKill here because the MemOps may have been + // reordered. + BaseKill = Loc->killsRegister(Base); + + MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, + BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps, Merges); return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) + if (MemOps[i].Position > MemOps[insertAfter].Position) { insertAfter = i; + Loc = MemOps[i].MBBI; + } } - bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; + BaseKill = Loc->killsRegister(Base); MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); - return; -} - -static bool definesCPSR(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) - // If the instruction has live CPSR def, then it's not safe to fold it - // into load / store. - return true; - } - - return false; } static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, @@ -521,10 +905,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tSUBi8: case ARM::t2SUBri: case ARM::SUBri: CheckCPSRDef = true; - // fallthrough + break; case ARM::tSUBspi: break; } @@ -533,11 +918,12 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || + MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && + (MI->getOperand(2).getImm() * Scale) == Bytes && + getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -554,10 +940,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tADDi8: case ARM::t2ADDri: case ARM::ADDri: CheckCPSRDef = true; - // fallthrough + break; case ARM::tADDspi: break; } @@ -566,11 +953,12 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, // Make sure the offset fits in 8 bits. return false; - unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tADDspi || + MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && + (MI->getOperand(2).getImm() * Scale) == Bytes && + getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -582,6 +970,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { default: return 0; case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -600,6 +992,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STMDA: case ARM::STMDB: case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA: case ARM::t2LDMDB: case ARM::t2STMIA: @@ -680,8 +1075,8 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } } -/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base -/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: +/// Fold proceeding/trailing inc/dec of base register into the +/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// /// stmia rn, /// rn := rn + 4 * 3; @@ -696,13 +1091,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 is already using updating loads/stores. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(0).getReg(); bool BaseKill = MI->getOperand(0).isKill(); unsigned Bytes = getLSMultipleTransferSize(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); - int Opcode = MI->getOpcode(); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + unsigned Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); // Can't use an updating ld/st if the base register is also a dest @@ -712,12 +1110,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, return false; bool DoMerge = false; - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); + ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) --PrevMBBI; if (Mode == ARM_AM::ia && @@ -736,7 +1134,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, // Try merging with the next instruction. MachineBasicBlock::iterator EndMBBI = MBB.end(); if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + MachineBasicBlock::iterator NextMBBI = std::next(MBBI); while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) ++NextMBBI; if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && @@ -825,18 +1223,22 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, } } -/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base -/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: +/// Fold proceeding/trailing inc/dec of base register into the +/// LDR/STR/FLD{D|S}/FST{D|S} op when possible: bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const TargetInstrInfo *TII, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 doesn't have updating LDR/STR. + // FIXME: Use LDM/STM with single register instead. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(1).getReg(); bool BaseKill = MI->getOperand(1).isKill(); unsigned Bytes = getLSMultipleTransferSize(MI); - int Opcode = MI->getOpcode(); + unsigned Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); @@ -850,11 +1252,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. - if (isLd && MI->getOperand(0).getReg() == Base) + if (MI->getOperand(0).getReg() == Base) return false; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool DoMerge = false; ARM_AM::AddrOpc AddSub = ARM_AM::add; unsigned NewOpc = 0; @@ -864,7 +1266,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) --PrevMBBI; if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { @@ -883,7 +1285,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, // Try merging with the next instruction. MachineBasicBlock::iterator EndMBBI = MBB.end(); if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + MachineBasicBlock::iterator NextMBBI = std::next(MBBI); while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) ++NextMBBI; if (!isAM5 && @@ -907,7 +1309,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; if (isAM5) { - // VLDM[SD}_UPD, VSTM[SD]_UPD + // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the // updating load/store-multiple instructions can be used with only one // register.) @@ -963,8 +1365,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return true; } -/// isMemoryOp - Returns true if instruction is a memory operation that this -/// pass is capable of operating on. +/// Returns true if instruction is a memory operation that this pass is capable +/// of operating on. static bool isMemoryOp(const MachineInstr *MI) { // When no memory operands are present, conservatively assume unaligned, // volatile, unfoldable. @@ -994,7 +1396,7 @@ static bool isMemoryOp(const MachineInstr *MI) { MI->getOperand(1).isUndef()) return false; - int Opcode = MI->getOpcode(); + unsigned Opcode = MI->getOpcode(); switch (Opcode) { default: break; case ARM::VLDRS: @@ -1005,6 +1407,10 @@ static bool isMemoryOp(const MachineInstr *MI) { return MI->getOperand(1).isReg(); case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1014,8 +1420,8 @@ static bool isMemoryOp(const MachineInstr *MI) { return false; } -/// AdvanceRS - Advance register scavenger to just before the earliest memory -/// op that is being merged. +/// Advance register scavenger to just before the earliest memory op that is +/// being merged. void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { MachineBasicBlock::iterator Loc = MemOps[0].MBBI; unsigned Position = MemOps[0].Position; @@ -1027,31 +1433,7 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { } if (Loc != MBB.begin()) - RS->forward(prior(Loc)); -} - -static int getMemoryOpOffset(const MachineInstr *MI) { - int Opcode = MI->getOpcode(); - bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; - unsigned NumOperands = MI->getDesc().getNumOperands(); - unsigned OffField = MI->getOperand(NumOperands-3).getImm(); - - if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || - Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || - Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) - return OffField; - - int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) - : ARM_AM::getAM5Offset(OffField) * 4; - if (isAM3) { - if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else { - if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } - return Offset; + RS->forward(std::prev(Loc)); } static void InsertLDR_STR(MachineBasicBlock &MBB, @@ -1082,8 +1464,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = &*MBBI; unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::LDRD || Opcode == ARM::STRD || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { + if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { const MachineOperand &BaseOp = MI->getOperand(2); unsigned BaseReg = BaseOp.getReg(); unsigned EvenReg = MI->getOperand(0).getReg(); @@ -1112,7 +1493,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); int OffImm = getMemoryOpOffset(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a @@ -1137,12 +1518,17 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } - NewBBI = llvm::prior(MBBI); + NewBBI = std::prev(MBBI); } else { // Split into two instructions. unsigned NewOpc = (isLd) ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); + // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, + // so adjust and use t2LDRi12 here for that. + unsigned NewOpc2 = (isLd) + ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. @@ -1150,11 +1536,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, (BaseKill || OffKill) && (TRI->regsOverlap(EvenReg, BaseReg))) { assert(!TRI->regsOverlap(OddReg, BaseReg)); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = llvm::prior(MBBI); + NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1167,12 +1553,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenDeadKill = false; OddDeadKill = true; } + // Never kill the base register in the first instruction. + if (EvenReg == BaseReg) + EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = llvm::prior(MBBI); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + NewBBI = std::prev(MBBI); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); @@ -1190,14 +1579,14 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } -/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR -/// ops of the same base and incrementing offset into LDM / STM ops. +/// An optimization pass to turn multiple LDR / STR ops of the same base and +/// incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned NumMerges = 0; unsigned NumMemOps = 0; MemOpQueue MemOps; unsigned CurrBase = 0; - int CurrOpc = -1; + unsigned CurrOpc = ~0u; unsigned CurrSize = 0; ARMCC::CondCodes CurrPred = ARMCC::AL; unsigned CurrPredReg = 0; @@ -1212,18 +1601,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { bool Advance = false; bool TryMerge = false; - bool Clobber = false; bool isMemOp = isMemoryOp(MBBI); if (isMemOp) { - int Opcode = MBBI->getOpcode(); + unsigned Opcode = MBBI->getOpcode(); unsigned Size = getLSMultipleTransferSize(MBBI); const MachineOperand &MO = MBBI->getOperand(0); unsigned Reg = MO.getReg(); bool isKill = MO.isDef() ? false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); // Watch out for: // r4 := ldr [r5] @@ -1234,7 +1622,23 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // looks like the later ldr(s) use the same base register. Try to // merge the ldr's so far, including this one. But don't try to // combine the following ldr(s). - Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); + bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg(); + + // Watch out for: + // r4 := ldr [r0, #8] + // r4 := ldr [r0, #4] + // + // The optimization may reorder the second ldr in front of the first + // ldr, which violates write after write(WAW) dependence. The same as + // str. Try to merge inst(s) already in MemOps. + bool Overlap = false; + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { + if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { + Overlap = true; + break; + } + } + if (CurrBase == 0 && !Clobber) { // Start of a new chain. CurrBase = Base; @@ -1245,7 +1649,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); ++NumMemOps; Advance = true; - } else { + } else if (!Overlap) { if (Clobber) { TryMerge = true; Advance = true; @@ -1289,18 +1693,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (MBBI == E) // Reach the end of the block, try merging the memory instructions. TryMerge = true; - } else + } else { TryMerge = true; + } if (TryMerge) { if (NumMemOps > 1) { // Try to find a free register to use as a new base in case it's needed. // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); + // Find a scratch register. - unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass); + unsigned Scratch = + RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); + // Process the load / store instructions. - RS->forward(prior(MBBI)); + RS->forward(std::prev(MBBI)); // Merge ops. Merges.clear(); @@ -1322,18 +1730,18 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { ++NumMerges; // RS may be pointing to an instruction that's deleted. - RS->skipTo(prior(MBBI)); + RS->skipTo(std::prev(MBBI)); } else if (NumMemOps == 1) { // Try folding preceding/trailing base inc/dec into the single // load/store. if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { ++NumMerges; - RS->forward(prior(MBBI)); + RS->forward(std::prev(MBBI)); } } CurrBase = 0; - CurrOpc = -1; + CurrOpc = ~0u; CurrSize = 0; CurrPred = ARMCC::AL; CurrPredReg = 0; @@ -1353,9 +1761,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { return NumMerges > 0; } -/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops -/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it -/// directly restore the value of LR into pc. +/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") +/// into the preceding stack restore so it directly restore the value of LR +/// into pc. /// ldmfd sp!, {..., lr} /// bx lr /// or @@ -1364,6 +1772,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// => /// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // Thumb1 LDM doesn't allow high registers. + if (isThumb1) return false; if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -1371,7 +1781,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::MOVPCLR)) { - MachineInstr *PrevMI = prior(MBBI); + MachineInstr *PrevMI = std::prev(MBBI); unsigned Opcode = PrevMI->getOpcode(); if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || @@ -1384,7 +1794,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); PrevMI->setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); - PrevMI->copyImplicitOps(&*MBBI); + PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); MBB.erase(MBBI); return true; } @@ -1393,20 +1803,21 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - const TargetMachine &TM = Fn.getTarget(); + STI = &static_cast(Fn.getSubtarget()); + TL = STI->getTargetLowering(); AFI = Fn.getInfo(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); - STI = &TM.getSubtarget(); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); + isThumb1 = AFI->isThumbFunction() && !isThumb2; bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock &MBB = *MFI; Modified |= LoadStoreMultipleOpti(MBB); - if (TM.getSubtarget().hasV5TOps()) + if (STI->hasV5TOps()) Modified |= MergeReturnIntoLDM(MBB); } @@ -1414,26 +1825,23 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return Modified; } - -/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move -/// load / stores from consecutive locations close to make it more -/// likely they will be combined later. - namespace { + /// Pre- register allocation pass that move load / stores from consecutive + /// locations close to make it more likely they will be combined later. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} - const TargetData *TD; + const DataLayout *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; MachineRegisterInfo *MRI; MachineFunction *MF; - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM pre- register allocation load / store optimization pass"; } @@ -1445,7 +1853,7 @@ namespace { unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2); bool RescheduleOps(MachineBasicBlock *MBB, - SmallVector &Ops, + SmallVectorImpl &Ops, unsigned Base, bool isLd, DenseMap &MI2LocMap); bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); @@ -1454,10 +1862,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getTargetData(); - TII = Fn.getTarget().getInstrInfo(); - TRI = Fn.getTarget().getRegisterInfo(); - STI = &Fn.getTarget().getSubtarget(); + TD = Fn.getTarget().getDataLayout(); + STI = &static_cast(Fn.getSubtarget()); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); MRI = &Fn.getRegInfo(); MF = &Fn; @@ -1472,7 +1880,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, - SmallPtrSet &MemOps, + SmallPtrSetImpl &MemOps, SmallSet &MemRegs, const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? @@ -1516,7 +1924,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, } -/// Copy Op0 and Op1 operands into a new array assigned to MI. +/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI. static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, MachineInstr *Op1) { assert(MI->memoperands_empty() && "expected a new machineinstr"); @@ -1534,10 +1942,11 @@ static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, - DebugLoc &dl, - unsigned &NewOpc, unsigned &EvenReg, - unsigned &OddReg, unsigned &BaseReg, - int &Offset, unsigned &PredReg, + DebugLoc &dl, unsigned &NewOpc, + unsigned &FirstReg, + unsigned &SecondReg, + unsigned &BaseReg, int &Offset, + unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2) { // Make sure we're allowed to generate LDRD/STRD. @@ -1547,11 +1956,11 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); - if (Opcode == ARM::LDRi12) + if (Opcode == ARM::LDRi12) { NewOpc = ARM::LDRD; - else if (Opcode == ARM::STRi12) + } else if (Opcode == ARM::STRi12) { NewOpc = ARM::STRD; - else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { + } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { NewOpc = ARM::t2LDRDi8; Scale = 4; isT2 = true; @@ -1559,12 +1968,14 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, NewOpc = ARM::t2STRDi8; Scale = 4; isT2 = true; - } else + } else { return false; + } // Make sure the base address satisfies i64 ld / st alignment requirement. + // At the moment, we ignore the memoryoperand's value. + // If we want to use AliasAnalysis, we should check it accordingly. if (!Op0->hasOneMemOperand() || - !(*Op0->memoperands_begin())->getValue() || (*Op0->memoperands_begin())->isVolatile()) return false; @@ -1594,35 +2005,30 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; Offset = ARM_AM::getAM3Opc(AddSub, OffImm); } - EvenReg = Op0->getOperand(0).getReg(); - OddReg = Op1->getOperand(0).getReg(); - if (EvenReg == OddReg) + FirstReg = Op0->getOperand(0).getReg(); + SecondReg = Op1->getOperand(0).getReg(); + if (FirstReg == SecondReg) return false; BaseReg = Op0->getOperand(1).getReg(); - Pred = llvm::getInstrPredicate(Op0, PredReg); + Pred = getInstrPredicate(Op0, PredReg); dl = Op0->getDebugLoc(); return true; } -namespace { - struct OffsetCompare { - bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { - int LOffset = getMemoryOpOffset(LHS); - int ROffset = getMemoryOpOffset(RHS); - assert(LHS == RHS || LOffset != ROffset); - return LOffset > ROffset; - } - }; -} - bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, - SmallVector &Ops, + SmallVectorImpl &Ops, unsigned Base, bool isLd, DenseMap &MI2LocMap) { bool RetVal = false; // Sort by offset (in reverse order). - std::sort(Ops.begin(), Ops.end(), OffsetCompare()); + std::sort(Ops.begin(), Ops.end(), + [](const MachineInstr *LHS, const MachineInstr *RHS) { + int LOffset = getMemoryOpOffset(LHS); + int ROffset = getMemoryOpOffset(RHS); + assert(LHS == RHS || LOffset != ROffset); + return LOffset > ROffset; + }); // The loads / stores of the same base are in order. Scan them from first to // last and check for the following: @@ -1631,8 +2037,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, while (Ops.size() > 1) { unsigned FirstLoc = ~0U; unsigned LastLoc = 0; - MachineInstr *FirstOp = 0; - MachineInstr *LastOp = 0; + MachineInstr *FirstOp = nullptr; + MachineInstr *LastOp = nullptr; int LastOffset = 0; unsigned LastOpcode = 0; unsigned LastBytes = 0; @@ -1697,7 +2103,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, // to try to allocate a pair of registers that can form register pairs. MachineInstr *Op0 = Ops.back(); MachineInstr *Op1 = Ops[Ops.size()-2]; - unsigned EvenReg = 0, OddReg = 0; + unsigned FirstReg = 0, SecondReg = 0; unsigned BaseReg = 0, PredReg = 0; ARMCC::CondCodes Pred = ARMCC::AL; bool isT2 = false; @@ -1705,21 +2111,21 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, int Offset = 0; DebugLoc dl; if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, - EvenReg, OddReg, BaseReg, + FirstReg, SecondReg, BaseReg, Offset, PredReg, Pred, isT2)) { Ops.pop_back(); Ops.pop_back(); const MCInstrDesc &MCID = TII->get(NewOpc); - const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); - MRI->constrainRegClass(EvenReg, TRC); - MRI->constrainRegClass(OddReg, TRC); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); + MRI->constrainRegClass(FirstReg, TRC); + MRI->constrainRegClass(SecondReg, TRC); // Form the pair instruction. if (isLd) { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) - .addReg(EvenReg, RegState::Define) - .addReg(OddReg, RegState::Define) + .addReg(FirstReg, RegState::Define) + .addReg(SecondReg, RegState::Define) .addReg(BaseReg); // FIXME: We're converting from LDRi12 to an insn that still // uses addrmode2, so we need an explicit offset reg. It should @@ -1732,8 +2138,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, ++NumLDRDFormed; } else { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) - .addReg(EvenReg) - .addReg(OddReg) + .addReg(FirstReg) + .addReg(SecondReg) .addReg(BaseReg); // FIXME: We're converting from LDRi12 to an insn that still // uses addrmode2, so we need an explicit offset reg. It should @@ -1748,9 +2154,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MBB->erase(Op0); MBB->erase(Op1); - // Add register allocation hints to form register pairs. - MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); - MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); + if (!isT2) { + // Add register allocation hints to form register pairs. + MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); + MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); + } } else { for (unsigned i = 0; i != NumMove; ++i) { MachineInstr *Op = Ops.back(); @@ -1796,7 +2204,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; - if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL) + if (getInstrPredicate(MI, PredReg) != ARMCC::AL) continue; int Opc = MI->getOpcode(); @@ -1818,9 +2226,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!StopHere) BI->second.push_back(MI); } else { - SmallVector MIs; - MIs.push_back(MI); - Base2LdsMap[Base] = MIs; + Base2LdsMap[Base].push_back(MI); LdBases.push_back(Base); } } else { @@ -1836,9 +2242,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!StopHere) BI->second.push_back(MI); } else { - SmallVector MIs; - MIs.push_back(MI); - Base2StsMap[Base] = MIs; + Base2StsMap[Base].push_back(MI); StBases.push_back(Base); } } @@ -1854,7 +2258,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Re-schedule loads. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { unsigned Base = LdBases[i]; - SmallVector &Lds = Base2LdsMap[Base]; + SmallVectorImpl &Lds = Base2LdsMap[Base]; if (Lds.size() > 1) RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); } @@ -1862,7 +2266,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Re-schedule stores. for (unsigned i = 0, e = StBases.size(); i != e; ++i) { unsigned Base = StBases[i]; - SmallVector &Sts = Base2StsMap[Base]; + SmallVectorImpl &Sts = Base2StsMap[Base]; if (Sts.size() > 1) RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); } @@ -1879,8 +2283,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { } -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. +/// Returns an instance of the load / store optimization pass. FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { if (PreAlloc) return new ARMPreAllocLoadStoreOpt();