//
//===----------------------------------------------------------------------===//
//
-// This file contains a pass that performs load / store related peephole
-// optimizations. This pass should be run after register allocation.
+/// \file This file contains a pass that performs load / store related peephole
+/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "ThumbRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-ldst-opt"
+
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
-/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
-/// load / store instructions to form ldm / stm instructions.
-
namespace {
+  /// Post-register allocation pass that combines load / store instructions
+  /// to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
+ const TargetLowering *TL;
ARMFunctionInfo *AFI;
RegScavenger *RS;
- bool isThumb2;
+ bool isThumb1, isThumb2;
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "ARM load / store optimization pass";
}
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
+ void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
+ const MemOpQueue &MemOps, unsigned DefReg,
+ unsigned RangeBegin, unsigned RangeEnd);
+ void UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, unsigned Base, unsigned WordOffset,
+ ARMCC::CondCodes Pred, unsigned PredReg);
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill, int Opcode,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
- DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ DebugLoc dl,
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs);
void MergeOpsUpdate(MachineBasicBlock &MBB,
MemOpQueue &MemOps,
unsigned memOpsBegin,
int Offset,
unsigned Base,
bool BaseKill,
- int Opcode,
+ unsigned Opcode,
ARMCC::CondCodes Pred,
unsigned PredReg,
unsigned Scratch,
DebugLoc dl,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+ SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
- int Opcode, unsigned Size,
+ unsigned Opcode, unsigned Size,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges);
-
+ SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
char ARMLoadStoreOpt::ID = 0;
}
-static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
+static bool definesCPSR(const MachineInstr *MI) {
+ for (const auto &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If the instruction has live CPSR def, then it's not safe to fold it
+ // into load / store.
+ return true;
+ }
+
+ return false;
+}
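+
+// For illustration (hypothetical operands): an ADDS whose flags are later
+// consumed carries a live CPSR def, so definesCPSR() returns true and the
+// instruction cannot be folded into a load / store update; an ADDS whose
+// CPSR def is dead is still a candidate.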
+
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ // Thumb1 immediate offsets are scaled by 4
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
+ Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
+ return OffField * 4;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
+ : ARM_AM::getAM5Op(OffField);
+
+ if (Op == ARM_AM::sub)
+ return -Offset;
+
+ return Offset;
+}
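+
+// For illustration (hypothetical operands), getMemoryOpOffset returns the
+// byte offset encoded in the instruction:
+//   ldr  r0, [r1, #8]       (LDRi12)         ->  8
+//   ldr  r0, [r1, #4]       (tLDRi, imm 1)   ->  1 * 4 = 4
+//   ldrd r0, r1, [r2, #-8]  (LDRD, AM3 sub)  -> -8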
+
+static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
case ARM::LDRi12:
case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB;
}
+ case ARM::tLDRi:
+ case ARM::tLDRspi:
+ // tLDMIA is writeback-only - unless the base register is in the input
+ // reglist.
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tLDMIA;
+ }
+ case ARM::tSTRi:
+ case ARM::tSTRspi:
+ // There is no non-writeback tSTMIA either.
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tSTMIA_UPD;
+ }
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
}
}
-namespace llvm {
- namespace ARM_AM {
-
-AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
case ARM::LDMIA_RET:
case ARM::LDMIA_UPD:
case ARM::STMIA:
case ARM::STMIA_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
case ARM::t2LDMIA_RET:
case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD:
}
}
- } // end namespace ARM_AM
-} // end namespace llvm
+static bool isT1i32Load(unsigned Opc) {
+ return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
+}
static bool isT2i32Load(unsigned Opc) {
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}
static bool isi32Load(unsigned Opc) {
- return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
+}
+
+static bool isT1i32Store(unsigned Opc) {
+ return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}
static bool isT2i32Store(unsigned Opc) {
}
static bool isi32Store(unsigned Opc) {
- return Opc == ARM::STRi12 || isT2i32Store(Opc);
+ return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
+}
+
+static unsigned getImmScale(unsigned Opc) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
+ return 1;
+ case ARM::tLDRHi:
+ case ARM::tSTRHi:
+ return 2;
+ case ARM::tLDRBi:
+ case ARM::tSTRBi:
+ return 4;
+ }
+}
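+
+// For illustration: getImmScale converts a word count into the immediate
+// units of the given opcode. Writing back one word (4 bytes) shrinks a tLDRi
+// immediate by 1, a tLDRHi immediate by 2 and a tLDRBi immediate by 4, since
+// those immediates count units of 4, 2 and 1 bytes respectively.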
+
+/// Update future uses of the base register with the offset introduced
+/// due to writeback. This function only works on Thumb1.
+void
+ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, unsigned Base,
+ unsigned WordOffset,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ assert(isThumb1 && "Can only update base register uses for Thumb1!");
+ // Start updating any instructions with immediate offsets. Insert a SUB before
+ // the first non-updateable instruction (if any).
+ for (; MBBI != MBB.end(); ++MBBI) {
+ bool InsertSub = false;
+ unsigned Opc = MBBI->getOpcode();
+
+ if (MBBI->readsRegister(Base)) {
+ int Offset;
+ bool IsLoad =
+ Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
+ bool IsStore =
+ Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
+
+ if (IsLoad || IsStore) {
+ // Loads and stores with immediate offsets can be updated, but only if
+ // the new offset isn't negative.
+ // The MachineOperand containing the offset immediate is the last one
+ // before predicates.
+ MachineOperand &MO =
+ MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
+ // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
+ Offset = MO.getImm() - WordOffset * getImmScale(Opc);
+
+ // If storing the base register, it needs to be reset first.
+ unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
+
+ if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
+ MO.setImm(Offset);
+ else
+ InsertSub = true;
+
+ } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
+ !definesCPSR(MBBI)) {
+ // SUBS/ADDS using this register, with a dead def of the CPSR.
+ // Merge it with the update; if the merged offset is too large,
+ // insert a new sub instead.
+ MachineOperand &MO =
+ MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
+        Offset = (Opc == ARM::tSUBi8) ?
+          MO.getImm() + WordOffset * 4 :
+          MO.getImm() - WordOffset * 4;
+ if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
+ // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
+ // Offset == 0.
+ MO.setImm(Offset);
+ // The base register has now been reset, so exit early.
+ return;
+ } else {
+ InsertSub = true;
+ }
+
+ } else {
+ // Can't update the instruction.
+ InsertSub = true;
+ }
+
+ } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
+ // Since SUBS sets the condition flags, we can't place the base reset
+ // after an instruction that has a live CPSR def.
+ // The base register might also contain an argument for a function call.
+ InsertSub = true;
+ }
+
+ if (InsertSub) {
+ // An instruction above couldn't be updated, so insert a sub.
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ return;
+ }
+
+ if (MBBI->killsRegister(Base))
+ // Register got killed. Stop updating.
+ return;
+ }
+
+ // End of block was reached.
+ if (MBB.succ_size() > 0) {
+ // FIXME: Because of a bug, live registers are sometimes missing from
+ // the successor blocks' live-in sets. This means we can't trust that
+ // information and *always* have to reset at the end of a block.
+ // See PR21029.
+ if (MBBI != MBB.end()) --MBBI;
+ AddDefaultT1CC(
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ }
}
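+
+// Illustrative before/after (hypothetical registers), assuming a merged load
+// wrote back 2 words (8 bytes) to r0:
+//   ldm  r0!, {r1, r2}
+//   ldr  r3, [r0]         (was ldr r3, [r0, #8] before the merge)
+//   ldr  r4, [r0, #4]     (was ldr r4, [r0, #12])
+// If a rewritten offset would become negative, "subs r0, #8" is inserted
+// before the offending instruction instead and updating stops.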
-/// MergeOps - Create and insert a LDM or STM with Base as base register and
-/// registers in Regs as the register operands that would be loaded / stored.
-/// It returns true if the transformation is done.
+/// Create and insert an LDM or STM with Base as the base register and the
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill,
- int Opcode, ARMCC::CondCodes Pred,
+ unsigned Opcode, ARMCC::CondCodes Pred,
unsigned PredReg, unsigned Scratch, DebugLoc dl,
- SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs) {
// Only a single register to load / store. Don't bother.
unsigned NumRegs = Regs.size();
if (NumRegs <= 1)
return false;
+ // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
+ // Compute liveness information for that register to make the decision.
+ bool SafeToClobberCPSR = !isThumb1 ||
+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
+ MachineBasicBlock::LQR_Dead);
+
+ bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
+
+ // Exception: If the base register is in the input reglist, Thumb1 LDM is
+ // non-writeback.
+ // It's also not possible to merge an STR of the base register in Thumb1.
+ if (isThumb1)
+ for (const std::pair<unsigned, bool> &R : Regs)
+ if (Base == R.first) {
+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
+ if (Opcode == ARM::tLDRi) {
+ Writeback = false;
+ break;
+ } else if (Opcode == ARM::tSTRi) {
+ return false;
+ }
+ }
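+
+  // For illustration (hypothetical registers):
+  //   ldm r0, {r0, r1}    (allowed: base in reglist, non-writeback LDM)
+  //   stm r0!, {r0, r1}   (not formed: storing the base is rejected above)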
+
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- // VFP and Thumb2 do not support IB or DA modes.
+ // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
- bool haveIBAndDA = isNotVFP && !isThumb2;
- if (Offset == 4 && haveIBAndDA)
+ bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
+
+ if (Offset == 4 && haveIBAndDA) {
Mode = ARM_AM::ib;
- else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
Mode = ARM_AM::da;
- else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- else if (Offset != 0) {
- // Check if this is a supported opcode before we insert instructions to
+ } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
+ // Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
if (NumRegs <= 2)
return false;
+ // On Thumb1, it's not worth materializing a new base register without
+ // clobbering the CPSR (i.e. not using ADDS/SUBS).
+ if (!SafeToClobberCPSR)
+ return false;
+
unsigned NewBase;
- if (isi32Load(Opcode))
+ if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
NewBase = Regs[NumRegs-1].first;
- else {
+ } else {
      // Use the scratch register as the new base.
NewBase = Scratch;
if (NewBase == 0)
return false;
}
- int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
+
+ int BaseOpc =
+ isThumb2 ? ARM::t2ADDri :
+ (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
+ (isThumb1 && Offset < 8) ? ARM::tADDi3 :
+ isThumb1 ? ARM::tADDi8 : ARM::ADDri;
+
if (Offset < 0) {
- BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
Offset = - Offset;
+ BaseOpc =
+ isThumb2 ? ARM::t2SUBri :
+ (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
+ isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
+ }
+
+ if (!TL->isLegalAddImmediate(Offset))
+ // FIXME: Try add with register operand?
+ return false; // Probably not worth it then.
+
+ if (isThumb1) {
+ // Thumb1: depending on immediate size, use either
+ // ADDS NewBase, Base, #imm3
+ // or
+ // MOV NewBase, Base
+ // ADDS NewBase, #imm8.
+ if (Base != NewBase &&
+ (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
+ // Need to insert a MOV to the new base first.
+ if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
+ !STI->hasV6Ops()) {
+ // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
+ if (Pred != ARMCC::AL)
+ return false;
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
+ .addReg(Base, getKillRegState(BaseKill));
+ } else
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+
+ // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
+ Base = NewBase;
+ BaseKill = false;
+ }
+ if (BaseOpc == ARM::tADDrSPi) {
+ assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
+ .addImm(Pred).addReg(PredReg);
+ } else
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
}
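+
+    // For illustration, a Thumb1 store merge at a large offset materializes a
+    // new base in the scratch register first (hypothetical registers):
+    //   mov  r3, r4       (tMOVr; tMOVSr on pre-v6, unpredicated only)
+    //   adds r3, #16      (tADDi8; clobbers CPSR, hence SafeToClobberCPSR)
+    //   stm  r3!, {r0, r1, r2}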
- int ImmedOffset = isThumb2
- ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
- if (ImmedOffset == -1)
- // FIXME: Try t2ADDri12 or t2SUBri12?
- return false; // Probably not worth it then.
-
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
- .addImm(Pred).addReg(PredReg).addReg(0);
Base = NewBase;
- BaseKill = true; // New base is always killed right its use.
+ BaseKill = true; // New base is always killed straight away.
}
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
+
+ // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
+ // base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
if (!Opcode) return false;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(Pred).addReg(PredReg);
- for (unsigned i = 0; i != NumRegs; ++i)
- MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
- | getKillRegState(Regs[i].second));
+
+ // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
+ // - There is no writeback (LDM of base register),
+ // - the base register is killed by the merged instruction,
+ // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
+ // to reset the base register.
+ // Otherwise, don't merge.
+ // It's safe to return here since the code to materialize a new base register
+ // above is also conditional on SafeToClobberCPSR.
+ if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
+ return false;
+
+ MachineInstrBuilder MIB;
+
+ if (Writeback) {
+ if (Opcode == ARM::tLDMIA)
+ // Update tLDMIA with writeback if necessary.
+ Opcode = ARM::tLDMIA_UPD;
+
+ MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+
+ // Thumb1: we might need to set base writeback when building the MI.
+ MIB.addReg(Base, getDefRegState(true))
+ .addReg(Base, getKillRegState(BaseKill));
+
+    // The base isn't dead after a merged instruction with writeback.
+    // Update later uses of the base, inserting a SUB to reset it if needed.
+ if (!BaseKill)
+ UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
+
+ } else {
+ // No writeback, simply build the MachineInstr.
+ MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB.addReg(Base, getKillRegState(BaseKill));
+ }
+
+ MIB.addImm(Pred).addReg(PredReg);
+
+ for (const std::pair<unsigned, bool> &R : Regs)
+ MIB = MIB.addReg(R.first, getDefRegState(isDef)
+ | getKillRegState(R.second));
+
+ // Add implicit defs for super-registers.
+ for (unsigned ImpDef : ImpDefs)
+ MIB.addReg(ImpDef, RegState::ImplicitDefine);
return true;
}
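+
+// A representative merge performed by MergeOps (hypothetical registers):
+//   ldr r0, [r4]
+//   ldr r1, [r4, #4]
+//   ldr r2, [r4, #8]
+// =>
+//   ldmia r4, {r0, r1, r2}      (on Thumb1: ldmia r4!, {r0, r1, r2})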
-// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
-// success.
+/// Find all instructions using a given imp-def within a range.
+///
+/// We are trying to combine a range of instructions, one of which (located at
+/// position RangeBegin) implicitly defines a register. The final LDM/STM will
+/// be placed at RangeEnd, and so any uses of this definition between RangeStart
+/// and RangeEnd must be modified to use an undefined value.
+///
+/// The live range continues until we find a second definition or one of the
+/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
+/// we must consider all uses and decide which are relevant in a second pass.
+void ARMLoadStoreOpt::findUsesOfImpDef(
+ SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
+ unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
+ std::map<unsigned, MachineOperand *> Uses;
+ unsigned LastLivePos = RangeEnd;
+
+ // First we find all uses of this register with Position between RangeBegin
+ // and RangeEnd, any or all of these could be uses of a definition at
+ // RangeBegin. We also record the latest position a definition at RangeBegin
+ // would be considered live.
+ for (unsigned i = 0; i < MemOps.size(); ++i) {
+ MachineInstr &MI = *MemOps[i].MBBI;
+ unsigned MIPosition = MemOps[i].Position;
+ if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
+ continue;
+
+ // If this instruction defines the register, then any later use will be of
+ // that definition rather than ours.
+ if (MI.definesRegister(DefReg))
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
+ if (!UseOp)
+ continue;
+
+ // If this instruction kills the register then (assuming liveness is
+ // correct when we start) we don't need to think about anything after here.
+ if (UseOp->isKill())
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ Uses[MIPosition] = UseOp;
+ }
+
+ // Now we traverse the list of all uses, and append the ones that actually use
+ // our definition to the requested list.
+ for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
+ E = Uses.end();
+ I != E; ++I) {
+ // List is sorted by position so once we've found one out of range there
+ // will be no more to consider.
+ if (I->first > LastLivePos)
+ break;
+ UsesOfImpDefs.push_back(I->second);
+ }
+}
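+
+// For illustration (a hypothetical scenario): a VLDRS of s1 in the range may
+// carry an implicit def of its super-register d0. If a later instruction in
+// the range reads d0, findUsesOfImpDef collects that use so MergeOpsUpdate
+// can mark it undef once the merged load lands at RangeEnd.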
+
+/// Call MergeOps and update MemOps and merges accordingly on success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
MemOpQueue &memOps,
unsigned memOpsBegin, unsigned memOpsEnd,
unsigned insertAfter, int Offset,
unsigned Base, bool BaseKill,
- int Opcode,
+ unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch,
DebugLoc dl,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
const unsigned insertPos = memOps[insertAfter].Position;
}
SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 8> ImpDefs;
+ SmallVector<MachineOperand *, 8> UsesOfImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
// uses the same register, make sure to transfer any kill flag.
bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
Regs.push_back(std::make_pair(Reg, isKill));
+
+ // Collect any implicit defs of super-registers. They must be preserved.
+ for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
+ continue;
+ unsigned DefReg = MO.getReg();
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
+ ImpDefs.push_back(DefReg);
+
+ // There may be other uses of the definition between this instruction and
+ // the eventual LDM/STM position. These should be marked undef if the
+ // merge takes place.
+ findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
+ insertPos);
+ }
}
// Try to do the merge.
MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
++Loc;
if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
- Pred, PredReg, Scratch, dl, Regs))
+ Pred, PredReg, Scratch, dl, Regs, ImpDefs))
return;
// Merge succeeded, update records.
- Merges.push_back(prior(Loc));
+ Merges.push_back(std::prev(Loc));
+
+ // In gathering loads together, we may have moved the imp-def of a register
+ // past one of its uses. This is OK, since we know better than the rest of
+ // LLVM what's OK with ARM loads and stores; but we still have to adjust the
+ // affected uses.
+ for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
+ E = UsesOfImpDefs.end();
+ I != E; ++I)
+ (*I)->setIsUndef();
+
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any memops that come before insertPos.
if (Regs[i-memOpsBegin].second) {
memOps[i].MBBI = Merges.back();
memOps[i].Position = insertPos;
}
+
+ // Update memOps offsets, since they may have been modified by MergeOps.
+ for (auto &MemOp : memOps) {
+ MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
+ }
}
-/// MergeLDR_STR - Merge a number of load / store instructions into one or more
-/// load / store multiple instructions.
+/// Merge a number of load / store instructions into one or more load / store
+/// multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
- unsigned Base, int Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ unsigned Base, unsigned Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps,
+ SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
DebugLoc dl = Loc->getDebugLoc();
const MachineOperand &PMO = Loc->getOperand(0);
unsigned PReg = PMO.getReg();
- unsigned PRegNum = PMO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(PReg);
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Count = 1;
unsigned Limit = ~0U;
-
+ bool BaseKill = false;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.
switch (Opcode) {
int NewOffset = MemOps[i].Offset;
const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
unsigned Reg = MO.getReg();
- unsigned RegNum = MO.isUndef() ? UINT_MAX
- : getARMRegisterNumbering(Reg);
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
// Register numbers must be in ascending order. For VFP / NEON load and
// store multiples, the registers must also be consecutive and within the
// limit on the number of registers per instruction.
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
((isNotVFP && RegNum > PRegNum) ||
- ((Count < Limit) && RegNum == PRegNum+1))) {
+ ((Count < Limit) && RegNum == PRegNum+1)) &&
+      // On Swift we don't want vldm/vstm to start with an odd register
+      // number because Q register unaligned vldm/vstm need more uops.
+ (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
Offset += Size;
PRegNum = RegNum;
++Count;
} else {
      // Can't merge this in. Try merging the earlier ones first.
- MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
- Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ // We need to compute BaseKill here because the MemOps may have been
+ // reordered.
+ BaseKill = Loc->killsRegister(Base);
+
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
+ BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
MemOps, Merges);
return;
}
- if (MemOps[i].Position > MemOps[insertAfter].Position)
+ if (MemOps[i].Position > MemOps[insertAfter].Position) {
insertAfter = i;
+ Loc = MemOps[i].MBBI;
+ }
}
- bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ BaseKill = Loc->killsRegister(Base);
MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
- return;
-}
-
-static bool definesCPSR(MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
- // If the instruction has live CPSR def, then it's not safe to fold it
- // into load / store.
- return true;
- }
-
- return false;
}
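+
+// Note the ascending-encoding requirement enforced above (hypothetical
+// registers):
+//   ldr r1, [r4]; ldr r2, [r4, #4]   ->  ldm r4, {r1, r2}
+//   ldr r2, [r4]; ldr r1, [r4, #4]   ->  not merged; register numbers must
+//                                        ascend along with the offsets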
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
+ case ARM::tSUBi8:
case ARM::t2SUBri:
case ARM::SUBri:
CheckCPSRDef = true;
- // fallthrough
+ break;
case ARM::tSUBspi:
break;
}
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
+ MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
- llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ (MI->getOperand(2).getImm() * Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
+ case ARM::tADDi8:
case ARM::t2ADDri:
case ARM::ADDri:
CheckCPSRDef = true;
- // fallthrough
+ break;
case ARM::tADDspi:
break;
}
// Make sure the offset fits in 8 bits.
return false;
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
+ MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
- llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
+ (MI->getOperand(2).getImm() * Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
default: return 0;
case ARM::LDRi12:
case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::STMDA:
case ARM::STMDB:
case ARM::STMIB:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
case ARM::t2LDMIA:
case ARM::t2LDMDB:
case ARM::t2STMIA:
}
}
-/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
-/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+/// Fold a preceding/trailing inc/dec of the base register into the
+/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I) {
+ // Thumb1 is already using updating loads/stores.
+ if (isThumb1) return false;
+
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg();
bool BaseKill = MI->getOperand(0).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
- int Opcode = MI->getOpcode();
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ unsigned Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
// Can't use an updating ld/st if the base register is also a dest
return false;
bool DoMerge = false;
- ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
+ ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
if (Mode == ARM_AM::ia &&
// Try merging with the next instruction.
MachineBasicBlock::iterator EndMBBI = MBB.end();
if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
}
}
-/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
-/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+/// Fold a preceding/trailing inc/dec of the base register into the
+/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const TargetInstrInfo *TII,
bool &Advance,
MachineBasicBlock::iterator &I) {
+ // Thumb1 doesn't have updating LDR/STR.
+ // FIXME: Use LDM/STM with single register instead.
+ if (isThumb1) return false;
+
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(1).getReg();
bool BaseKill = MI->getOperand(1).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
- int Opcode = MI->getOpcode();
+ unsigned Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
- if (isLd && MI->getOperand(0).getReg() == Base)
+ if (MI->getOperand(0).getReg() == Base)
return false;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
bool DoMerge = false;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
unsigned NewOpc = 0;
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
// Try merging with the next instruction.
MachineBasicBlock::iterator EndMBBI = MBB.end();
if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
if (!isAM5 &&
return false;
if (isAM5) {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
+ // VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
// updating load/store-multiple instructions can be used with only one
// register.)
return true;
}
-/// isMemoryOp - Returns true if instruction is a memory operation that this
-/// pass is capable of operating on.
+/// Returns true if the instruction is a memory operation that this pass is
+/// capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
// When no memory operands are present, conservatively assume unaligned,
// volatile, unfoldable.
MI->getOperand(1).isUndef())
return false;
- int Opcode = MI->getOpcode();
+ unsigned Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
case ARM::VLDRS:
return MI->getOperand(1).isReg();
case ARM::LDRi12:
case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
return false;
}
-/// AdvanceRS - Advance register scavenger to just before the earliest memory
-/// op that is being merged.
+/// Advance the register scavenger to just before the earliest memory op that
+/// is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
unsigned Position = MemOps[0].Position;
}
if (Loc != MBB.begin())
- RS->forward(prior(Loc));
-}
-
-static int getMemoryOpOffset(const MachineInstr *MI) {
- int Opcode = MI->getOpcode();
- bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
- unsigned NumOperands = MI->getDesc().getNumOperands();
- unsigned OffField = MI->getOperand(NumOperands-3).getImm();
-
- if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
- Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
- Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
- Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
- return OffField;
-
- int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
- : ARM_AM::getAM5Offset(OffField) * 4;
- if (isAM3) {
- if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- } else {
- if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- }
- return Offset;
+ RS->forward(std::prev(Loc));
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr *MI = &*MBBI;
unsigned Opcode = MI->getOpcode();
- if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
- Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
const MachineOperand &BaseOp = MI->getOperand(2);
unsigned BaseReg = BaseOp.getReg();
unsigned EvenReg = MI->getOperand(0).getReg();
bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
int OffImm = getMemoryOpOffset(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
- NewBBI = llvm::prior(MBBI);
+ NewBBI = std::prev(MBBI);
} else {
// Split into two instructions.
unsigned NewOpc = (isLd)
? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
: (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+ // so adjust and use t2LDRi12 here for that.
+ unsigned NewOpc2 = (isLd)
+ ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and the base register is killed, it may have been
    // re-defed by the load; make sure the first load does not clobber it.
(BaseKill || OffKill) &&
(TRI->regsOverlap(EvenReg, BaseReg))) {
assert(!TRI->regsOverlap(OddReg, BaseReg));
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
- NewBBI = llvm::prior(MBBI);
+ NewBBI = std::prev(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
EvenDeadKill = false;
OddDeadKill = true;
}
+ // Never kill the base register in the first instruction.
+ if (EvenReg == BaseReg)
+ EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
- NewBBI = llvm::prior(MBBI);
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ NewBBI = std::prev(MBBI);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
return false;
}
-/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
-/// ops of the same base and incrementing offset into LDM / STM ops.
+/// An optimization pass to turn multiple LDR / STR ops of the same base and
+/// incrementing offsets into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned NumMerges = 0;
unsigned NumMemOps = 0;
MemOpQueue MemOps;
unsigned CurrBase = 0;
- int CurrOpc = -1;
+ unsigned CurrOpc = ~0u;
unsigned CurrSize = 0;
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned CurrPredReg = 0;
bool Advance = false;
bool TryMerge = false;
- bool Clobber = false;
bool isMemOp = isMemoryOp(MBBI);
if (isMemOp) {
- int Opcode = MBBI->getOpcode();
+ unsigned Opcode = MBBI->getOpcode();
unsigned Size = getLSMultipleTransferSize(MBBI);
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
bool isKill = MO.isDef() ? false : MO.isKill();
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
// looks like the later ldr(s) use the same base register. Try to
// merge the ldr's so far, including this one. But don't try to
// combine the following ldr(s).
- Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+ bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
+
+ // Watch out for:
+ // r4 := ldr [r0, #8]
+ // r4 := ldr [r0, #4]
+ //
+ // The optimization may reorder the second ldr in front of the first
+ // ldr, which violates write after write(WAW) dependence. The same as
+ // str. Try to merge inst(s) already in MemOps.
+ bool Overlap = false;
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
+ if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
+ Overlap = true;
+ break;
+ }
+ }
+
if (CurrBase == 0 && !Clobber) {
// Start of a new chain.
CurrBase = Base;
MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
++NumMemOps;
Advance = true;
- } else {
+ } else if (!Overlap) {
if (Clobber) {
TryMerge = true;
Advance = true;
if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
TryMerge = true;
- } else
+ } else {
TryMerge = true;
+ }
if (TryMerge) {
if (NumMemOps > 1) {
// Try to find a free register to use as a new base in case it's needed.
// First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps);
+
// Find a scratch register.
- unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
+ unsigned Scratch =
+ RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
+
// Process the load / store instructions.
- RS->forward(prior(MBBI));
+ RS->forward(std::prev(MBBI));
// Merge ops.
Merges.clear();
++NumMerges;
// RS may be pointing to an instruction that's deleted.
- RS->skipTo(prior(MBBI));
+ RS->skipTo(std::prev(MBBI));
} else if (NumMemOps == 1) {
// Try folding preceding/trailing base inc/dec into the single
// load/store.
if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
++NumMerges;
- RS->forward(prior(MBBI));
+ RS->forward(std::prev(MBBI));
}
}
CurrBase = 0;
- CurrOpc = -1;
+ CurrOpc = ~0u;
CurrSize = 0;
CurrPred = ARMCC::AL;
CurrPredReg = 0;
return NumMerges > 0;
}
-/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
-/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
-/// directly restore the value of LR into pc.
+/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
+/// lr") into the preceding stack restore so it directly restores the value of
+/// LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ // Thumb1 LDM doesn't allow high registers.
+ if (isThumb1) return false;
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
(MBBI->getOpcode() == ARM::BX_RET ||
MBBI->getOpcode() == ARM::tBX_RET ||
MBBI->getOpcode() == ARM::MOVPCLR)) {
- MachineInstr *PrevMI = prior(MBBI);
+ MachineInstr *PrevMI = std::prev(MBBI);
unsigned Opcode = PrevMI->getOpcode();
if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
PrevMI->setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
- PrevMI->copyImplicitOps(&*MBBI);
+ PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
MBB.erase(MBBI);
return true;
}
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
+ STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ TL = STI->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
- STI = &TM.getSubtarget<ARMSubtarget>();
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
+ isThumb1 = AFI->isThumbFunction() && !isThumb2;
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock &MBB = *MFI;
Modified |= LoadStoreMultipleOpti(MBB);
- if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
}
return Modified;
}
-
-/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
-/// load / stores from consecutive locations close to make it more
-/// likely they will be combined later.
-
namespace {
+  /// Pre-register allocation pass that moves loads / stores from consecutive
+  /// locations closer together, to make it more likely they will be combined
+  /// later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
- const TargetData *TD;
+ const DataLayout *TD;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
MachineRegisterInfo *MRI;
MachineFunction *MF;
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "ARM pre- register allocation load / store optimization pass";
}
unsigned &PredReg, ARMCC::CondCodes &Pred,
bool &isT2);
bool RescheduleOps(MachineBasicBlock *MBB,
- SmallVector<MachineInstr*, 4> &Ops,
+ SmallVectorImpl<MachineInstr *> &Ops,
unsigned Base, bool isLd,
DenseMap<MachineInstr*, unsigned> &MI2LocMap);
bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getTargetData();
- TII = Fn.getTarget().getInstrInfo();
- TRI = Fn.getTarget().getRegisterInfo();
- STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ TD = Fn.getTarget().getDataLayout();
+ STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
MF = &Fn;
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E,
- SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallPtrSetImpl<MachineInstr*> &MemOps,
SmallSet<unsigned, 4> &MemRegs,
const TargetRegisterInfo *TRI) {
// Are there stores / loads / calls between them?
}
-/// Copy Op0 and Op1 operands into a new array assigned to MI.
+/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
MachineInstr *Op1) {
assert(MI->memoperands_empty() && "expected a new machineinstr");
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
- DebugLoc &dl,
- unsigned &NewOpc, unsigned &EvenReg,
- unsigned &OddReg, unsigned &BaseReg,
- int &Offset, unsigned &PredReg,
+ DebugLoc &dl, unsigned &NewOpc,
+ unsigned &FirstReg,
+ unsigned &SecondReg,
+ unsigned &BaseReg, int &Offset,
+ unsigned &PredReg,
ARMCC::CondCodes &Pred,
bool &isT2) {
// Make sure we're allowed to generate LDRD/STRD.
// FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
- if (Opcode == ARM::LDRi12)
+ if (Opcode == ARM::LDRi12) {
NewOpc = ARM::LDRD;
- else if (Opcode == ARM::STRi12)
+ } else if (Opcode == ARM::STRi12) {
NewOpc = ARM::STRD;
- else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
NewOpc = ARM::t2LDRDi8;
Scale = 4;
isT2 = true;
NewOpc = ARM::t2STRDi8;
Scale = 4;
isT2 = true;
- } else
+ } else {
return false;
+ }
// Make sure the base address satisfies i64 ld / st alignment requirement.
+  // At the moment, we ignore the memory operand's value.
+ // If we want to use AliasAnalysis, we should check it accordingly.
if (!Op0->hasOneMemOperand() ||
- !(*Op0->memoperands_begin())->getValue() ||
(*Op0->memoperands_begin())->isVolatile())
return false;
return false;
Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
}
- EvenReg = Op0->getOperand(0).getReg();
- OddReg = Op1->getOperand(0).getReg();
- if (EvenReg == OddReg)
+ FirstReg = Op0->getOperand(0).getReg();
+ SecondReg = Op1->getOperand(0).getReg();
+ if (FirstReg == SecondReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- Pred = llvm::getInstrPredicate(Op0, PredReg);
+ Pred = getInstrPredicate(Op0, PredReg);
dl = Op0->getDebugLoc();
return true;
}
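+
+// A pair that CanFormLdStDWord accepts (hypothetical registers; ARM mode,
+// base known to be 8-byte aligned):
+//   ldr r0, [r2]
+//   ldr r1, [r2, #4]
+// =>
+//   ldrd r0, r1, [r2]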
-namespace {
- struct OffsetCompare {
- bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
- int LOffset = getMemoryOpOffset(LHS);
- int ROffset = getMemoryOpOffset(RHS);
- assert(LHS == RHS || LOffset != ROffset);
- return LOffset > ROffset;
- }
- };
-}
-
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
- SmallVector<MachineInstr*, 4> &Ops,
+ SmallVectorImpl<MachineInstr *> &Ops,
unsigned Base, bool isLd,
DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
bool RetVal = false;
// Sort by offset (in reverse order).
- std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+ std::sort(Ops.begin(), Ops.end(),
+ [](const MachineInstr *LHS, const MachineInstr *RHS) {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ });
// The loads / stores of the same base are in order. Scan them from first to
// last and check for the following:
while (Ops.size() > 1) {
unsigned FirstLoc = ~0U;
unsigned LastLoc = 0;
- MachineInstr *FirstOp = 0;
- MachineInstr *LastOp = 0;
+ MachineInstr *FirstOp = nullptr;
+ MachineInstr *LastOp = nullptr;
int LastOffset = 0;
unsigned LastOpcode = 0;
unsigned LastBytes = 0;
// to try to allocate a pair of registers that can form register pairs.
MachineInstr *Op0 = Ops.back();
MachineInstr *Op1 = Ops[Ops.size()-2];
- unsigned EvenReg = 0, OddReg = 0;
+ unsigned FirstReg = 0, SecondReg = 0;
unsigned BaseReg = 0, PredReg = 0;
ARMCC::CondCodes Pred = ARMCC::AL;
bool isT2 = false;
int Offset = 0;
DebugLoc dl;
if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
- EvenReg, OddReg, BaseReg,
+ FirstReg, SecondReg, BaseReg,
Offset, PredReg, Pred, isT2)) {
Ops.pop_back();
Ops.pop_back();
const MCInstrDesc &MCID = TII->get(NewOpc);
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
- MRI->constrainRegClass(EvenReg, TRC);
- MRI->constrainRegClass(OddReg, TRC);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ MRI->constrainRegClass(FirstReg, TRC);
+ MRI->constrainRegClass(SecondReg, TRC);
// Form the pair instruction.
if (isLd) {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
- .addReg(EvenReg, RegState::Define)
- .addReg(OddReg, RegState::Define)
+ .addReg(FirstReg, RegState::Define)
+ .addReg(SecondReg, RegState::Define)
.addReg(BaseReg);
// FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should
++NumLDRDFormed;
} else {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
- .addReg(EvenReg)
- .addReg(OddReg)
+ .addReg(FirstReg)
+ .addReg(SecondReg)
.addReg(BaseReg);
// FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should
MBB->erase(Op0);
MBB->erase(Op1);
- // Add register allocation hints to form register pairs.
- MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
- MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ if (!isT2) {
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
+ MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
+ }
} else {
for (unsigned i = 0; i != NumMove; ++i) {
MachineInstr *Op = Ops.back();
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
- if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
int Opc = MI->getOpcode();
if (!StopHere)
BI->second.push_back(MI);
} else {
- SmallVector<MachineInstr*, 4> MIs;
- MIs.push_back(MI);
- Base2LdsMap[Base] = MIs;
+ Base2LdsMap[Base].push_back(MI);
LdBases.push_back(Base);
}
} else {
if (!StopHere)
BI->second.push_back(MI);
} else {
- SmallVector<MachineInstr*, 4> MIs;
- MIs.push_back(MI);
- Base2StsMap[Base] = MIs;
+ Base2StsMap[Base].push_back(MI);
StBases.push_back(Base);
}
}
// Re-schedule loads.
for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
unsigned Base = LdBases[i];
- SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
if (Lds.size() > 1)
RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
}
// Re-schedule stores.
for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
unsigned Base = StBases[i];
- SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
if (Sts.size() > 1)
RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
}
}
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
+/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
if (PreAlloc)
return new ARMPreAllocLoadStoreOpt();