//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-ldst-opt"
+
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
+ const TargetLowering *TL;
ARMFunctionInfo *AFI;
RegScavenger *RS;
- bool isThumb2;
+ bool isThumb1, isThumb2;
bool runOnMachineFunction(MachineFunction &Fn) override;
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
const MemOpQueue &MemOps, unsigned DefReg,
unsigned RangeBegin, unsigned RangeEnd);
-
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
-
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB;
}
+ case ARM::tLDRi:
+ // tLDMIA is writeback-only - unless the base register is in the input
+ // reglist.
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tLDMIA;
+ }
+ case ARM::tSTRi:
+ // There is no non-writeback tSTMIA either.
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tSTMIA_UPD;
+ }
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
case ARM::LDMIA_UPD:
case ARM::STMIA:
case ARM::STMIA_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
case ARM::t2LDMIA_RET:
case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD:
} // end namespace ARM_AM
} // end namespace llvm
+static bool isT1i32Load(unsigned Opc) {
+ return Opc == ARM::tLDRi;
+}
+
static bool isT2i32Load(unsigned Opc) {
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}
static bool isi32Load(unsigned Opc) {
- return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+ return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
+}
+
+static bool isT1i32Store(unsigned Opc) {
+ return Opc == ARM::tSTRi;
}
static bool isT2i32Store(unsigned Opc) {
}
static bool isi32Store(unsigned Opc) {
- return Opc == ARM::STRi12 || isT2i32Store(Opc);
+ return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- // VFP and Thumb2 do not support IB or DA modes.
+ // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
- bool haveIBAndDA = isNotVFP && !isThumb2;
- if (Offset == 4 && haveIBAndDA)
+ bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
+
+ if (Offset == 4 && haveIBAndDA) {
Mode = ARM_AM::ib;
- else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
Mode = ARM_AM::da;
- else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- else if (Offset != 0) {
- // Check if this is a supported opcode before we insert instructions to
+ } else if (Offset != 0) {
+ // Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
return false;
unsigned NewBase;
- if (isi32Load(Opcode))
+ if (isi32Load(Opcode)) {
// If it is a load, then just use one of the destination register to
// use as the new base.
NewBase = Regs[NumRegs-1].first;
- else {
+ } else {
// Use the scratch register to use as a new base.
NewBase = Scratch;
if (NewBase == 0)
return false;
}
- int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
+
+ int BaseOpc =
+ isThumb2 ? ARM::t2ADDri :
+ isThumb1 ? ARM::tADDi8 : ARM::ADDri;
+
if (Offset < 0) {
- BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
+ BaseOpc =
+ isThumb2 ? ARM::t2SUBri :
+ isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
Offset = - Offset;
}
- int ImmedOffset = isThumb2
- ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
- if (ImmedOffset == -1)
- // FIXME: Try t2ADDri12 or t2SUBri12?
- return false; // Probably not worth it then.
-
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
- .addImm(Pred).addReg(PredReg).addReg(0);
+
+ if (!TL->isLegalAddImmediate(Offset))
+ // FIXME: Try add with register operand?
+ return false; // Probably not worth it then.
+
+ if (isThumb1) {
+ if (Base != NewBase) {
+ // Need to insert a MOV to the new base first.
+ // FIXME: If the immediate fits in 3 bits, use ADD instead.
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+ }
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
+ .addReg(NewBase, getKillRegState(true)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ }
+
Base = NewBase;
- BaseKill = true; // New base is always killed right its use.
+ BaseKill = true; // New base is always killed straight away.
}
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
+
+ // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
+ // base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
if (!Opcode) return false;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(Pred).addReg(PredReg);
+
+ bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
+
+ // Exception: If the base register is in the input reglist, Thumb1 LDM is
+ // non-writeback. Check for this.
+ if (Opcode == ARM::tLDMIA && isThumb1)
+ for (unsigned I = 0; I < NumRegs; ++I)
+ if (Base == Regs[I].first) {
+ Writeback = false;
+ break;
+ }
+
+ // If the merged instruction has writeback and the base register is not killed
+ // it's not safe to do the merge on Thumb1. This is because resetting the base
+ // register writeback by inserting a SUBS sets the condition flags.
+ // FIXME: Try something clever here to see if resetting the base register can
+ // be avoided, e.g. by updating a later ADD/SUB of the base register with the
+ // writeback.
+ if (isThumb1 && Writeback && !BaseKill) return false;
+
+ MachineInstrBuilder MIB;
+
+ if (Writeback) {
+ if (Opcode == ARM::tLDMIA)
+ // Update tLDMIA with writeback if necessary.
+ Opcode = ARM::tLDMIA_UPD;
+
+ MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+
+ // Thumb1: we might need to set base writeback when building the MI.
+ MIB.addReg(Base, getDefRegState(true))
+ .addReg(Base, getKillRegState(BaseKill));
+ } else {
+ // No writeback, simply build the MachineInstr.
+ MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB.addReg(Base, getKillRegState(BaseKill));
+ }
+
+ MIB.addImm(Pred).addReg(PredReg);
+
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
// affected uses.
for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
E = UsesOfImpDefs.end();
- I != E; ++I)
+ I != E; ++I)
(*I)->setIsUndef();
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Count = 1;
unsigned Limit = ~0U;
-
+ bool BaseKill = false;
// vldm / vstm limit are 32 for S variants, 16 for D variants.
switch (Opcode) {
++Count;
} else {
// Can't merge this in. Try merge the earlier ones first.
- MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
- Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ // We need to compute BaseKill here because the MemOps may have been
+ // reordered.
+ BaseKill = Loc->killsRegister(Base);
+
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
+ BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
MemOps, Merges);
return;
}
- if (MemOps[i].Position > MemOps[insertAfter].Position)
+ if (MemOps[i].Position > MemOps[insertAfter].Position) {
insertAfter = i;
+ Loc = MemOps[i].MBBI;
+ }
}
- bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ BaseKill = Loc->killsRegister(Base);
MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
- return;
}
static bool definesCPSR(MachineInstr *MI) {
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
+ case ARM::tSUBi8:
case ARM::t2SUBri:
case ARM::SUBri:
CheckCPSRDef = true;
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
+ MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ (MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
+ case ARM::tADDi8:
case ARM::t2ADDri:
case ARM::ADDri:
CheckCPSRDef = true;
// Make sure the offset fits in 8 bits.
return false;
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
+ MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ (MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
default: return 0;
case ARM::LDRi12:
case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::STMDA:
case ARM::STMDB:
case ARM::STMIB:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
case ARM::t2LDMIA:
case ARM::t2LDMDB:
case ARM::t2STMIA:
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I) {
+ // Thumb1 is already using updating loads/stores.
+ if (isThumb1) return false;
+
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg();
bool BaseKill = MI->getOperand(0).isKill();
const TargetInstrInfo *TII,
bool &Advance,
MachineBasicBlock::iterator &I) {
+ // Thumb1 doesn't have updating LDR/STR.
+ // FIXME: Use LDM/STM with single register instead.
+ if (isThumb1) return false;
+
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(1).getReg();
bool BaseKill = MI->getOperand(1).isKill();
return false;
if (isAM5) {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
+ // VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
// updating load/store-multiple instructions can be used with only one
// register.)
return MI->getOperand(1).isReg();
case ARM::LDRi12:
case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
return OffField;
+ // Thumb1 immediate offsets are scaled by 4
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
+ return OffField * 4;
+
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
: ARM_AM::getAM5Offset(OffField) * 4;
if (isAM3) {
if (MBBI == E)
// Reach the end of the block, try merging the memory instructions.
TryMerge = true;
- } else
+ } else {
TryMerge = true;
+ }
if (TryMerge) {
if (NumMemOps > 1) {
// Try to find a free register to use as a new base in case it's needed.
// First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps);
+
// Find a scratch register.
- unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
+ unsigned Scratch =
+ RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
+
// Process the load / store instructions.
RS->forward(std::prev(MBBI));
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ // Thumb1 LDM doesn't allow high registers.
+ if (isThumb1) return false;
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
+ TL = TM.getSubtargetImpl()->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
+ TII = TM.getSubtargetImpl()->getInstrInfo();
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
STI = &TM.getSubtarget<ARMSubtarget>();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
+ isThumb1 = AFI->isThumbFunction() && !isThumb2;
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getDataLayout();
- TII = Fn.getTarget().getInstrInfo();
- TRI = Fn.getTarget().getRegisterInfo();
+ TD = Fn.getSubtarget().getDataLayout();
+ TII = Fn.getSubtarget().getInstrInfo();
+ TRI = Fn.getSubtarget().getRegisterInfo();
STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
MRI = &Fn.getRegInfo();
MF = &Fn;
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E,
- SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallPtrSetImpl<MachineInstr*> &MemOps,
SmallSet<unsigned, 4> &MemRegs,
const TargetRegisterInfo *TRI) {
// Are there stores / loads / calls between them?
// FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
- if (Opcode == ARM::LDRi12)
+ if (Opcode == ARM::LDRi12) {
NewOpc = ARM::LDRD;
- else if (Opcode == ARM::STRi12)
+ } else if (Opcode == ARM::STRi12) {
NewOpc = ARM::STRD;
- else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
NewOpc = ARM::t2LDRDi8;
Scale = 4;
isT2 = true;
NewOpc = ARM::t2STRDi8;
Scale = 4;
isT2 = true;
- } else
+ } else {
return false;
+ }
// Make sure the base address satisfies i64 ld / st alignment requirement.
// At the moment, we ignore the memoryoperand's value.
while (Ops.size() > 1) {
unsigned FirstLoc = ~0U;
unsigned LastLoc = 0;
- MachineInstr *FirstOp = 0;
- MachineInstr *LastOp = 0;
+ MachineInstr *FirstOp = nullptr;
+ MachineInstr *LastOp = nullptr;
int LastOffset = 0;
unsigned LastOpcode = 0;
unsigned LastBytes = 0;