namespace {
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
private:
struct MemOpQueueEntry {
int Offset;
+ unsigned Reg;
+ bool isKill;
unsigned Position;
MachineBasicBlock::iterator MBBI;
bool Merged;
- MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
- : Offset(o), Position(p), MBBI(i), Merged(false) {}
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
case ARM::LDR:
- NumLDMGened++;
+ ++NumLDMGened;
return ARM::LDM;
case ARM::STR:
- NumSTMGened++;
+ ++NumSTMGened;
return ARM::STM;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
- NumLDMGened++;
+ ++NumLDMGened;
return ARM::t2LDM;
case ARM::t2STRi8:
case ARM::t2STRi12:
- NumSTMGened++;
+ ++NumSTMGened;
return ARM::t2STM;
case ARM::VLDRS:
- NumVLDMGened++;
+ ++NumVLDMGened;
return ARM::VLDMS;
case ARM::VSTRS:
- NumVSTMGened++;
+ ++NumVSTMGened;
return ARM::VSTMS;
case ARM::VLDRD:
- NumVLDMGened++;
+ ++NumVLDMGened;
return ARM::VLDMD;
case ARM::VSTRD:
- NumVSTMGened++;
+ ++NumVSTMGened;
return ARM::VSTMD;
default: llvm_unreachable("Unhandled opcode!");
}
BaseKill = true; // New base is always killed right its use.
}
- bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
- bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
+ bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
+ bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VLDRD);
Opcode = getLoadStoreMultipleOpcode(Opcode);
MachineInstrBuilder MIB = (isAM4)
? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
.addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
: BuildMI(MBB, MBBI, dl, TII->get(Opcode))
.addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
+ .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
.addImm(Pred).addReg(PredReg);
- MIB.addReg(0); // Add optional writeback (0 for now).
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
-void ARMLoadStoreOpt::
-MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &memOps,
- unsigned memOpsBegin,
- unsigned memOpsEnd,
- unsigned insertAfter,
- int Offset,
- unsigned Base,
- bool BaseKill,
- int Opcode,
- ARMCC::CondCodes Pred,
- unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
+ const unsigned insertPos = memOps[insertAfter].Position;
+
+ SmallSet<unsigned, 4> UnavailRegs;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0; i < memOpsBegin; ++i) {
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ if (memOps[i].Merged)
+ UnavailRegs.insert(Reg);
+ else {
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+ }
+ for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
SmallVector<std::pair<unsigned, bool>, 8> Regs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
- Regs.push_back(std::make_pair(MO.getReg(), MO.isKill()));
+ unsigned Reg = memOps[i].Reg;
+ if (UnavailRegs.count(Reg))
+ // Register is killed before and it's not easy / possible to update the
+ // kill marker on already merged instructions. Abort.
+ return;
+
+ // If we are inserting the merged operation after an unmerged operation that
+ // uses the same register, make sure to transfer any kill flag.
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
+ Regs.push_back(std::make_pair(Reg, isKill));
}
// Try to do the merge.
MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
- Loc++;
+ ++Loc;
if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
Pred, PredReg, Scratch, dl, Regs))
return;
// Merge succeeded, update records.
Merges.push_back(prior(Loc));
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ // Remove kill flags from any unmerged memops that come before insertPos.
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ memOps[j].MBBI->getOperand(0).setIsKill(false);
+ }
+ }
MBB.erase(memOps[i].MBBI);
memOps[i].Merged = true;
}
unsigned PReg = PMO.getReg();
unsigned PRegNum = PMO.isUndef() ? UINT_MAX
: ARMRegisterInfo::getRegisterNumbering(PReg);
+ unsigned Count = 1;
for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
int NewOffset = MemOps[i].Offset;
: ARMRegisterInfo::getRegisterNumbering(Reg);
// AM4 - register numbers in ascending order.
// AM5 - consecutive register numbers in ascending order.
- if (NewOffset == Offset + (int)Size &&
- ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+ // Can only do up to 16 double-word registers per insn.
+ if (Reg != ARM::SP &&
+ NewOffset == Offset + (int)Size &&
+ ((isAM4 && RegNum > PRegNum)
+ || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
+ ++Count;
} else {
// Can't merge this in. Try merge the earlier ones first.
MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
- return (MI->getNumOperands() - 5) * 4;
+ return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMS:
case ARM::VSTMS:
case ARM::VLDMD:
}
}
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDM: return ARM::LDM_UPD;
+ case ARM::STM: return ARM::STM_UPD;
+ case ARM::t2LDM: return ARM::t2LDM_UPD;
+ case ARM::t2STM: return ARM::t2STM_UPD;
+ case ARM::VLDMS: return ARM::VLDMS_UPD;
+ case ARM::VLDMD: return ARM::VLDMD_UPD;
+ case ARM::VSTMS: return ARM::VSTMS_UPD;
+ case ARM::VSTMD: return ARM::VSTMD_UPD;
+ default: llvm_unreachable("Unhandled opcode!");
+ }
+ return 0;
+}
+
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
MachineBasicBlock::iterator &I) {
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg();
+ bool BaseKill = MI->getOperand(0).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
- bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
- Opcode == ARM::STM || Opcode == ARM::t2STM;
+ DebugLoc dl = MI->getDebugLoc();
+ bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
+ Opcode == ARM::STM || Opcode == ARM::t2STM);
- if (isAM4) {
- if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
- return false;
+ bool DoMerge = false;
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ unsigned Offset = 0;
- // Can't use the updating AM4 sub-mode if the base register is also a dest
+ if (isAM4) {
+ // Can't use an updating ld/st if the base register is also a dest
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).getReg() == Base)
return false;
}
+ Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+ } else {
+ // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
+ Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+ Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ }
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
- if (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
+ if (isAM4) {
if (Mode == ARM_AM::ia &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
- MI->getOperand(4).setReg(Base);
- MI->getOperand(4).setIsDef();
- MBB.erase(PrevMBBI);
- return true;
- } else if (Mode == ARM_AM::ib &&
+ DoMerge = true;
+ Mode = ARM_AM::db;
+ } else if (isAM4 && Mode == ARM_AM::ib &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
- MI->getOperand(4).setReg(Base); // WB to base
- MI->getOperand(4).setIsDef();
- MBB.erase(PrevMBBI);
- return true;
+ DoMerge = true;
+ Mode = ARM_AM::da;
+ }
+ } else {
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
}
}
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
- if (MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (isAM4) {
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
- MI->getOperand(4).setReg(Base); // WB to base
- MI->getOperand(4).setIsDef();
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
- MBB.erase(NextMBBI);
- return true;
+ DoMerge = true;
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
- MI->getOperand(4).setReg(Base); // WB to base
- MI->getOperand(4).setIsDef();
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
- MBB.erase(NextMBBI);
- return true;
+ DoMerge = true;
}
- }
- } else {
- // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
- if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
- return false;
-
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
- unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
- if (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ } else {
if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
- MI->getOperand(4).setReg(Base); // WB to base
- MI->getOperand(4).setIsDef();
- MBB.erase(PrevMBBI);
- return true;
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
}
}
-
- if (MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
- if (Mode == ARM_AM::ia &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
- MI->getOperand(4).setReg(Base); // WB to base
- MI->getOperand(4).setIsDef();
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
- MBB.erase(NextMBBI);
+ if (DoMerge) {
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
}
- return true;
+ MBB.erase(NextMBBI);
}
}
- return false;
+ if (!DoMerge)
+ return false;
+
+ unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(BaseKill));
+ if (isAM4) {
+ // [t2]LDM_UPD, [t2]STM_UPD
+ MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
+ .addImm(Pred).addReg(PredReg);
+ } else {
+ // VLDM[SD}_UPD, VSTM[SD]_UPD
+ MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
+ .addImm(Pred).addReg(PredReg);
+ }
+ // Transfer the rest of operands.
+ for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ MIB.addOperand(MI->getOperand(OpNum));
+ // Transfer memoperands.
+ (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ MBB.erase(MBBI);
+ return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_PRE;
case ARM::STR: return ARM::STR_PRE;
- case ARM::VLDRS: return ARM::VLDMS;
- case ARM::VLDRD: return ARM::VLDMD;
- case ARM::VSTRS: return ARM::VSTMS;
- case ARM::VSTRD: return ARM::VSTMD;
+ case ARM::VLDRS: return ARM::VLDMS_UPD;
+ case ARM::VLDRD: return ARM::VLDMD_UPD;
+ case ARM::VSTRS: return ARM::VSTMS_UPD;
+ case ARM::VSTRD: return ARM::VSTMD_UPD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_PRE;
switch (Opc) {
case ARM::LDR: return ARM::LDR_POST;
case ARM::STR: return ARM::STR_POST;
- case ARM::VLDRS: return ARM::VLDMS;
- case ARM::VLDRD: return ARM::VLDMD;
- case ARM::VSTRS: return ARM::VSTMS;
- case ARM::VSTRD: return ARM::VSTMD;
+ case ARM::VLDRS: return ARM::VLDMS_UPD;
+ case ARM::VLDRD: return ARM::VLDMD_UPD;
+ case ARM::VSTRS: return ARM::VSTMS_UPD;
+ case ARM::VSTRD: return ARM::VSTMD_UPD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
unsigned Bytes = getLSMultipleTransferSize(MI);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
- Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
- bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
+ bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
return false;
- else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
- else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
+ if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
if (MI->getOperand(2).getImm() != 0)
return false;
unsigned NewOpc = 0;
// AM2 - 12 bits, thumb2 - 8 bits.
unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
- if (MBBI != MBB.begin()) {
+
+ // Try merging with the previous instruction.
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
- NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
} else if (!isAM5 &&
isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
DoMerge = true;
- NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
}
- if (DoMerge)
+ if (DoMerge) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
MBB.erase(PrevMBBI);
+ }
}
- if (!DoMerge && MBBI != MBB.end()) {
+ // Try merging with the next instruction.
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
} else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
DoMerge = true;
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
}
if (DoMerge) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
if (NextMBBI == I) {
Advance = true;
++I;
bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
- Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
- ? ARM_AM::db
- : ARM_AM::ia, true, (isDPR ? 2 : 1));
+ Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
+ (isDPR ? 2 : 1));
else if (isAM2)
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
else
Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
- if (isLd) {
- if (isAM5)
- // VLDMS, VLDMD
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(Offset).addImm(Pred).addReg(PredReg)
- .addReg(Base, getDefRegState(true)) // WB base register
- .addReg(MI->getOperand(0).getReg(), RegState::Define);
- else if (isAM2)
+
+ if (isAM5) {
+ // VLDM[SD}_UPD, VSTM[SD]_UPD
+ MachineOperand &MO = MI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getDefRegState(true)) // WB base register
+ .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+ .addImm(Offset)
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
+ getKillRegState(MO.isKill())));
+ } else if (isLd) {
+ if (isAM2)
// LDR_PRE, LDR_POST,
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
MachineOperand &MO = MI->getOperand(0);
- if (isAM5)
- // VSTMS, VSTMD
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
- .addImm(Pred).addReg(PredReg)
- .addReg(Base, getDefRegState(true)) // WB base register
- .addReg(MO.getReg(), getKillRegState(MO.isKill()));
- else if (isAM2)
+ if (isAM2)
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
/// isMemoryOp - Returns true if instruction is a memory operations (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).isUndef())
+ return false;
+
int Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
return false;
+ MachineBasicBlock::iterator NewBBI = MBBI;
bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ?
.addReg(BaseReg, getKillRegState(BaseKill))
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addImm(Pred).addReg(PredReg)
- .addReg(0)
.addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
.addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
++NumLDRD2LDM;
.addReg(BaseReg, getKillRegState(BaseKill))
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addImm(Pred).addReg(PredReg)
- .addReg(0)
.addReg(EvenReg,
getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
.addReg(OddReg,
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
+ NewBBI = llvm::prior(MBBI);
} else {
// Split into two instructions.
assert((!isT2 || !OffReg) &&
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
if (OddReg == EvenReg && EvenDeadKill) {
- // If the two source operands are the same, the kill marker is probably
- // on the first one. e.g.
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
// t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
EvenDeadKill = false;
OddDeadKill = true;
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
++NumSTRD2STR;
}
- MBBI = prior(MBBI);
MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
}
return false;
}
if (isMemOp) {
int Opcode = MBBI->getOpcode();
unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
- MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
if (Clobber) {
// No need to match PredReg.
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
I != E; ++I) {
if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
break;
} else if (Offset == I->Offset) {
}
}
- if (Advance) {
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reach the end of the block, try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
++Position;
++MBBI;
if (MBBI == E)
};
}
-/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
-/// (bx lr) into the preceeding stack restore so it directly restore the value
-/// of LR into pc.
-/// ldmfd sp!, {r7, lr}
+/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
+/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
+/// directly restore the value of LR into pc.
+/// ldmfd sp!, {..., lr}
/// bx lr
+/// or
+/// ldmfd sp!, {..., lr}
+/// mov pc, lr
/// =>
-/// ldmfd sp!, {r7, pc}
+/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = prior(MBB.end());
if (MBBI != MBB.begin() &&
- (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
+ (MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::MOVPCLR)) {
MachineInstr *PrevMI = prior(MBBI);
- if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
+ if (PrevMI->getOpcode() == ARM::LDM_UPD ||
+ PrevMI->getOpcode() == ARM::t2LDM_UPD) {
MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
if (MO.getReg() != ARM::LR)
return false;
namespace {
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetData *TD;
const TargetInstrInfo *TII;
// some day.
SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
- if (MemOps.count(&*I))
+ if (I->isDebugValue() || MemOps.count(&*I))
continue;
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
return false;
unsigned Align = (*Op0->memoperands_begin())->getAlignment();
- Function *Func = MF->getFunction();
+ const Function *Func = MF->getFunction();
unsigned ReqAlign = STI->hasV6Ops()
? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
: 8; // Pre-v6 need 8-byte align
std::sort(Ops.begin(), Ops.end(), OffsetCompare());
// The loads / stores of the same base are in order. Scan them from first to
- // last and check for the followins:
+ // last and check for the following:
// 1. Any def of base.
// 2. Any gaps.
while (Ops.size() > 1) {
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
- while (InsertPos != MBB->end() && MemOps.count(InsertPos))
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
++InsertPos;
// If we are moving a pair of loads / stores, see if it makes sense
break;
}
- MI2LocMap[MI] = Loc++;
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;