1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMBaseRegisterInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/Target/TargetData.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/Statistic.h"
// Pass statistics, reported under -stats against DEBUG_TYPE "arm-ldst-opt".
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMAllocLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
// NOTE(review): extraction dropped lines in this declaration — at least the
// enclosing anonymous namespace, the "static char ID;" member, additional
// data members referenced by the implementations below (register scavenger
// "RS", "isThumb2", "AFI"), several closing braces, and some declaration
// parameter lines. Restore from upstream before compiling.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  // Target hooks, cached once per function by runOnMachineFunction.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";
  // NOTE(review): closing "}" of getPassName dropped by extraction.

  /// MemOpQueueEntry - Book-keeping for one candidate memory operation.
  // NOTE(review): the Offset/Reg/isKill/Position/Merged field declarations
  // that the constructor initializes were dropped by extraction, as was the
  // closing "};" of this nested struct.
  struct MemOpQueueEntry {
    MachineBasicBlock::iterator MBBI;

    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}

  // Ordered queue of mergeable memory operations sharing a base register.
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;

  // Worker routines — see each out-of-line definition for documentation.
  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                ArrayRef<std::pair<unsigned, bool> > Regs,
                ArrayRef<unsigned> ImpDefs);
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      unsigned memOpsBegin,
                      unsigned insertAfter,
                      ARMCC::CondCodes Pred,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVector<MachineBasicBlock::iterator, 4> &Merges);

  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// NOTE(review): the struct's closing "};" and the anonymous-namespace close
// were dropped by extraction.
char ARMLoadStoreOpt::ID = 0;
/// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
/// addressing sub-mode (ia/ib/da/db) to the corresponding load/store
/// multiple opcode. Returns 0 when only a base-updating (_UPD) multiple
/// exists for that mode (the VLDM/VSTM db forms).
// NOTE(review): the outer "switch (Opcode)" header, the per-opcode "case"
// labels (with their statistics increments) and the inner "switch (Mode)"
// headers were dropped by extraction; only the leaf lines survive below,
// grouped in their original order: LDM, STM, t2LDM, t2STM, VLDMS, VSTMS,
// VLDMD, VSTMD.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    // ARM-mode integer loads: all four sub-modes exist.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    // ARM-mode integer stores.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    // Thumb2: only ia and db forms exist.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    // VFP single-precision: db only exists with base writeback.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    // VFP double-precision.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
/// getLoadStoreMultipleSubMode - Recover the addressing sub-mode encoded in
/// a load/store-multiple opcode (plain, _UPD and _RET variants).
// NOTE(review): the opening "namespace llvm { namespace ARM_AM {" lines, the
// "switch (Opcode)" header, most of the plain-form case labels and the
// per-group "return ARM_AM::...;" statements were dropped by extraction;
// the surviving labels below fall into an IA group followed by a DB group.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  // IA (increment-after) group — presumably ends in "return ARM_AM::ia;".
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA_UPD:
  // DB (decrement-before) group — presumably ends in "return ARM_AM::db;".
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
} // end namespace ARM_AM
} // end namespace llvm
262 static bool isT2i32Load(unsigned Opc) {
263 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
266 static bool isi32Load(unsigned Opc) {
267 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
270 static bool isT2i32Store(unsigned Opc) {
271 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
274 static bool isi32Store(unsigned Opc) {
275 return Opc == ARM::STRi12 || isT2i32Store(Opc);
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
// NOTE(review): extraction dropped lines inside this function — at least the
// "bool" return-type line, the NumRegs<=1 bail-out, the "Mode = ..."
// assignments under each offset test, the NewBase declaration/else-branch,
// the Offset<0 handling, "Base = NewBase;", several closing braces and the
// final "return true;". The comments below mark the visible gap points.
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();

  // Select the addressing sub-mode from the starting offset.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    // NOTE(review): dropped body — presumably "Mode = ARM_AM::ib;".
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    // NOTE(review): dropped body — presumably "Mode = ARM_AM::da;".
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    // NOTE(review): dropped body — presumably "Mode = ARM_AM::db;".
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
      // Use the scratch register to use as a new base.
    // NOTE(review): the else-branch assigning Scratch to NewBase was dropped.
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    // NOTE(review): dropped negative-offset handling preceding this line.
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;

    // Offset must be encodable as an (T2) SO immediate to be worth it.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    BaseKill = true; // New base is always killed right its use.

  // Loads define their register operands; stores use them.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success. On success the merged memops [memOpsBegin, memOpsEnd) are erased
// and their queue entries are redirected at the new multiple instruction.
// NOTE(review): extraction dropped several lines here — at least the
// "memOps" (MemOpQueue&), "int Opcode", "unsigned Scratch" and "DebugLoc dl"
// parameter lines, the "break"/"continue" statements in the kill scan, the
// "Killer[Reg] = i;" bookkeeping, an early return after a failed MergeOps,
// and assorted closing braces.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
    // Record regs killed by memops that sit before the insertion point;
    // their kill flags must migrate to the merged instruction.
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);

  // Build the (reg, isKill) list for the merged instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      memOps[i].isKill = true;
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
// NOTE(review): extraction dropped lines here — at least the "bool" return
// type, a "Count" counter declaration, the switch computing the VFP register
// Limit, the body of the successful-match branch (extend run, continue), and
// several closing braces. As shown, the "Can't merge" code appears under the
// match condition, but upstream it belongs to the *failure* path — the
// success body between them was dropped.
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // Undef regs compare greater than everything so they never extend a run.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);

  unsigned Limit = ~0U;
  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  // Scan forward for memops that extend the current run.
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,

    // Track the latest position so the merge is inserted after all inputs.
    if (MemOps[i].Position > MemOps[insertAfter].Position)

  // Merge the trailing run [SIndex, end).
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
517 static bool definesCPSR(MachineInstr *MI) {
518 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
519 const MachineOperand &MO = MI->getOperand(i);
522 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
523 // If the instruction has live CPSR def, then it's not safe to fold it
524 // into load / store.
531 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
532 unsigned Bytes, unsigned Limit,
533 ARMCC::CondCodes Pred, unsigned PredReg) {
534 unsigned MyPredReg = 0;
538 bool CheckCPSRDef = false;
539 switch (MI->getOpcode()) {
540 default: return false;
549 // Make sure the offset fits in 8 bits.
550 if (Bytes == 0 || (Limit && Bytes >= Limit))
553 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
554 if (!(MI->getOperand(0).getReg() == Base &&
555 MI->getOperand(1).getReg() == Base &&
556 (MI->getOperand(2).getImm()*Scale) == Bytes &&
557 getInstrPredicate(MI, MyPredReg) == Pred &&
558 MyPredReg == PredReg))
561 return CheckCPSRDef ? !definesCPSR(MI) : true;
564 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
565 unsigned Bytes, unsigned Limit,
566 ARMCC::CondCodes Pred, unsigned PredReg) {
567 unsigned MyPredReg = 0;
571 bool CheckCPSRDef = false;
572 switch (MI->getOpcode()) {
573 default: return false;
582 if (Bytes == 0 || (Limit && Bytes >= Limit))
583 // Make sure the offset fits in 8 bits.
586 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
587 if (!(MI->getOperand(0).getReg() == Base &&
588 MI->getOperand(1).getReg() == Base &&
589 (MI->getOperand(2).getImm()*Scale) == Bytes &&
590 getInstrPredicate(MI, MyPredReg) == Pred &&
591 MyPredReg == PredReg))
594 return CheckCPSRDef ? !definesCPSR(MI) : true;
/// getLSMultipleTransferSize - Return the number of bytes transferred by MI.
/// For load/store-multiple forms the count is the number of register
/// operands (total operands minus the fixed operands, plus one) times the
/// per-register size: 4 for GPR/S-register forms, 8 for D-register forms.
// NOTE(review): extraction dropped the case-label lists of this switch —
// the single-register forms returning fixed sizes, the GPR/S-multiple
// labels feeding the "* 4" return, the D-multiple labels feeding the "* 8"
// return, the default case and the closing braces.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// getUpdatingLSMultipleOpcode - Map a non-writeback load/store-multiple
/// opcode plus an addressing sub-mode to the corresponding base-updating
/// (_UPD) opcode.
// NOTE(review): the outer "switch (Opc)" header, the per-opcode "case"
// labels and the inner "switch (Mode)" headers were dropped by extraction;
// the surviving leaf lines appear in their original order: LDM, STM,
// t2LDM, t2STM, VLDMS, VLDMD, VSTMS, VSTMD.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    // ARM-mode integer loads.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    // ARM-mode integer stores.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    // Thumb2: ia/db only.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    // VFP single-precision.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    // VFP double-precision.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
// NOTE(review): extraction dropped lines here — at least a "bool &Advance"
// parameter line, the "return false;" bodies of the early bail-outs, the
// DoMerge/Mode assignments and erase() calls inside both matching branches,
// the "if (!DoMerge) return false;" gate before the rebuild, and the
// trailing bookkeeping (transfer defs/kills, erase MI, return true).
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {

  // Rebuild as the base-updating (_UPD) form of the same multiple.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
795 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
796 ARM_AM::AddrOpc Mode) {
799 return ARM::LDR_PRE_IMM;
801 return ARM::STR_PRE_IMM;
803 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
805 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
807 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
809 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
812 return ARM::t2LDR_PRE;
815 return ARM::t2STR_PRE;
816 default: llvm_unreachable("Unhandled opcode!");
820 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
821 ARM_AM::AddrOpc Mode) {
824 return ARM::LDR_POST_IMM;
826 return ARM::STR_POST_IMM;
828 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
830 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
832 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
834 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
837 return ARM::t2LDR_POST;
840 return ARM::t2STR_POST;
841 default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// NOTE(review): extraction dropped lines here — at least a "bool &Advance"
// parameter, the "return false;" bodies of the early bail-outs, a
// "unsigned NewOpc = 0;" declaration, DoMerge assignments and erase()
// calls in both matching branches, the "if (!DoMerge) return false;" gate,
// the isAM5/isAM2/else branch headers around the rebuild code, the else
// keywords between alternative BuildMI bodies, and the trailing cleanup
// (erase MI, return true) plus closing braces.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only zero-offset forms can absorb a base update.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;

  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
    NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);

  // VLDM[SD}_UPD, VSTM[SD]_UPD
  // (There are no base-updating versions of VLDR/VSTR instructions, but the
  // updating load/store-multiple instructions can be used with only one
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
    // Load path: pre-indexed immediate forms take a plain signed offset.
    if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      // Post-indexed AM2 forms encode the offset via getAM2Opc and carry a
      // vestigial zero offset register.
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
    // t2LDR_PRE, t2LDR_POST
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
      .addReg(Base, RegState::Define)
      .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    // Store path.
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
// NOTE(review): extraction dropped the "return false;" bodies of each
// rejecting test below, the "switch (Opcode)" header with its case labels
// (which select the final three accept paths), the default/return-false
// tail and the closing braces.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  if (MMO->getAlignment() < 4)

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())

  int Opcode = MI->getOpcode();
  // Accepted opcodes additionally require a register base operand.
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
1034 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1035 /// op that is being merged.
1036 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1037 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1038 unsigned Position = MemOps[0].Position;
1039 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1040 if (MemOps[i].Position < Position) {
1041 Position = MemOps[i].Position;
1042 Loc = MemOps[i].MBBI;
1046 if (Loc != MBB.begin())
1047 RS->forward(prior(Loc));
1050 static int getMemoryOpOffset(const MachineInstr *MI) {
1051 int Opcode = MI->getOpcode();
1052 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1053 unsigned NumOperands = MI->getDesc().getNumOperands();
1054 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1056 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1057 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1058 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1059 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1062 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1063 : ARM_AM::getAM5Offset(OffField) * 4;
1065 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1068 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1074 static void InsertLDR_STR(MachineBasicBlock &MBB,
1075 MachineBasicBlock::iterator &MBBI,
1076 int Offset, bool isDef,
1077 DebugLoc dl, unsigned NewOpc,
1078 unsigned Reg, bool RegDeadKill, bool RegUndef,
1079 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1080 bool OffKill, bool OffUndef,
1081 ARMCC::CondCodes Pred, unsigned PredReg,
1082 const TargetInstrInfo *TII, bool isT2) {
1084 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1086 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1087 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1088 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1090 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1092 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1093 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1094 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1098 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1099 MachineBasicBlock::iterator &MBBI) {
1100 MachineInstr *MI = &*MBBI;
1101 unsigned Opcode = MI->getOpcode();
1102 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1103 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1104 const MachineOperand &BaseOp = MI->getOperand(2);
1105 unsigned BaseReg = BaseOp.getReg();
1106 unsigned EvenReg = MI->getOperand(0).getReg();
1107 unsigned OddReg = MI->getOperand(1).getReg();
1108 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1109 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1110 // ARM errata 602117: LDRD with base in list may result in incorrect base
1111 // register when interrupted or faulted.
1112 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1113 if (!Errata602117 &&
1114 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1117 MachineBasicBlock::iterator NewBBI = MBBI;
1118 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1119 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1120 bool EvenDeadKill = isLd ?
1121 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1122 bool EvenUndef = MI->getOperand(0).isUndef();
1123 bool OddDeadKill = isLd ?
1124 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1125 bool OddUndef = MI->getOperand(1).isUndef();
1126 bool BaseKill = BaseOp.isKill();
1127 bool BaseUndef = BaseOp.isUndef();
1128 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1129 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1130 int OffImm = getMemoryOpOffset(MI);
1131 unsigned PredReg = 0;
1132 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1134 if (OddRegNum > EvenRegNum && OffImm == 0) {
1135 // Ascending register numbers and no offset. It's safe to change it to a
1137 unsigned NewOpc = (isLd)
1138 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1139 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1141 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1142 .addReg(BaseReg, getKillRegState(BaseKill))
1143 .addImm(Pred).addReg(PredReg)
1144 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1145 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1148 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1149 .addReg(BaseReg, getKillRegState(BaseKill))
1150 .addImm(Pred).addReg(PredReg)
1152 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1154 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1157 NewBBI = llvm::prior(MBBI);
1159 // Split into two instructions.
1160 unsigned NewOpc = (isLd)
1161 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1162 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1163 DebugLoc dl = MBBI->getDebugLoc();
1164 // If this is a load and base register is killed, it may have been
1165 // re-defed by the load, make sure the first load does not clobber it.
1167 (BaseKill || OffKill) &&
1168 (TRI->regsOverlap(EvenReg, BaseReg))) {
1169 assert(!TRI->regsOverlap(OddReg, BaseReg));
1170 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1171 OddReg, OddDeadKill, false,
1172 BaseReg, false, BaseUndef, false, OffUndef,
1173 Pred, PredReg, TII, isT2);
1174 NewBBI = llvm::prior(MBBI);
1175 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1176 // so adjust and use t2LDRi12 here for that.
1177 if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0)
1178 NewOpc = ARM::t2LDRi12;
1179 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1180 EvenReg, EvenDeadKill, false,
1181 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1182 Pred, PredReg, TII, isT2);
1184 if (OddReg == EvenReg && EvenDeadKill) {
1185 // If the two source operands are the same, the kill marker is
1186 // probably on the first one. e.g.
1187 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1188 EvenDeadKill = false;
1191 // Never kill the base register in the first instruction.
1192 // <rdar://problem/11101911>
1193 if (EvenReg == BaseReg)
1194 EvenDeadKill = false;
1195 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1196 EvenReg, EvenDeadKill, EvenUndef,
1197 BaseReg, false, BaseUndef, false, OffUndef,
1198 Pred, PredReg, TII, isT2);
1199 NewBBI = llvm::prior(MBBI);
1200 // Be extra careful for thumb2. t2STRi8 can't reference a zero offset,
1201 // so adjust and use t2STRi12 here for that.
1202 if (isT2 && NewOpc == ARM::t2STRi8 && OffImm+4 >= 0)
1203 NewOpc = ARM::t2STRi12;
1204 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1205 OddReg, OddDeadKill, OddUndef,
1206 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1207 Pred, PredReg, TII, isT2);
1222 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1223 /// ops of the same base and incrementing offset into LDM / STM ops.
/// Returns true if any merge (LDM/STM formation or base-update folding) was
/// performed in MBB.
1224 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1225 unsigned NumMerges = 0;
1226 unsigned NumMemOps = 0;
// State of the chain currently being accumulated: every queued memory op
// must share this base register, opcode/size, and predicate to be mergeable.
1228 unsigned CurrBase = 0;
1230 unsigned CurrSize = 0;
1231 ARMCC::CondCodes CurrPred = ARMCC::AL;
1232 unsigned CurrPredReg = 0;
1233 unsigned Position = 0;
1234 SmallVector<MachineBasicBlock::iterator,4> Merges;
// The scavenger tracks register liveness so a free scratch base register can
// be found when one is needed for the merged instruction.
1236 RS->enterBasicBlock(&MBB);
1237 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// Illegal LDRD/STRD register pairs must be fixed up before merging.
1239 if (FixInvalidRegPairOp(MBB, MBBI))
1242 bool Advance = false;
1243 bool TryMerge = false;
1244 bool Clobber = false;
1246 bool isMemOp = isMemoryOp(MBBI);
1248 int Opcode = MBBI->getOpcode();
1249 unsigned Size = getLSMultipleTransferSize(MBBI);
1250 const MachineOperand &MO = MBBI->getOperand(0);
1251 unsigned Reg = MO.getReg();
// A load defines operand 0, so it cannot carry a kill flag for it.
1252 bool isKill = MO.isDef() ? false : MO.isKill();
1253 unsigned Base = MBBI->getOperand(1).getReg();
1254 unsigned PredReg = 0;
1255 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1256 int Offset = getMemoryOpOffset(MBBI);
1259 // r5 := ldr [r5, #4]
1260 // r6 := ldr [r5, #8]
1262 // The second ldr has effectively broken the chain even though it
1263 // looks like the later ldr(s) use the same base register. Try to
1264 // merge the ldr's so far, including this one. But don't try to
1265 // combine the following ldr(s).
1266 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1267 if (CurrBase == 0 && !Clobber) {
1268 // Start of a new chain.
1273 CurrPredReg = PredReg;
1274 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same chain: identical opcode, base register, and condition code.
1283 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1284 // No need to match PredReg.
1285 // Continue adding to the queue.
// Common case: offsets arrive in increasing order; append at the tail.
1286 if (Offset > MemOps.back().Offset) {
1287 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise find the sorted insertion point by offset.
1292 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1294 if (Offset < I->Offset) {
1295 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1300 } else if (Offset == I->Offset) {
1301 // Collision! This can't be merged!
1310 if (MBBI->isDebugValue()) {
1313 // Reach the end of the block, try merging the memory instructions.
1315 } else if (Advance) {
1319 // Reach the end of the block, try merging the memory instructions.
1325 if (NumMemOps > 1) {
1326 // Try to find a free register to use as a new base in case it's needed.
1327 // First advance to the instruction just before the start of the chain.
1328 AdvanceRS(MBB, MemOps);
1329 // Find a scratch register.
1330 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1331 // Process the load / store instructions.
1332 RS->forward(prior(MBBI));
1336 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1337 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1339 // Try folding preceding/trailing base inc/dec into the generated
1341 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1342 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1344 NumMerges += Merges.size();
1346 // Try folding preceding/trailing base inc/dec into those load/store
1347 // that were not merged to form LDM/STM ops.
1348 for (unsigned i = 0; i != NumMemOps; ++i)
1349 if (!MemOps[i].Merged)
1350 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1353 // RS may be pointing to an instruction that's deleted.
1354 RS->skipTo(prior(MBBI));
1355 } else if (NumMemOps == 1) {
1356 // Try folding preceding/trailing base inc/dec into the single
// load / store of the one-element chain.
1358 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1360 RS->forward(prior(MBBI));
// Reset the chain state before scanning for the next chain.
1367 CurrPred = ARMCC::AL;
1374 // If iterator hasn't been advanced and this is not a memory op, skip it.
1375 // It can't start a new chain anyway.
1376 if (!Advance && !isMemOp && MBBI != E) {
1382 return NumMerges > 0;
1385 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1386 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1387 /// directly restores the value of LR into pc.
1388 /// ldmfd sp!, {..., lr}
1391 /// ldmfd sp!, {..., lr}
1394 /// ldmfd sp!, {..., pc}
/// Returns true if the return was folded into the pop.
1395 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1396 if (MBB.empty()) return false;
1398 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
// Only handle blocks ending in a plain return (ARM or Thumb), and only when
// there is at least one instruction before it to merge into.
1399 if (MBBI != MBB.begin() &&
1400 (MBBI->getOpcode() == ARM::BX_RET ||
1401 MBBI->getOpcode() == ARM::tBX_RET ||
1402 MBBI->getOpcode() == ARM::MOVPCLR)) {
1403 MachineInstr *PrevMI = prior(MBBI);
1404 unsigned Opcode = PrevMI->getOpcode();
// The preceding instruction must be a writeback load-multiple (a pop).
1405 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1406 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1407 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// LR must be the last operand of the register list for the rewrite to be
// a simple operand substitution.
1408 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1409 if (MO.getReg() != ARM::LR)
1411 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
// Only the IA (increment-after) forms are rewritten; anything else here
// would be a selection bug.
1412 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1413 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1414 PrevMI->setDesc(TII->get(NewOpc));
// Carry over the implicit operands of the return instruction being removed.
1416 PrevMI->copyImplicitOps(&*MBBI);
/// runOnMachineFunction - Cache the target hooks, then run the post-RA
/// load/store merging over every basic block of the function.
1424 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1425 const TargetMachine &TM = Fn.getTarget();
1426 AFI = Fn.getInfo<ARMFunctionInfo>();
1427 TII = TM.getInstrInfo();
1428 TRI = TM.getRegisterInfo();
1429 STI = &TM.getSubtarget<ARMSubtarget>();
// NOTE(review): RS is heap-allocated here; the matching delete is not
// visible in this excerpt -- confirm it is released after the block loop.
1430 RS = new RegScavenger();
1431 isThumb2 = AFI->isThumb2Function();
1433 bool Modified = false;
1434 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1436 MachineBasicBlock &MBB = *MFI;
1437 Modified |= LoadStoreMultipleOpti(MBB);
// Return merging is only attempted on v5T+ subtargets.
1438 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1439 Modified |= MergeReturnIntoLDM(MBB);
1447 /// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
1448 /// loads / stores of consecutive locations closer together to make it more
1449 /// likely they will be combined later.
1452 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1454 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks cached once per function in runOnMachineFunction.
1456 const TargetData *TD;
1457 const TargetInstrInfo *TII;
1458 const TargetRegisterInfo *TRI;
1459 const ARMSubtarget *STI;
1460 MachineRegisterInfo *MRI;
1461 MachineFunction *MF;
1463 virtual bool runOnMachineFunction(MachineFunction &Fn);
1465 virtual const char *getPassName() const {
1466 return "ARM pre- register allocation load / store optimization pass";
// Checks whether two adjacent i32 loads (or stores) Op0/Op1 can be combined
// into a single LDRD/STRD; on success fills in the new opcode, the two data
// registers, base register, encoded offset, and predicate out-parameters.
1470 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1471 unsigned &NewOpc, unsigned &EvenReg,
1472 unsigned &OddReg, unsigned &BaseReg,
1474 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves same-base loads (or stores) next to each other when it is safe and
// the register-pressure cost is acceptable.
1476 bool RescheduleOps(MachineBasicBlock *MBB,
1477 SmallVector<MachineInstr*, 4> &Ops,
1478 unsigned Base, bool isLd,
1479 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1480 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1482 char ARMPreAllocLoadStoreOpt::ID = 0;
/// runOnMachineFunction - Cache the target hooks, then reschedule loads and
/// stores in every basic block of the function.
1485 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1486 TD = Fn.getTarget().getTargetData();
1487 TII = Fn.getTarget().getInstrInfo();
1488 TRI = Fn.getTarget().getRegisterInfo();
1489 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1490 MRI = &Fn.getRegInfo();
1493 bool Modified = false;
1494 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1496 Modified |= RescheduleLoadStoreInstrs(MFI);
/// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps can be
/// moved together past the instructions in [I, E) without changing behavior,
/// and whether the resulting register-pressure increase is acceptable.
/// MemRegs holds the data registers already used by the ops being moved.
1501 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1502 MachineBasicBlock::iterator I,
1503 MachineBasicBlock::iterator E,
1504 SmallPtrSet<MachineInstr*, 4> &MemOps,
1505 SmallSet<unsigned, 4> &MemRegs,
1506 const TargetRegisterInfo *TRI) {
1507 // Are there stores / loads / calls between them?
1508 // FIXME: This is overly conservative. We should make use of alias information
1510 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the memory ops that are themselves being moved.
1512 if (I->isDebugValue() || MemOps.count(&*I))
// Calls, terminators, and side-effecting instructions are barriers.
1514 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
// Moving loads across an intervening store is unsafe (no alias info here).
1516 if (isLd && I->mayStore())
1521 // It's not safe to move the first 'str' down.
1524 // str r4, [r0, #+4]
// Scan the operands of each intervening instruction.
1528 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1529 MachineOperand &MO = I->getOperand(j);
1532 unsigned Reg = MO.getReg();
// A def that overlaps the common base register breaks the chain.
1533 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
// Any other register whose live range would be stretched by the motion
// counts toward the register-pressure estimate.
1535 if (Reg != Base && !MemRegs.count(Reg))
1536 AddedRegPressure.insert(Reg);
1540 // Estimate register pressure increase due to the transformation.
1541 if (MemRegs.size() <= 4)
1542 // Ok if we are moving small number of instructions.
1544 return AddedRegPressure.size() <= MemRegs.size() * 2;
1548 /// Copy Op0 and Op1 memory operands into a new array assigned to MI, so the
1548 /// combined instruction keeps the aliasing information of both originals.
1549 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1550 MachineInstr *Op1) {
1551 assert(MI->memoperands_empty() && "expected a new machineinstr");
1552 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1553 + (Op1->memoperands_end() - Op1->memoperands_begin());
// The memory-operand array is owned by the MachineFunction's allocator.
1555 MachineFunction *MF = MI->getParent()->getParent();
1556 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1557 MachineSDNode::mmo_iterator MemEnd =
1558 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1560 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1561 MI->setMemRefs(MemBegin, MemEnd);
/// CanFormLdStDWord - Check whether the i32 load/store pair Op0/Op1 can be
/// turned into a single LDRD/STRD, filling in the out-parameters on success.
1565 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1567 unsigned &NewOpc, unsigned &EvenReg,
1568 unsigned &OddReg, unsigned &BaseReg,
1569 int &Offset, unsigned &PredReg,
1570 ARMCC::CondCodes &Pred,
1572 // Make sure we're allowed to generate LDRD/STRD.
1573 if (!STI->hasV5TEOps())
1576 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the single-word opcode to its doubleword counterpart.
1578 unsigned Opcode = Op0->getOpcode();
1579 if (Opcode == ARM::LDRi12)
1581 else if (Opcode == ARM::STRi12)
1583 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1584 NewOpc = ARM::t2LDRDi8;
1587 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1588 NewOpc = ARM::t2STRDi8;
1594 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Bail out on volatile accesses or when the alignment is unknown.
1595 if (!Op0->hasOneMemOperand() ||
1596 !(*Op0->memoperands_begin())->getValue() ||
1597 (*Op0->memoperands_begin())->isVolatile())
1600 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1601 const Function *Func = MF->getFunction();
1602 unsigned ReqAlign = STI->hasV6Ops()
1603 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1604 : 8; // Pre-v6 need 8-byte align
1605 if (Align < ReqAlign)
1608 // Then make sure the immediate offset fits.
1609 int OffImm = getMemoryOpOffset(Op0)
1611 int Limit = (1 << 8) * Scale;
1612 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// Negative offsets are encoded with an explicit subtract addressing mode.
1616 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1618 AddSub = ARM_AM::sub;
1621 int Limit = (1 << 8) * Scale;
1622 if (OffImm >= Limit || (OffImm & (Scale-1)))
// Encode the final addrmode3 offset for the LDRD/STRD.
1624 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two destination (or source) registers must be distinct.
1626 EvenReg = Op0->getOperand(0).getReg();
1627 OddReg = Op1->getOperand(0).getReg();
1628 if (EvenReg == OddReg)
1630 BaseReg = Op0->getOperand(1).getReg();
1631 Pred = getInstrPredicate(Op0, PredReg);
1632 dl = Op0->getDebugLoc();
/// OffsetCompare - Comparator that orders memory instructions by decreasing
/// address offset (used to sort same-base loads/stores before rescheduling).
1637 struct OffsetCompare {
1638 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1639 int LOffset = getMemoryOpOffset(LHS);
1640 int ROffset = getMemoryOpOffset(RHS);
// Distinct instructions on the same base are expected to have distinct
// offsets here (duplicates are filtered out before sorting).
1641 assert(LHS == RHS || LOffset != ROffset);
1642 return LOffset > ROffset;
/// RescheduleOps - Given the loads (or stores, per isLd) in Ops that share
/// base register Base, move runs of them next to each other (and, for pairs,
/// try to form LDRD/STRD) when safe and profitable. MI2LocMap gives each
/// instruction's original position in the block. Returns true on any change.
1647 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1648 SmallVector<MachineInstr*, 4> &Ops,
1649 unsigned Base, bool isLd,
1650 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1651 bool RetVal = false;
1653 // Sort by offset (in reverse order).
1654 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1656 // The loads / stores of the same base are in order. Scan them from first to
1657 // last and check for the following:
1658 // 1. Any def of base.
1660 while (Ops.size() > 1) {
// Track the span of block positions covered by the candidate run.
1661 unsigned FirstLoc = ~0U;
1662 unsigned LastLoc = 0;
1663 MachineInstr *FirstOp = 0;
1664 MachineInstr *LastOp = 0;
1666 unsigned LastOpcode = 0;
1667 unsigned LastBytes = 0;
1668 unsigned NumMove = 0;
// Ops is sorted by decreasing offset, so iterate backwards to visit the
// ops in increasing-offset order.
1669 for (int i = Ops.size() - 1; i >= 0; --i) {
1670 MachineInstr *Op = Ops[i];
1671 unsigned Loc = MI2LocMap[Op];
1672 if (Loc <= FirstLoc) {
1676 if (Loc >= LastLoc) {
1682 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
// All ops in a run must map to the same load/store-multiple opcode.
1683 if (LastOpcode && LSMOpcode != LastOpcode)
1686 int Offset = getMemoryOpOffset(Op);
1687 unsigned Bytes = getLSMultipleTransferSize(Op);
// The run must be contiguous: same size, consecutive offsets.
1689 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1692 LastOffset = Offset;
1694 LastOpcode = LSMOpcode;
1695 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and data registers for the safety and
// register-pressure checks.
1702 SmallPtrSet<MachineInstr*, 4> MemOps;
1703 SmallSet<unsigned, 4> MemRegs;
1704 for (int i = NumMove-1; i >= 0; --i) {
1705 MemOps.insert(Ops[i]);
1706 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1709 // Be conservative, if the instructions are too far apart, don't
1710 // move them. We want to limit the increase of register pressure.
1711 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1713 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1714 MemOps, MemRegs, TRI);
1716 for (unsigned i = 0; i != NumMove; ++i)
1719 // This is the new location for the loads / stores.
// Loads are hoisted to the first op's position, stores sunk to the last.
1720 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1721 while (InsertPos != MBB->end()
1722 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1725 // If we are moving a pair of loads / stores, see if it makes sense
1726 // to try to allocate a pair of registers that can form register pairs.
1727 MachineInstr *Op0 = Ops.back();
1728 MachineInstr *Op1 = Ops[Ops.size()-2];
1729 unsigned EvenReg = 0, OddReg = 0;
1730 unsigned BaseReg = 0, PredReg = 0;
1731 ARMCC::CondCodes Pred = ARMCC::AL;
1733 unsigned NewOpc = 0;
1736 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1737 EvenReg, OddReg, BaseReg,
1738 Offset, PredReg, Pred, isT2)) {
// Constrain both virtual registers to the class required by LDRD/STRD.
1742 const MCInstrDesc &MCID = TII->get(NewOpc);
1743 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1744 MRI->constrainRegClass(EvenReg, TRC);
1745 MRI->constrainRegClass(OddReg, TRC);
1747 // Form the pair instruction.
1749 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1750 .addReg(EvenReg, RegState::Define)
1751 .addReg(OddReg, RegState::Define)
1753 // FIXME: We're converting from LDRi12 to an insn that still
1754 // uses addrmode2, so we need an explicit offset reg. It should
1755 // always by reg0 since we're transforming LDRi12s.
1758 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1759 concatenateMemOperands(MIB, Op0, Op1);
1760 DEBUG(dbgs() << "Formed " << *MIB << "\n");
// Store case: same construction, but the data regs are uses, not defs.
1763 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1767 // FIXME: We're converting from LDRi12 to an insn that still
1768 // uses addrmode2, so we need an explicit offset reg. It should
1769 // always by reg0 since we're transforming STRi12s.
1772 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1773 concatenateMemOperands(MIB, Op0, Op1);
1774 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1780 // Add register allocation hints to form register pairs.
1781 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1782 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// No pair formed: just splice the run's instructions to the new position.
1784 for (unsigned i = 0; i != NumMove; ++i) {
1785 MachineInstr *Op = Ops.back();
1787 MBB->splice(InsertPos, MBB, Op);
1791 NumLdStMoved += NumMove;
/// RescheduleLoadStoreInstrs - Partition the block into regions bounded by
/// calls/terminators, bucket the loads and stores of each region by base
/// register, and invoke RescheduleOps on every bucket with 2+ entries.
1801 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1802 bool RetVal = false;
// Maps from instruction to block position, and from base register to the
// loads (resp. stores) seen so far using that base.
1804 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1805 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1806 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1807 SmallVector<unsigned, 4> LdBases;
1808 SmallVector<unsigned, 4> StBases;
1811 MachineBasicBlock::iterator MBBI = MBB->begin();
1812 MachineBasicBlock::iterator E = MBB->end();
1814 for (; MBBI != E; ++MBBI) {
1815 MachineInstr *MI = MBBI;
1816 if (MI->isCall() || MI->isTerminator()) {
1817 // Stop at barriers.
// Number each real (non-debug) instruction for distance checks later.
1822 if (!MI->isDebugValue())
1823 MI2LocMap[MI] = ++Loc;
1825 if (!isMemoryOp(MI))
// Only unpredicated memory ops are candidates.
1827 unsigned PredReg = 0;
1828 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1831 int Opc = MI->getOpcode();
1832 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1833 unsigned Base = MI->getOperand(1).getReg();
1834 int Offset = getMemoryOpOffset(MI);
1836 bool StopHere = false;
// Bucket loads by base register, rejecting duplicate base+offset pairs.
1838 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1839 Base2LdsMap.find(Base);
1840 if (BI != Base2LdsMap.end()) {
1841 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1842 if (Offset == getMemoryOpOffset(BI->second[i])) {
1848 BI->second.push_back(MI);
1850 SmallVector<MachineInstr*, 4> MIs;
1852 Base2LdsMap[Base] = MIs;
1853 LdBases.push_back(Base);
// Same bucketing for stores.
1856 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1857 Base2StsMap.find(Base);
1858 if (BI != Base2StsMap.end()) {
1859 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1860 if (Offset == getMemoryOpOffset(BI->second[i])) {
1866 BI->second.push_back(MI);
1868 SmallVector<MachineInstr*, 4> MIs;
1870 Base2StsMap[Base] = MIs;
1871 StBases.push_back(Base);
1876 // Found a duplicate (a base+offset combination that's seen earlier).
1883 // Re-schedule loads.
1884 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1885 unsigned Base = LdBases[i];
1886 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1888 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1891 // Re-schedule stores.
1892 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1893 unsigned Base = StBases[i];
1894 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1896 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Clear per-region state before processing the next region.
1900 Base2LdsMap.clear();
1901 Base2StsMap.clear();
1911 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1912 /// optimization pass.
1913 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1915 return new ARMPreAllocLoadStoreOpt();
1916 return new ARMLoadStoreOpt();