1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Target/TargetInstrInfo.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetRegisterInfo.h"
47 #define DEBUG_TYPE "arm-ldst-opt"
// Pass statistics (reported with -stats): counts of merged multi-register
// and doubleword load/store instructions this pass generates or rewrites.
49 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
50 STATISTIC(NumSTMGened , "Number of stm instructions generated");
51 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
52 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
53 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
54 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
55 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
61 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
62 /// load / store instructions to form ldm / stm instructions.
65 struct ARMLoadStoreOpt : public MachineFunctionPass {
67 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks, cached per machine function in runOnMachineFunction.
69 const TargetInstrInfo *TII;
70 const TargetRegisterInfo *TRI;
71 const ARMSubtarget *STI;
72 const TargetLowering *TL;
// Instruction-set mode of the function currently being optimized.
75 bool isThumb1, isThumb2;
77 bool runOnMachineFunction(MachineFunction &Fn) override;
79 const char *getPassName() const override {
80 return "ARM load / store optimization pass";
// MemOpQueueEntry - One load/store candidate queued for merging into a
// load/store-multiple instruction.
// NOTE(review): several member declarations are elided here; the
// constructor's init list shows the members: Offset, Reg, isKill,
// Position, MBBI, Merged.
84 struct MemOpQueueEntry {
89 MachineBasicBlock::iterator MBBI;
91 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
92 MachineBasicBlock::iterator i)
93 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
95 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
96 typedef MemOpQueue::iterator MemOpQueueIter;
// Worker declarations; their definitions (and doc comments) appear
// later in this file.
98 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
99 const MemOpQueue &MemOps, unsigned DefReg,
100 unsigned RangeBegin, unsigned RangeEnd);
101 void UpdateBaseRegUses(MachineBasicBlock &MBB,
102 MachineBasicBlock::iterator MBBI,
103 DebugLoc dl, unsigned Base, unsigned WordOffset,
104 ARMCC::CondCodes Pred, unsigned PredReg);
105 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
106 int Offset, unsigned Base, bool BaseKill, int Opcode,
107 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
109 ArrayRef<std::pair<unsigned, bool> > Regs,
110 ArrayRef<unsigned> ImpDefs);
111 void MergeOpsUpdate(MachineBasicBlock &MBB,
113 unsigned memOpsBegin,
115 unsigned insertAfter,
120 ARMCC::CondCodes Pred,
124 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
125 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
126 int Opcode, unsigned Size,
127 ARMCC::CondCodes Pred, unsigned PredReg,
128 unsigned Scratch, MemOpQueue &MemOps,
129 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
130 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
131 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
132 MachineBasicBlock::iterator &MBBI);
133 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
134 MachineBasicBlock::iterator MBBI,
135 const TargetInstrInfo *TII,
137 MachineBasicBlock::iterator &I);
138 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
139 MachineBasicBlock::iterator MBBI,
141 MachineBasicBlock::iterator &I);
142 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
143 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification; the address of ID uniquely identifies the pass.
145 char ARMLoadStoreOpt::ID = 0;
// Returns true if MI has a live (non-dead) definition of CPSR. An
// instruction with a live CPSR def cannot be folded into a merged
// load/store (folding would lose the flag update).
148 static bool definesCPSR(const MachineInstr *MI) {
149 for (const auto &MO : MI->operands()) {
152 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
153 // If the instruction has live CPSR def, then it's not safe to fold it
154 // into load / store.
// Decode the immediate offset of a single load/store MI into a signed
// byte offset. The offset immediate is the third-from-last operand.
// Plain-immediate opcodes (t2*, LDRi12/STRi12) and the *4-scaled Thumb1
// opcodes are handled by the early cases; otherwise the value is decoded
// from the AM3 (LDRD/STRD) or AM5 (VFP, word-scaled) encoding, negated
// for a subtracting addressing mode.
161 static int getMemoryOpOffset(const MachineInstr *MI) {
162 int Opcode = MI->getOpcode();
163 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
164 unsigned NumOperands = MI->getDesc().getNumOperands();
165 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// Offsets in these encodings are already plain byte offsets.
167 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
168 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
169 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
170 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
173 // Thumb1 immediate offsets are scaled by 4
174 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
175 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
// AM3 encodes a byte offset; AM5 encodes a word count, hence the *4.
178 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
179 : ARM_AM::getAM5Offset(OffField) * 4;
180 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
181 : ARM_AM::getAM5Op(OffField);
// A subtracting addressing mode means the decoded offset is negative.
183 if (Op == ARM_AM::sub)
// Map a single load/store opcode plus an addressing submode (IA/IB/DA/DB)
// to the corresponding load/store-multiple opcode. Returns 0 for
// combinations that only exist as writeback (_UPD) variants (see the
// VLDM/VSTM db cases below); callers must check for 0.
189 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
191 default: llvm_unreachable("Unhandled opcode!");
195 default: llvm_unreachable("Unhandled submode!");
196 case ARM_AM::ia: return ARM::LDMIA;
197 case ARM_AM::da: return ARM::LDMDA;
198 case ARM_AM::db: return ARM::LDMDB;
199 case ARM_AM::ib: return ARM::LDMIB;
204 default: llvm_unreachable("Unhandled submode!");
205 case ARM_AM::ia: return ARM::STMIA;
206 case ARM_AM::da: return ARM::STMDA;
207 case ARM_AM::db: return ARM::STMDB;
208 case ARM_AM::ib: return ARM::STMIB;
212 // tLDMIA is writeback-only - unless the base register is in the input
216 default: llvm_unreachable("Unhandled submode!");
217 case ARM_AM::ia: return ARM::tLDMIA;
221 // There is no non-writeback tSTMIA either.
224 default: llvm_unreachable("Unhandled submode!");
225 case ARM_AM::ia: return ARM::tSTMIA_UPD;
231 default: llvm_unreachable("Unhandled submode!");
232 case ARM_AM::ia: return ARM::t2LDMIA;
233 case ARM_AM::db: return ARM::t2LDMDB;
239 default: llvm_unreachable("Unhandled submode!");
240 case ARM_AM::ia: return ARM::t2STMIA;
241 case ARM_AM::db: return ARM::t2STMDB;
246 default: llvm_unreachable("Unhandled submode!");
247 case ARM_AM::ia: return ARM::VLDMSIA;
248 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
253 default: llvm_unreachable("Unhandled submode!");
254 case ARM_AM::ia: return ARM::VSTMSIA;
255 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
260 default: llvm_unreachable("Unhandled submode!");
261 case ARM_AM::ia: return ARM::VLDMDIA;
262 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
267 default: llvm_unreachable("Unhandled submode!");
268 case ARM_AM::ia: return ARM::VSTMDIA;
269 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// Inverse of the mapping above: recover the addressing submode
// (IA/IB/DA/DB) from a load/store-multiple opcode. Lives in the ARM_AM
// namespace (see the closing braces below). The case groups visible
// here cover the IA-family and DB-family opcodes; the returns between
// the groups are elided in this view.
277 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
279 default: llvm_unreachable("Unhandled opcode!");
286 case ARM::tLDMIA_UPD:
287 case ARM::tSTMIA_UPD:
288 case ARM::t2LDMIA_RET:
290 case ARM::t2LDMIA_UPD:
292 case ARM::t2STMIA_UPD:
294 case ARM::VLDMSIA_UPD:
296 case ARM::VSTMSIA_UPD:
298 case ARM::VLDMDIA_UPD:
300 case ARM::VSTMDIA_UPD:
314 case ARM::t2LDMDB_UPD:
316 case ARM::t2STMDB_UPD:
317 case ARM::VLDMSDB_UPD:
318 case ARM::VSTMSDB_UPD:
319 case ARM::VLDMDDB_UPD:
320 case ARM::VSTMDDB_UPD:
331 } // end namespace ARM_AM
332 } // end namespace llvm
// Opcode classification helpers: recognize the immediate-offset 32-bit
// integer load/store encodings for Thumb1, Thumb2 and ARM mode.
334 static bool isT1i32Load(unsigned Opc) {
335 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
// True for any i32 load this pass can merge (ARM, Thumb1 or Thumb2).
338 static bool isT2i32Load(unsigned Opc) {
339 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
342 static bool isi32Load(unsigned Opc) {
343 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
346 static bool isT1i32Store(unsigned Opc) {
347 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
350 static bool isT2i32Store(unsigned Opc) {
351 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
354 static bool isi32Store(unsigned Opc) {
355 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
// Scale factor applied to the immediate offset field of the given
// Thumb1 load/store opcode (the case bodies are elided in this view;
// used by UpdateBaseRegUses to convert a word offset to an immediate).
358 static unsigned getImmScale(unsigned Opc) {
360 default: llvm_unreachable("Unhandled opcode!");
375 /// Update future uses of the base register with the offset introduced
376 /// due to writeback. This function only works on Thumb1.
/// \param MBBI      First instruction after the merged LDM/STM.
/// \param Base      Base register that received the writeback.
/// \param WordOffset Number of words the base was advanced by.
/// \param Pred/PredReg Predicate to attach to any compensating tSUBi8.
378 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
379 MachineBasicBlock::iterator MBBI,
380 DebugLoc dl, unsigned Base,
382 ARMCC::CondCodes Pred, unsigned PredReg) {
383 assert(isThumb1 && "Can only update base register uses for Thumb1!");
384 // Start updating any instructions with immediate offsets. Insert a SUB before
385 // the first non-updateable instruction (if any).
386 for (; MBBI != MBB.end(); ++MBBI) {
387 bool InsertSub = false;
388 unsigned Opc = MBBI->getOpcode();
390 if (MBBI->readsRegister(Base)) {
393 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
395 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
397 if (IsLoad || IsStore) {
398 // Loads and stores with immediate offsets can be updated, but only if
399 // the new offset isn't negative.
400 // The MachineOperand containing the offset immediate is the last one
401 // before predicates.
403 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
404 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
405 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
407 // If storing the base register, it needs to be reset first.
408 unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
410 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
415 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
416 !definesCPSR(MBBI)) {
417 // SUBS/ADDS using this register, with a dead def of the CPSR.
418 // Merge it with the update; if the merged offset is too large,
419 // insert a new sub instead.
421 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
// Folding the writeback into a SUB grows its immediate; into an ADD
// it shrinks it (WordOffset is in words, hence the *4 byte scaling).
422 Offset = (Opc == ARM::tSUBi8) ?
423 MO.getImm() + WordOffset * 4 :
424 MO.getImm() - WordOffset * 4 ;
425 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
426 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
429 // The base register has now been reset, so exit early.
436 // Can't update the instruction.
440 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
441 // Since SUBS sets the condition flags, we can't place the base reset
442 // after an instruction that has a live CPSR def.
443 // The base register might also contain an argument for a function call.
448 // An instruction above couldn't be updated, so insert a sub.
449 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
450 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
451 .addImm(Pred).addReg(PredReg);
455 if (MBBI->killsRegister(Base))
456 // Register got killed. Stop updating.
460 // End of block was reached.
461 if (MBB.succ_size() > 0) {
462 // FIXME: Because of a bug, live registers are sometimes missing from
463 // the successor blocks' live-in sets. This means we can't trust that
464 // information and *always* have to reset at the end of a block.
466 if (MBBI != MBB.end()) --MBBI;
468 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
469 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
470 .addImm(Pred).addReg(PredReg);
474 /// MergeOps - Create and insert a LDM or STM with Base as base register and
475 /// registers in Regs as the register operands that would be loaded / stored.
476 /// It returns true if the transformation is done.
/// \param Offset  Byte offset of the first memop relative to Base.
/// \param Opcode  Single load/store opcode being merged (rewritten below
///                to the multiple-register form).
/// \param Regs    (register, isKill) pairs in the order they are added.
/// \param ImpDefs Super-registers to re-add as implicit defs.
478 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
479 MachineBasicBlock::iterator MBBI,
480 int Offset, unsigned Base, bool BaseKill,
481 int Opcode, ARMCC::CondCodes Pred,
482 unsigned PredReg, unsigned Scratch, DebugLoc dl,
483 ArrayRef<std::pair<unsigned, bool> > Regs,
484 ArrayRef<unsigned> ImpDefs) {
485 // Only a single register to load / store. Don't bother.
486 unsigned NumRegs = Regs.size();
490 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
491 // Compute liveness information for that register to make the decision.
492 bool SafeToClobberCPSR = !isThumb1 ||
493 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
494 MachineBasicBlock::LQR_Dead);
496 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
498 // Exception: If the base register is in the input reglist, Thumb1 LDM is
500 // It's also not possible to merge an STR of the base register in Thumb1.
502 for (unsigned I = 0; I < NumRegs; ++I)
503 if (Base == Regs[I].first) {
504 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
505 if (Opcode == ARM::tLDRi) {
508 } else if (Opcode == ARM::tSTRi) {
513 ARM_AM::AMSubMode Mode = ARM_AM::ia;
514 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
515 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
516 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
// Pick the submode from the starting offset so the merged instruction
// still addresses the same memory words.
518 if (Offset == 4 && haveIBAndDA) {
520 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
522 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
523 // VLDM/VSTM do not support DB mode without also updating the base reg.
525 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
526 // Check if this is a supported opcode before inserting instructions to
527 // calculate a new base register.
528 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
530 // If starting offset isn't zero, insert a MI to materialize a new base.
531 // But only do so if it is cost effective, i.e. merging more than two
536 // On Thumb1, it's not worth materializing a new base register without
537 // clobbering the CPSR (i.e. not using ADDS/SUBS).
538 if (!SafeToClobberCPSR)
542 if (isi32Load(Opcode)) {
543 // If it is a load, then just use one of the destination register to
544 // use as the new base.
545 NewBase = Regs[NumRegs-1].first;
547 // Use the scratch register to use as a new base.
// Select the add/sub opcode that can encode the needed immediate for
// the current ISA (Thumb2, Thumb1 small/large imm, or ARM).
554 isThumb2 ? ARM::t2ADDri :
555 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
556 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
557 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
562 isThumb2 ? ARM::t2SUBri :
563 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
564 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
567 if (!TL->isLegalAddImmediate(Offset))
568 // FIXME: Try add with register operand?
569 return false; // Probably not worth it then.
572 // Thumb1: depending on immediate size, use either
573 // ADDS NewBase, Base, #imm3
576 // ADDS NewBase, #imm8.
577 if (Base != NewBase &&
578 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
579 // Need to insert a MOV to the new base first.
580 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
582 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
583 if (Pred != ARMCC::AL)
585 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
586 .addReg(Base, getKillRegState(BaseKill));
588 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
589 .addReg(Base, getKillRegState(BaseKill))
590 .addImm(Pred).addReg(PredReg);
592 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
596 if (BaseOpc == ARM::tADDrSPi) {
597 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
598 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
599 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
600 .addImm(Pred).addReg(PredReg);
602 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
603 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
604 .addImm(Pred).addReg(PredReg);
606 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
607 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
608 .addImm(Pred).addReg(PredReg).addReg(0);
611 BaseKill = true; // New base is always killed straight away.
614 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
615 Opcode == ARM::VLDRD);
617 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
618 // base register writeback.
619 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
620 if (!Opcode) return false;
622 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
623 // - There is no writeback (LDM of base register),
624 // - the base register is killed by the merged instruction,
625 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
626 // to reset the base register.
627 // Otherwise, don't merge.
628 // It's safe to return here since the code to materialize a new base register
629 // above is also conditional on SafeToClobberCPSR.
630 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
633 MachineInstrBuilder MIB;
636 if (Opcode == ARM::tLDMIA)
637 // Update tLDMIA with writeback if necessary.
638 Opcode = ARM::tLDMIA_UPD;
640 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
642 // Thumb1: we might need to set base writeback when building the MI.
643 MIB.addReg(Base, getDefRegState(true))
644 .addReg(Base, getKillRegState(BaseKill));
646 // The base isn't dead after a merged instruction with writeback.
647 // Insert a sub instruction after the newly formed instruction to reset.
649 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
652 // No writeback, simply build the MachineInstr.
653 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
654 MIB.addReg(Base, getKillRegState(BaseKill));
657 MIB.addImm(Pred).addReg(PredReg);
// Register list: defs for loads, uses (with kill state) for stores.
659 for (unsigned i = 0; i != NumRegs; ++i)
660 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
661 | getKillRegState(Regs[i].second));
663 // Add implicit defs for super-registers.
664 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
665 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
670 /// \brief Find all instructions using a given imp-def within a range.
672 /// We are trying to combine a range of instructions, one of which (located at
673 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
674 /// be placed at RangeEnd, and so any uses of this definition between RangeStart
675 /// and RangeEnd must be modified to use an undefined value.
677 /// The live range continues until we find a second definition or one of the
678 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
679 /// we must consider all uses and decide which are relevant in a second pass.
680 void ARMLoadStoreOpt::findUsesOfImpDef(
681 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
682 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
// std::map keeps uses ordered by Position for the second pass below.
683 std::map<unsigned, MachineOperand *> Uses;
684 unsigned LastLivePos = RangeEnd;
686 // First we find all uses of this register with Position between RangeBegin
687 // and RangeEnd, any or all of these could be uses of a definition at
688 // RangeBegin. We also record the latest position a definition at RangeBegin
689 // would be considered live.
690 for (unsigned i = 0; i < MemOps.size(); ++i) {
691 MachineInstr &MI = *MemOps[i].MBBI;
692 unsigned MIPosition = MemOps[i].Position;
693 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
696 // If this instruction defines the register, then any later use will be of
697 // that definition rather than ours.
698 if (MI.definesRegister(DefReg))
699 LastLivePos = std::min(LastLivePos, MIPosition);
701 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
705 // If this instruction kills the register then (assuming liveness is
706 // correct when we start) we don't need to think about anything after here.
708 LastLivePos = std::min(LastLivePos, MIPosition);
710 Uses[MIPosition] = UseOp;
713 // Now we traverse the list of all uses, and append the ones that actually use
714 // our definition to the requested list.
715 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
718 // List is sorted by position so once we've found one out of range there
719 // will be no more to consider.
720 if (I->first > LastLivePos)
722 UsesOfImpDefs.push_back(I->second);
726 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success: kill flags are transferred/cleared, the merged memops are
// erased from the block, and every queue entry is repointed at the new
// LDM/STM instruction.
728 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
730 unsigned memOpsBegin, unsigned memOpsEnd,
731 unsigned insertAfter, int Offset,
732 unsigned Base, bool BaseKill,
734 ARMCC::CondCodes Pred, unsigned PredReg,
737 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
738 // First calculate which of the registers should be killed by the merged
// instruction: kills that occur before the insertion point must move
// onto the merged instruction; Killer remembers who held the old kill.
740 const unsigned insertPos = memOps[insertAfter].Position;
741 SmallSet<unsigned, 4> KilledRegs;
742 DenseMap<unsigned, unsigned> Killer;
743 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
744 if (i == memOpsBegin) {
749 if (memOps[i].Position < insertPos && memOps[i].isKill) {
750 unsigned Reg = memOps[i].Reg;
751 KilledRegs.insert(Reg);
756 SmallVector<std::pair<unsigned, bool>, 8> Regs;
757 SmallVector<unsigned, 8> ImpDefs;
758 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
759 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
760 unsigned Reg = memOps[i].Reg;
761 // If we are inserting the merged operation after an operation that
762 // uses the same register, make sure to transfer any kill flag.
763 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
764 Regs.push_back(std::make_pair(Reg, isKill));
766 // Collect any implicit defs of super-registers. They must be preserved.
767 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
768 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
770 unsigned DefReg = MO->getReg();
771 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
772 ImpDefs.push_back(DefReg);
774 // There may be other uses of the definition between this instruction and
775 // the eventual LDM/STM position. These should be marked undef if the
776 // merge takes place.
777 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
782 // Try to do the merge.
783 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
785 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
786 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
789 // Merge succeeded, update records.
790 Merges.push_back(std::prev(Loc));
792 // In gathering loads together, we may have moved the imp-def of a register
793 // past one of its uses. This is OK, since we know better than the rest of
794 // LLVM what's OK with ARM loads and stores; but we still have to adjust the
796 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
797 E = UsesOfImpDefs.end();
801 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
802 // Remove kill flags from any memops that come before insertPos.
803 if (Regs[i-memOpsBegin].second) {
804 unsigned Reg = Regs[i-memOpsBegin].first;
805 if (KilledRegs.count(Reg)) {
806 unsigned j = Killer[Reg];
807 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
808 assert(Idx >= 0 && "Cannot find killing operand");
809 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
810 memOps[j].isKill = false;
812 memOps[i].isKill = true;
814 MBB.erase(memOps[i].MBBI);
815 // Update this memop to refer to the merged instruction.
816 // We may need to move kill flags again.
817 memOps[i].Merged = true;
818 memOps[i].MBBI = Merges.back();
819 memOps[i].Position = insertPos;
822 // Update memOps offsets, since they may have been modified by MergeOps.
823 for (auto &MemOp : memOps) {
824 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
828 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
829 /// load / store multiple instructions.
/// Scans MemOps from SIndex looking for a run of ops with ascending
/// (VFP: consecutive) register numbers and contiguous offsets; recurses
/// on the remainder when a run is broken.
831 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
832 unsigned Base, int Opcode, unsigned Size,
833 ARMCC::CondCodes Pred, unsigned PredReg,
834 unsigned Scratch, MemOpQueue &MemOps,
835 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
836 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
837 int Offset = MemOps[SIndex].Offset;
838 int SOffset = Offset;
839 unsigned insertAfter = SIndex;
840 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
841 DebugLoc dl = Loc->getDebugLoc();
842 const MachineOperand &PMO = Loc->getOperand(0);
843 unsigned PReg = PMO.getReg();
// UINT_MAX for undef so an undef operand never satisfies the ascending
// register-number check below.
844 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
846 unsigned Limit = ~0U;
847 bool BaseKill = false;
848 // vldm / vstm limit are 32 for S variants, 16 for D variants.
866 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
867 int NewOffset = MemOps[i].Offset;
868 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
869 unsigned Reg = MO.getReg();
870 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
871 // Register numbers must be in ascending order. For VFP / NEON load and
872 // store multiples, the registers must also be consecutive and within the
873 // limit on the number of registers per instruction.
874 if (Reg != ARM::SP &&
875 NewOffset == Offset + (int)Size &&
876 ((isNotVFP && RegNum > PRegNum) ||
877 ((Count < Limit) && RegNum == PRegNum+1)) &&
878 // On Swift we don't want vldm/vstm to start with a odd register num
879 // because Q register unaligned vldm/vstm need more uops.
880 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
885 // Can't merge this in. Try merge the earlier ones first.
886 // We need to compute BaseKill here because the MemOps may have been
888 BaseKill = Loc->killsRegister(Base);
890 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
891 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to merge the remaining memops starting at the break point.
892 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
897 if (MemOps[i].Position > MemOps[insertAfter].Position) {
899 Loc = MemOps[i].MBBI;
// Final merge of the last run once the scan completes.
903 BaseKill = Loc->killsRegister(Base);
904 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
905 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Returns true if MI is a SUB of Base by exactly Bytes with the given
// predicate, i.e. a decrement that can be folded into a preceding or
// following load/store as base writeback. Limit (when non-zero) bounds
// the acceptable immediate.
908 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
909 unsigned Bytes, unsigned Limit,
910 ARMCC::CondCodes Pred, unsigned PredReg) {
911 unsigned MyPredReg = 0;
915 bool CheckCPSRDef = false;
916 switch (MI->getOpcode()) {
917 default: return false;
927 // Make sure the offset fits in 8 bits.
928 if (Bytes == 0 || (Limit && Bytes >= Limit))
// Thumb1 SP-relative / 8-bit immediates are scaled by 4.
931 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
932 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
933 if (!(MI->getOperand(0).getReg() == Base &&
934 MI->getOperand(1).getReg() == Base &&
935 (MI->getOperand(2).getImm() * Scale) == Bytes &&
936 getInstrPredicate(MI, MyPredReg) == Pred &&
937 MyPredReg == PredReg))
// A flag-setting SUBS can't be folded away; reject it when the opcode
// required a CPSR check.
940 return CheckCPSRDef ? !definesCPSR(MI) : true;
// Mirror of isMatchingDecrement: returns true if MI is an ADD of Base by
// exactly Bytes with the given predicate, foldable as base writeback.
943 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
944 unsigned Bytes, unsigned Limit,
945 ARMCC::CondCodes Pred, unsigned PredReg) {
946 unsigned MyPredReg = 0;
950 bool CheckCPSRDef = false;
951 switch (MI->getOpcode()) {
952 default: return false;
962 if (Bytes == 0 || (Limit && Bytes >= Limit))
963 // Make sure the offset fits in 8 bits.
// Thumb1 SP-relative / 8-bit immediates are scaled by 4.
966 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
967 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
968 if (!(MI->getOperand(0).getReg() == Base &&
969 MI->getOperand(1).getReg() == Base &&
970 (MI->getOperand(2).getImm() * Scale) == Bytes &&
971 getInstrPredicate(MI, MyPredReg) == Pred &&
972 MyPredReg == PredReg))
// Reject flag-setting ADDS when the opcode required a CPSR check.
975 return CheckCPSRDef ? !definesCPSR(MI) : true;
// Number of bytes transferred by a load/store-multiple MI, derived from
// the length of its register list: 4 bytes per register for the
// integer/S-register forms, 8 for the D-register forms.
978 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
979 switch (MI->getOpcode()) {
1006 case ARM::tLDMIA_UPD:
1007 case ARM::tSTMIA_UPD:
1014 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
1017 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// Map a (non-updating) load/store-multiple opcode plus submode to its
// base-writeback (_UPD) counterpart.
1021 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1022 ARM_AM::AMSubMode Mode) {
1024 default: llvm_unreachable("Unhandled opcode!");
1030 default: llvm_unreachable("Unhandled submode!");
1031 case ARM_AM::ia: return ARM::LDMIA_UPD;
1032 case ARM_AM::ib: return ARM::LDMIB_UPD;
1033 case ARM_AM::da: return ARM::LDMDA_UPD;
1034 case ARM_AM::db: return ARM::LDMDB_UPD;
1041 default: llvm_unreachable("Unhandled submode!");
1042 case ARM_AM::ia: return ARM::STMIA_UPD;
1043 case ARM_AM::ib: return ARM::STMIB_UPD;
1044 case ARM_AM::da: return ARM::STMDA_UPD;
1045 case ARM_AM::db: return ARM::STMDB_UPD;
1050 default: llvm_unreachable("Unhandled submode!");
1051 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1052 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1057 default: llvm_unreachable("Unhandled submode!");
1058 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1059 case ARM_AM::db: return ARM::t2STMDB_UPD;
1063 default: llvm_unreachable("Unhandled submode!");
1064 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1065 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1069 default: llvm_unreachable("Unhandled submode!");
1070 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1071 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1075 default: llvm_unreachable("Unhandled submode!");
1076 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1077 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1081 default: llvm_unreachable("Unhandled submode!");
1082 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1083 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1088 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
1089 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1091 /// stmia rn, <ra, rb, rc>
1092 /// rn := rn + 4 * 3;
1094 /// stmia rn!, <ra, rb, rc>
1096 /// rn := rn - 4 * 3;
1097 /// ldmia rn, <ra, rb, rc>
1099 /// ldmdb rn!, <ra, rb, rc>
/// \param I Iterator owned by the caller's walk; adjusted if the erased
///          add/sub is the instruction the caller would visit next.
1100 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1101 MachineBasicBlock::iterator MBBI,
1103 MachineBasicBlock::iterator &I) {
1104 // Thumb1 is already using updating loads/stores.
1105 if (isThumb1) return false;
1107 MachineInstr *MI = MBBI;
1108 unsigned Base = MI->getOperand(0).getReg();
1109 bool BaseKill = MI->getOperand(0).isKill();
1110 unsigned Bytes = getLSMultipleTransferSize(MI);
1111 unsigned PredReg = 0;
1112 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1113 int Opcode = MI->getOpcode();
1114 DebugLoc dl = MI->getDebugLoc();
1116 // Can't use an updating ld/st if the base register is also a dest
1117 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1118 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1119 if (MI->getOperand(i).getReg() == Base)
1122 bool DoMerge = false;
1123 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
1125 // Try merging with the previous instruction.
1126 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1127 if (MBBI != BeginMBBI) {
1128 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1129 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1131 if (Mode == ARM_AM::ia &&
1132 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1135 } else if (Mode == ARM_AM::ib &&
1136 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1141 MBB.erase(PrevMBBI);
1144 // Try merging with the next instruction.
1145 MachineBasicBlock::iterator EndMBBI = MBB.end();
1146 if (!DoMerge && MBBI != EndMBBI) {
1147 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1148 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1150 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1151 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1153 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1154 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Don't leave the caller's iterator dangling on the erased add/sub.
1158 if (NextMBBI == I) {
1162 MBB.erase(NextMBBI);
// Rebuild the instruction as the _UPD form: writeback def of the base,
// then the (possibly killed) base use, predicate, and register list.
1169 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1170 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1171 .addReg(Base, getDefRegState(true)) // WB base register
1172 .addReg(Base, getKillRegState(BaseKill))
1173 .addImm(Pred).addReg(PredReg);
1175 // Transfer the rest of operands.
1176 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1177 MIB.addOperand(MI->getOperand(OpNum));
1179 // Transfer memoperands.
1180 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
/// Map a plain load/store opcode to its pre-indexed (base-writeback)
/// counterpart. For VLDR/VSTR, which have no pre-indexed form, the
/// single-register updating VLDM/VSTM variant is returned instead, chosen
/// by the add/sub direction in Mode.
// NOTE(review): the switch statement and most case labels are elided in
// this excerpt (original lines 1188-1209 are discontinuous); only the
// return expressions are visible. Verify against upstream.
1186 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1187 ARM_AM::AddrOpc Mode) {
1190 return ARM::LDR_PRE_IMM;
1192 return ARM::STR_PRE_IMM;
1194 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1196 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1198 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1200 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1203 return ARM::t2LDR_PRE;
1206 return ARM::t2STR_PRE;
1207 default: llvm_unreachable("Unhandled opcode!");
/// Map a plain load/store opcode to its post-indexed (base-writeback)
/// counterpart; mirrors getPreIndexedLoadStoreOpcode. VLDR/VSTR again map
/// to the single-register updating VLDM/VSTM form selected by Mode.
// NOTE(review): switch/case labels elided in this excerpt (original lines
// 1213-1232 are discontinuous); only return expressions are visible.
1211 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1212 ARM_AM::AddrOpc Mode) {
1215 return ARM::LDR_POST_IMM;
1217 return ARM::STR_POST_IMM;
1219 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1221 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1223 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1225 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1228 return ARM::t2LDR_POST;
1231 return ARM::t2STR_POST;
1232 default: llvm_unreachable("Unhandled opcode!");
1236 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
1237 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// NOTE(review): original line numbers jump throughout this function
// (1241, 1246, 1258, 1260-1261, 1266-1267, ... absent) — control-flow
// statements are missing from this excerpt; verify against upstream.
1238 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1239 MachineBasicBlock::iterator MBBI,
1240 const TargetInstrInfo *TII,
1242 MachineBasicBlock::iterator &I) {
1243 // Thumb1 doesn't have updating LDR/STR.
1244 // FIXME: Use LDM/STM with single register instead.
1245 if (isThumb1) return false;
1247 MachineInstr *MI = MBBI;
// For single loads/stores operand 1 is the base register (operand 0 is
// the data register, operand 2 the immediate offset).
1248 unsigned Base = MI->getOperand(1).getReg();
1249 bool BaseKill = MI->getOperand(1).isKill();
1250 unsigned Bytes = getLSMultipleTransferSize(MI);
1251 int Opcode = MI->getOpcode();
1252 DebugLoc dl = MI->getDebugLoc();
// AM5 = VFP load/store addressing; AM2 = ARM-mode LDR/STR addressing.
1253 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1254 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1255 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only a zero pre-existing offset can be folded into a base update.
1256 if (isi32Load(Opcode) || isi32Store(Opcode))
1257 if (MI->getOperand(2).getImm() != 0)
1259 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1262 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1263 // Can't do the merge if the destination register is the same as the would-be
1264 // writeback register.
1265 if (MI->getOperand(0).getReg() == Base)
1268 unsigned PredReg = 0;
1269 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1270 bool DoMerge = false;
1271 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1272 unsigned NewOpc = 0;
1273 // AM2 - 12 bits, thumb2 - 8 bits.
1274 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1276 // Try merging with the previous instruction.
1277 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1278 if (MBBI != BeginMBBI) {
1279 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1280 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1282 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1284 AddSub = ARM_AM::sub;
1285 } else if (!isAM5 &&
1286 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A preceding add/sub becomes a pre-indexed form; the add/sub is erased.
1290 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1291 MBB.erase(PrevMBBI);
1295 // Try merging with the next instruction.
1296 MachineBasicBlock::iterator EndMBBI = MBB.end();
1297 if (!DoMerge && MBBI != EndMBBI) {
1298 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1299 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1302 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1304 AddSub = ARM_AM::sub;
1305 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A trailing add/sub becomes a post-indexed form.
1309 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1310 if (NextMBBI == I) {
1314 MBB.erase(NextMBBI);
1322 // VLDM[SD]_UPD, VSTM[SD]_UPD
1323 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1324 // updating load/store-multiple instructions can be used with only one
1326 MachineOperand &MO = MI->getOperand(0);
1327 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1328 .addReg(Base, getDefRegState(true)) // WB base register
1329 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1330 .addImm(Pred).addReg(PredReg)
1331 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1332 getKillRegState(MO.isKill())));
1335 // LDR_PRE, LDR_POST
1336 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1337 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1338 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1339 .addReg(Base, RegState::Define)
1340 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// AM2 post-indexed forms encode direction in the AM2 opcode word and
// carry a (vestigial) zero offset register.
1342 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1343 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1344 .addReg(Base, RegState::Define)
1345 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1348 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1349 // t2LDR_PRE, t2LDR_POST
1350 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1351 .addReg(Base, RegState::Define)
1352 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1355 MachineOperand &MO = MI->getOperand(0);
1356 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1357 // the vestigal zero-reg offset register. When that's fixed, this clause
1358 // can be removed entirely.
1359 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1360 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1361 // STR_PRE, STR_POST
1362 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1363 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1364 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1366 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1367 // t2STR_PRE, t2STR_POST
1368 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1369 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1370 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1378 /// isMemoryOp - Returns true if instruction is a memory operation that this
1379 /// pass is capable of operating on.
// NOTE(review): the early "return false;" statements and the opcode
// switch labels are elided in this excerpt (original lines 1384-1432 are
// discontinuous); verify against upstream.
1380 static bool isMemoryOp(const MachineInstr *MI) {
1381 // When no memory operands are present, conservatively assume unaligned,
1382 // volatile, unfoldable.
1383 if (!MI->hasOneMemOperand())
1386 const MachineMemOperand *MMO = *MI->memoperands_begin();
1388 // Don't touch volatile memory accesses - we may be changing their order.
1389 if (MMO->isVolatile())
1392 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not — reject anything with alignment below 4 bytes.
1394 if (MMO->getAlignment() < 4)
1397 // str <undef> could probably be eliminated entirely, but for now we just want
1398 // to avoid making a mess of it.
1399 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1400 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1401 MI->getOperand(0).isUndef())
1404 // Likewise don't mess with references to undefined addresses.
1405 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1406 MI->getOperand(1).isUndef())
1409 int Opcode = MI->getOpcode();
// For the accepted opcodes, additionally require a register base operand.
1414 return MI->getOperand(1).isReg();
1417 return MI->getOperand(1).isReg();
1428 return MI->getOperand(1).isReg();
1433 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1434 /// op that is being merged.
// Scans MemOps for the entry with the smallest Position (i.e. the memory
// op that appears first in the block) and forwards the scavenger to just
// before it, so scavenging state is valid at the merge point.
1435 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1436 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1437 unsigned Position = MemOps[0].Position;
1438 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1439 if (MemOps[i].Position < Position) {
1440 Position = MemOps[i].Position;
1441 Loc = MemOps[i].MBBI;
// Nothing to advance past if the earliest op is the first instruction.
1445 if (Loc != MBB.begin())
1446 RS->forward(std::prev(Loc));
/// Emit a single LDR (isDef == true) or STR (isDef == false) before MBBI,
/// preserving the kill/dead/undef flags supplied by the caller. Used by
/// FixInvalidRegPairOp when an LDRD/STRD must be split into two singles.
// NOTE(review): the if/else wrapping these two BuildMI calls is elided in
// this excerpt (original lines 1458, 1464, 1470-1472 absent).
1449 static void InsertLDR_STR(MachineBasicBlock &MBB,
1450 MachineBasicBlock::iterator &MBBI,
1451 int Offset, bool isDef,
1452 DebugLoc dl, unsigned NewOpc,
1453 unsigned Reg, bool RegDeadKill, bool RegUndef,
1454 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1455 bool OffKill, bool OffUndef,
1456 ARMCC::CondCodes Pred, unsigned PredReg,
1457 const TargetInstrInfo *TII, bool isT2) {
// Load form: data register is a def (possibly dead).
1459 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1461 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1462 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1463 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: data register is a use (possibly killed/undef).
1465 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1467 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1468 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1469 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
/// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair is not a
/// legal even/odd pair (or that trips Cortex-M3 errata 602117) into either
/// an LDM/STM (ascending regs, zero offset) or two single loads/stores.
// NOTE(review): original line numbers are discontinuous in this function
// (1490-1491, 1508, 1511, 1515, 1521-1522, 1526-1531, 1546, 1559,
// 1565-1566, 1579-1592 absent) — several statements, including the
// deletion of the original instruction and the return paths, are missing
// from this excerpt; verify against upstream.
1473 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1474 MachineBasicBlock::iterator &MBBI) {
1475 MachineInstr *MI = &*MBBI;
1476 unsigned Opcode = MI->getOpcode();
1477 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1478 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1479 const MachineOperand &BaseOp = MI->getOperand(2);
1480 unsigned BaseReg = BaseOp.getReg();
1481 unsigned EvenReg = MI->getOperand(0).getReg();
1482 unsigned OddReg = MI->getOperand(1).getReg();
1483 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1484 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1485 // ARM errata 602117: LDRD with base in list may result in incorrect base
1486 // register when interrupted or faulted.
1487 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
// A proper even/odd consecutive pair needs no fixing (unless the errata
// applies); the early exit for that case is elided in this excerpt.
1488 if (!Errata602117 &&
1489 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1492 MachineBasicBlock::iterator NewBBI = MBBI;
1493 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1494 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the pair regs are defs (dead flag relevant); for stores they
// are uses (kill flag relevant).
1495 bool EvenDeadKill = isLd ?
1496 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1497 bool EvenUndef = MI->getOperand(0).isUndef();
1498 bool OddDeadKill = isLd ?
1499 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1500 bool OddUndef = MI->getOperand(1).isUndef();
1501 bool BaseKill = BaseOp.isKill();
1502 bool BaseUndef = BaseOp.isUndef();
// ARM-mode LDRD/STRD carry an extra offset operand at index 3; t2 forms
// do not.
1503 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1504 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1505 int OffImm = getMemoryOpOffset(MI);
1506 unsigned PredReg = 0;
1507 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1509 if (OddRegNum > EvenRegNum && OffImm == 0) {
1510 // Ascending register numbers and no offset. It's safe to change it to a
1512 unsigned NewOpc = (isLd)
1513 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1514 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1516 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1517 .addReg(BaseReg, getKillRegState(BaseKill))
1518 .addImm(Pred).addReg(PredReg)
1519 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1520 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1523 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1524 .addReg(BaseReg, getKillRegState(BaseKill))
1525 .addImm(Pred).addReg(PredReg)
1527 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1529 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1532 NewBBI = std::prev(MBBI);
1534 // Split into two instructions.
1535 unsigned NewOpc = (isLd)
1536 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1537 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1538 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1539 // so adjust and use t2LDRi12 here for that.
1540 unsigned NewOpc2 = (isLd)
1541 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1542 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1543 DebugLoc dl = MBBI->getDebugLoc();
1544 // If this is a load and base register is killed, it may have been
1545 // re-defed by the load, make sure the first load does not clobber it.
1547 (BaseKill || OffKill) &&
1548 (TRI->regsOverlap(EvenReg, BaseReg))) {
1549 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd half first so the even half (which overlaps the base)
// goes last and cannot clobber the address before it is used.
1550 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1551 OddReg, OddDeadKill, false,
1552 BaseReg, false, BaseUndef, false, OffUndef,
1553 Pred, PredReg, TII, isT2);
1554 NewBBI = std::prev(MBBI);
1555 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1556 EvenReg, EvenDeadKill, false,
1557 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1558 Pred, PredReg, TII, isT2);
1560 if (OddReg == EvenReg && EvenDeadKill) {
1561 // If the two source operands are the same, the kill marker is
1562 // probably on the first one. e.g.
1563 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1564 EvenDeadKill = false;
1567 // Never kill the base register in the first instruction.
1568 if (EvenReg == BaseReg)
1569 EvenDeadKill = false;
1570 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1571 EvenReg, EvenDeadKill, EvenUndef,
1572 BaseReg, false, BaseUndef, false, OffUndef,
1573 Pred, PredReg, TII, isT2);
1574 NewBBI = std::prev(MBBI);
1575 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1576 OddReg, OddDeadKill, OddUndef,
1577 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1578 Pred, PredReg, TII, isT2);
1593 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1594 /// ops of the same base and incrementing offset into LDM / STM ops.
// Main driver: walks the block collecting runs of same-base, same-opcode,
// same-predicate memory ops into MemOps, then merges each run into
// LDM/STM and folds base inc/dec into the results. Returns true if any
// merge happened.
// NOTE(review): original line numbers jump heavily in this function
// (1598, 1600, 1606, 1609, 1611-1612, 1616, 1618, 1628-1629, 1638-1639,
// 1649-1653, 1656-1659, 1662-1663, 1665-1669, 1675-1678, 1680, 1683-1696,
// 1698-1699, 1701, 1703-1712, 1717, 1719, 1721, 1724-1726, 1729, 1731,
// 1734, 1736, 1742-1743, 1748, 1750, 1752-1757, 1759-1764, 1768-1772
// absent) — large parts of the state machine are missing from this
// excerpt; verify against upstream before drawing conclusions.
1595 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1596 unsigned NumMerges = 0;
1597 unsigned NumMemOps = 0;
// State of the current mergeable chain.
1599 unsigned CurrBase = 0;
1601 unsigned CurrSize = 0;
1602 ARMCC::CondCodes CurrPred = ARMCC::AL;
1603 unsigned CurrPredReg = 0;
1604 unsigned Position = 0;
1605 SmallVector<MachineBasicBlock::iterator,4> Merges;
1607 RS->enterBasicBlock(&MBB);
1608 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1610 if (FixInvalidRegPairOp(MBB, MBBI))
1613 bool Advance = false;
1614 bool TryMerge = false;
1615 bool Clobber = false;
1617 bool isMemOp = isMemoryOp(MBBI);
1619 int Opcode = MBBI->getOpcode();
1620 unsigned Size = getLSMultipleTransferSize(MBBI);
1621 const MachineOperand &MO = MBBI->getOperand(0);
1622 unsigned Reg = MO.getReg();
1623 bool isKill = MO.isDef() ? false : MO.isKill();
1624 unsigned Base = MBBI->getOperand(1).getReg();
1625 unsigned PredReg = 0;
1626 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1627 int Offset = getMemoryOpOffset(MBBI);
1630 // r5 := ldr [r5, #4]
1631 // r6 := ldr [r5, #8]
1633 // The second ldr has effectively broken the chain even though it
1634 // looks like the later ldr(s) use the same base register. Try to
1635 // merge the ldr's so far, including this one. But don't try to
1636 // combine the following ldr(s).
1637 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1640 // r4 := ldr [r0, #8]
1641 // r4 := ldr [r0, #4]
1643 // The optimization may reorder the second ldr in front of the first
1644 // ldr, which violates write after write(WAW) dependence. The same as
1645 // str. Try to merge inst(s) already in MemOps.
1646 bool Overlap = false;
1647 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1648 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1654 if (CurrBase == 0 && !Clobber) {
1655 // Start of a new chain.
1660 CurrPredReg = PredReg;
1661 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1664 } else if (!Overlap) {
1670 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1671 // No need to match PredReg.
1672 // Continue adding to the queue.
1673 if (Offset > MemOps.back().Offset) {
1674 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise insert in offset-sorted position, rejecting duplicates.
1679 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1681 if (Offset < I->Offset) {
1682 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1687 } else if (Offset == I->Offset) {
1688 // Collision! This can't be merged!
1697 if (MBBI->isDebugValue()) {
1700 // Reach the end of the block, try merging the memory instructions.
1702 } else if (Advance) {
1706 // Reach the end of the block, try merging the memory instructions.
1713 if (NumMemOps > 1) {
1714 // Try to find a free register to use as a new base in case it's needed.
1715 // First advance to the instruction just before the start of the chain.
1716 AdvanceRS(MBB, MemOps);
1718 // Find a scratch register.
1720 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1722 // Process the load / store instructions.
1723 RS->forward(std::prev(MBBI));
1727 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1728 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1730 // Try folding preceding/trailing base inc/dec into the generated
1732 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1733 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1735 NumMerges += Merges.size();
1737 // Try folding preceding/trailing base inc/dec into those load/store
1738 // that were not merged to form LDM/STM ops.
1739 for (unsigned i = 0; i != NumMemOps; ++i)
1740 if (!MemOps[i].Merged)
1741 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1744 // RS may be pointing to an instruction that's deleted.
1745 RS->skipTo(std::prev(MBBI));
1746 } else if (NumMemOps == 1) {
1747 // Try folding preceding/trailing base inc/dec into the single
1749 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1751 RS->forward(std::prev(MBBI));
// Reset chain state for the next run.
1758 CurrPred = ARMCC::AL;
1765 // If iterator hasn't been advanced and this is not a memory op, skip it.
1766 // It can't start a new chain anyway.
1767 if (!Advance && !isMemOp && MBBI != E) {
1773 return NumMerges > 0;
1776 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1777 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1778 /// directly restore the value of LR into pc.
1779 /// ldmfd sp!, {..., lr}
1782 /// ldmfd sp!, {..., lr}
1785 /// ldmfd sp!, {..., pc}
// NOTE(review): a few statements are elided in this excerpt (original
// lines 1790, 1803, 1808, 1810-1816 absent), including the rewrite of the
// LR operand to PC and the return paths; verify against upstream.
1786 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1787 // Thumb1 LDM doesn't allow high registers.
1788 if (isThumb1) return false;
1789 if (MBB.empty()) return false;
1791 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1792 if (MBBI != MBB.begin() &&
1793 (MBBI->getOpcode() == ARM::BX_RET ||
1794 MBBI->getOpcode() == ARM::tBX_RET ||
1795 MBBI->getOpcode() == ARM::MOVPCLR)) {
1796 MachineInstr *PrevMI = std::prev(MBBI);
1797 unsigned Opcode = PrevMI->getOpcode();
1798 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1799 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1800 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last register in the LDM's list must be LR for the fold to apply.
1801 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1802 if (MO.getReg() != ARM::LR)
1804 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1805 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1806 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1807 PrevMI->setDesc(TII->get(NewOpc));
// Preserve implicit operands (e.g. live-in returns) from the erased
// return instruction.
1809 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
/// Pass entry point: cache subtarget/TII/TRI, create a RegScavenger, then
/// run the LDM/STM formation and (on v5T+) return-into-LDM folding on every
/// basic block. Returns true if the function was modified.
// NOTE(review): original lines 1826, 1829, 1834-1840 are absent from this
// excerpt (likely blank lines, the loop increment, and the cleanup/return
// of Modified); verify against upstream.
1817 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1818 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1819 TL = STI->getTargetLowering();
1820 AFI = Fn.getInfo<ARMFunctionInfo>();
1821 TII = STI->getInstrInfo();
1822 TRI = STI->getRegisterInfo();
// NOTE(review): raw `new` with no visible matching delete in this excerpt
// — presumably freed after the loop (elided lines); confirm upstream.
1823 RS = new RegScavenger();
1824 isThumb2 = AFI->isThumb2Function();
1825 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1827 bool Modified = false;
1828 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1830 MachineBasicBlock &MBB = *MFI;
1831 Modified |= LoadStoreMultipleOpti(MBB);
// MOV PC, LR / LDM-with-PC return folding requires ARMv5T or later.
1832 if (STI->hasV5TOps())
1833 Modified |= MergeReturnIntoLDM(MBB);
1841 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1842 /// load / stores from consecutive locations close to make it more
1843 /// likely they will be combined later.
// NOTE(review): surrounding namespace braces and some declarations
// (original lines 1844-1845, 1847, 1849, 1856, 1858, 1861-1863, 1867,
// 1869, 1875, 1877-1878) are absent from this excerpt.
1846 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1848 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function pointers, set up in runOnMachineFunction.
1850 const DataLayout *TD;
1851 const TargetInstrInfo *TII;
1852 const TargetRegisterInfo *TRI;
1853 const ARMSubtarget *STI;
1854 MachineRegisterInfo *MRI;
1855 MachineFunction *MF;
1857 bool runOnMachineFunction(MachineFunction &Fn) override;
1859 const char *getPassName() const override {
1860 return "ARM pre- register allocation load / store optimization pass";
// Helper: decide whether two singles can be fused into an LDRD/STRD and
// compute the new opcode/operands (see definition below).
1864 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1865 unsigned &NewOpc, unsigned &EvenReg,
1866 unsigned &OddReg, unsigned &BaseReg,
1868 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Helper: move same-base loads/stores next to each other (see below).
1870 bool RescheduleOps(MachineBasicBlock *MBB,
1871 SmallVectorImpl<MachineInstr *> &Ops,
1872 unsigned Base, bool isLd,
1873 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1874 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification (address used as unique ID).
1876 char ARMPreAllocLoadStoreOpt::ID = 0;
/// Pre-RA pass entry point: cache target info, then reschedule the
/// loads/stores of every basic block. Returns true on modification.
// NOTE(review): original lines 1885-1886, 1889, 1891-1894 are absent from
// this excerpt (MF assignment, loop increment, and the final return).
1879 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1880 TD = Fn.getTarget().getDataLayout();
1881 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1882 TII = STI->getInstrInfo();
1883 TRI = STI->getRegisterInfo();
1884 MRI = &Fn.getRegInfo();
1887 bool Modified = false;
1888 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1890 Modified |= RescheduleLoadStoreInstrs(MFI);
/// Check whether the memory ops in MemOps can safely be moved together
/// past the instructions between I and E, and whether doing so keeps the
/// estimated register-pressure increase acceptable.
// NOTE(review): original line numbers jump here (1903, 1905, 1907, 1909,
// 1911-1921, 1924-1925, 1928, 1931-1933, 1937, 1939-1941 absent) — the
// scan loop header, several early returns, and part of the store-ordering
// check are missing from this excerpt; verify against upstream.
1895 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1896 MachineBasicBlock::iterator I,
1897 MachineBasicBlock::iterator E,
1898 SmallPtrSetImpl<MachineInstr*> &MemOps,
1899 SmallSet<unsigned, 4> &MemRegs,
1900 const TargetRegisterInfo *TRI) {
1901 // Are there stores / loads / calls between them?
1902 // FIXME: This is overly conservative. We should make use of alias information
1904 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions already in MemOps (or debug values) are the ones being
// moved — skip them during the scan.
1906 if (I->isDebugValue() || MemOps.count(&*I))
1908 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1910 if (isLd && I->mayStore())
1915 // It's not safe to move the first 'str' down.
1918 // str r4, [r0, #+4]
// A def of (or overlap with) the base register blocks the motion; any
// other register used in between adds to estimated pressure.
1922 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1923 MachineOperand &MO = I->getOperand(j);
1926 unsigned Reg = MO.getReg();
1927 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1929 if (Reg != Base && !MemRegs.count(Reg))
1930 AddedRegPressure.insert(Reg);
1934 // Estimate register pressure increase due to the transformation.
1935 if (MemRegs.size() <= 4)
1936 // Ok if we are moving small number of instructions.
1938 return AddedRegPressure.size() <= MemRegs.size() * 2;
1942 /// Copy Op0 and Op1 operands into a new array assigned to MI.
// Allocates a fresh memoperand array on the MachineFunction sized for both
// source instructions' memoperands and installs it on MI. Used when two
// singles are fused into one LDRD/STRD so alias info is preserved.
// NOTE(review): original lines 1948, 1953, 1956-1958 are absent from this
// excerpt (likely blanks/closing brace and the MemEnd reassignment chain).
1943 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1944 MachineInstr *Op1) {
1945 assert(MI->memoperands_empty() && "expected a new machineinstr");
1946 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1947 + (Op1->memoperands_end() - Op1->memoperands_begin());
1949 MachineFunction *MF = MI->getParent()->getParent();
1950 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1951 MachineSDNode::mmo_iterator MemEnd =
1952 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1954 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1955 MI->setMemRefs(MemBegin, MemEnd);
/// Decide whether the adjacent single loads/stores Op0/Op1 can be combined
/// into one LDRD/STRD (or t2 equivalent). On success fills in the new
/// opcode, the even/odd data registers, base register, encoded offset,
/// predicate, and debug location for the fused instruction.
// NOTE(review): original line numbers jump in this function (1960, 1965,
// 1968-1969, 1971, 1974, 1976, 1979-1980, 1983-1988, 1994-1995,
// 2002-2003, 2006, 2009-2011, 2013, 2015-2016, 2019, 2021, 2025,
// 2029-2031 absent) — return statements, the isT2/Scale setup, and the
// final `return true;` are missing from this excerpt; verify upstream.
1959 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1961 unsigned &NewOpc, unsigned &EvenReg,
1962 unsigned &OddReg, unsigned &BaseReg,
1963 int &Offset, unsigned &PredReg,
1964 ARMCC::CondCodes &Pred,
1966 // Make sure we're allowed to generate LDRD/STRD.
1967 if (!STI->hasV5TEOps())
1970 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1972 unsigned Opcode = Op0->getOpcode();
1973 if (Opcode == ARM::LDRi12) {
1975 } else if (Opcode == ARM::STRi12) {
1977 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1978 NewOpc = ARM::t2LDRDi8;
1981 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1982 NewOpc = ARM::t2STRDi8;
1989 // Make sure the base address satisfies i64 ld / st alignment requirement.
1990 // At the moment, we ignore the memoryoperand's value.
1991 // If we want to use AliasAnalysis, we should check it accordingly.
1992 if (!Op0->hasOneMemOperand() ||
1993 (*Op0->memoperands_begin())->isVolatile())
1996 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1997 const Function *Func = MF->getFunction();
1998 unsigned ReqAlign = STI->hasV6Ops()
1999 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
2000 : 8; // Pre-v6 need 8-byte align
2001 if (Align < ReqAlign)
2004 // Then make sure the immediate offset fits.
2005 int OffImm = getMemoryOpOffset(Op0);
2007 int Limit = (1 << 8) * Scale;
2008 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM-mode path: encode direction + magnitude as an AM3 opcode word.
2012 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2014 AddSub = ARM_AM::sub;
2017 int Limit = (1 << 8) * Scale;
2018 if (OffImm >= Limit || (OffImm & (Scale-1)))
2020 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2022 EvenReg = Op0->getOperand(0).getReg();
2023 OddReg = Op1->getOperand(0).getReg();
// LDRD cannot load both halves into the same register.
2024 if (EvenReg == OddReg)
2026 BaseReg = Op0->getOperand(1).getReg();
2027 Pred = getInstrPredicate(Op0, PredReg);
2028 dl = Op0->getDebugLoc();
/// Given all the loads (or stores) off one base register in a block, sort
/// them by offset and move consecutive runs next to each other so the
/// post-RA pass can merge them; pairs that qualify are fused directly into
/// LDRD/STRD here. Returns true if anything was moved or fused.
// NOTE(review): original line numbers jump heavily in this function
// (2037, 2045-2046, 2050, 2056, 2064-2066, 2068-2072, 2075-2076, 2079,
// 2081-2082, 2084, 2087-2092, 2098-2099, 2103, 2106, 2108-2109,
// 2114-2115, 2123, 2125-2126, 2130-2132, 2137, 2139, 2143, 2147-2148,
// 2152-2153, 2155-2157, 2161-2162, 2166-2170, 2174, 2177, 2179-2191
// absent) — loop/branch structure and several erase/cleanup statements
// are missing from this excerpt; verify against upstream.
2032 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2033 SmallVectorImpl<MachineInstr *> &Ops,
2034 unsigned Base, bool isLd,
2035 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2036 bool RetVal = false;
2038 // Sort by offset (in reverse order).
2039 std::sort(Ops.begin(), Ops.end(),
2040 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2041 int LOffset = getMemoryOpOffset(LHS);
2042 int ROffset = getMemoryOpOffset(RHS);
// Duplicate offsets were filtered out by the caller, so strict ordering
// holds for distinct instructions.
2043 assert(LHS == RHS || LOffset != ROffset);
2044 return LOffset > ROffset;
2047 // The loads / stores of the same base are in order. Scan them from first to
2048 // last and check for the following:
2049 // 1. Any def of base.
2051 while (Ops.size() > 1) {
2052 unsigned FirstLoc = ~0U;
2053 unsigned LastLoc = 0;
2054 MachineInstr *FirstOp = nullptr;
2055 MachineInstr *LastOp = nullptr;
2057 unsigned LastOpcode = 0;
2058 unsigned LastBytes = 0;
2059 unsigned NumMove = 0;
// Walk Ops back-to-front (i.e. ascending offsets) collecting a maximal
// run of same-opcode, contiguous-offset ops.
2060 for (int i = Ops.size() - 1; i >= 0; --i) {
2061 MachineInstr *Op = Ops[i];
2062 unsigned Loc = MI2LocMap[Op];
2063 if (Loc <= FirstLoc) {
2067 if (Loc >= LastLoc) {
2073 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2074 if (LastOpcode && LSMOpcode != LastOpcode)
2077 int Offset = getMemoryOpOffset(Op);
2078 unsigned Bytes = getLSMultipleTransferSize(Op);
2080 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2083 LastOffset = Offset;
2085 LastOpcode = LSMOpcode;
2086 if (++NumMove == 8) // FIXME: Tune this limit.
2093 SmallPtrSet<MachineInstr*, 4> MemOps;
2094 SmallSet<unsigned, 4> MemRegs;
2095 for (int i = NumMove-1; i >= 0; --i) {
2096 MemOps.insert(Ops[i]);
2097 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2100 // Be conservative, if the instructions are too far apart, don't
2101 // move them. We want to limit the increase of register pressure.
2102 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2104 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2105 MemOps, MemRegs, TRI);
2107 for (unsigned i = 0; i != NumMove; ++i)
2110 // This is the new location for the loads / stores.
2111 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2112 while (InsertPos != MBB->end()
2113 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2116 // If we are moving a pair of loads / stores, see if it makes sense
2117 // to try to allocate a pair of registers that can form register pairs.
2118 MachineInstr *Op0 = Ops.back();
2119 MachineInstr *Op1 = Ops[Ops.size()-2];
2120 unsigned EvenReg = 0, OddReg = 0;
2121 unsigned BaseReg = 0, PredReg = 0;
2122 ARMCC::CondCodes Pred = ARMCC::AL;
2124 unsigned NewOpc = 0;
2127 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2128 EvenReg, OddReg, BaseReg,
2129 Offset, PredReg, Pred, isT2)) {
// Pre-RA: constrain the virtual registers to the class LDRD/STRD needs.
2133 const MCInstrDesc &MCID = TII->get(NewOpc);
2134 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2135 MRI->constrainRegClass(EvenReg, TRC);
2136 MRI->constrainRegClass(OddReg, TRC);
2138 // Form the pair instruction.
2140 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2141 .addReg(EvenReg, RegState::Define)
2142 .addReg(OddReg, RegState::Define)
2144 // FIXME: We're converting from LDRi12 to an insn that still
2145 // uses addrmode2, so we need an explicit offset reg. It should
2146 // always by reg0 since we're transforming LDRi12s.
2149 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2150 concatenateMemOperands(MIB, Op0, Op1);
2151 DEBUG(dbgs() << "Formed " << *MIB << "\n");
// Store form of the pair instruction.
2154 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2158 // FIXME: We're converting from LDRi12 to an insn that still
2159 // uses addrmode2, so we need an explicit offset reg. It should
2160 // always by reg0 since we're transforming STRi12s.
2163 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2164 concatenateMemOperands(MIB, Op0, Op1);
2165 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2171 // Add register allocation hints to form register pairs.
2172 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
2173 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Fallback: no fusion — just splice the run next to InsertPos.
2175 for (unsigned i = 0; i != NumMove; ++i) {
2176 MachineInstr *Op = Ops.back();
2178 MBB->splice(InsertPos, MBB, Op);
2182 NumLdStMoved += NumMove;
/// Partition a block's loads and stores by base register (stopping at
/// calls/terminators, which act as scheduling barriers), then invoke
/// RescheduleOps for each base that has two or more ops.
// NOTE(review): original line numbers jump in this function (2194,
// 2200-2201, 2204, 2209-2212, 2215, 2217, 2220-2221, 2226, 2228,
// 2234-2238, 2240, 2243-2244, 2250-2254, 2256, 2259-2262, 2264-2269,
// 2274, 2276-2277, 2282, 2284-2286, 2289-2297 absent) — barrier
// handling, the StopHere flow, and the final return are missing from
// this excerpt; verify against upstream.
2192 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2193 bool RetVal = false;
2195 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2196 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2197 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2198 SmallVector<unsigned, 4> LdBases;
2199 SmallVector<unsigned, 4> StBases;
2202 MachineBasicBlock::iterator MBBI = MBB->begin();
2203 MachineBasicBlock::iterator E = MBB->end();
2205 for (; MBBI != E; ++MBBI) {
2206 MachineInstr *MI = MBBI;
2207 if (MI->isCall() || MI->isTerminator()) {
2208 // Stop at barriers.
// Assign each non-debug instruction a monotonically increasing location
// index; RescheduleOps uses it to measure distance between ops.
2213 if (!MI->isDebugValue())
2214 MI2LocMap[MI] = ++Loc;
2216 if (!isMemoryOp(MI))
2218 unsigned PredReg = 0;
2219 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2222 int Opc = MI->getOpcode();
2223 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
2224 unsigned Base = MI->getOperand(1).getReg();
2225 int Offset = getMemoryOpOffset(MI);
2227 bool StopHere = false;
2229 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2230 Base2LdsMap.find(Base);
2231 if (BI != Base2LdsMap.end()) {
// A second op at the same base+offset is a duplicate; flag it so the
// (elided) StopHere handling can cut the group at this point.
2232 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2233 if (Offset == getMemoryOpOffset(BI->second[i])) {
2239 BI->second.push_back(MI);
2241 Base2LdsMap[Base].push_back(MI);
2242 LdBases.push_back(Base);
2245 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2246 Base2StsMap.find(Base);
2247 if (BI != Base2StsMap.end()) {
2248 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2249 if (Offset == getMemoryOpOffset(BI->second[i])) {
2255 BI->second.push_back(MI);
2257 Base2StsMap[Base].push_back(MI);
2258 StBases.push_back(Base);
2263 // Found a duplicate (a base+offset combination that's seen earlier).
2270 // Re-schedule loads.
2271 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2272 unsigned Base = LdBases[i];
2273 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2275 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2278 // Re-schedule stores.
2279 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2280 unsigned Base = StBases[i];
2281 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2283 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Clear the per-barrier-region maps before the next region.
2287 Base2LdsMap.clear();
2288 Base2StsMap.clear();
2298 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
2299 /// optimization pass.
2300 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2302 return new ARMPreAllocLoadStoreOpt();
2303 return new ARMLoadStoreOpt();