1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
// Debug type string used by DEBUG()/STATISTIC machinery for this pass.
49 #define DEBUG_TYPE "arm-ldst-opt"
// Pass statistics (printed with -stats). Each counter records one kind of
// load/store transformation performed by the passes in this file.
51 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
52 STATISTIC(NumSTMGened , "Number of stm instructions generated");
53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
64 /// Post- register allocation pass the combine load / store instructions to
65 /// form ldm / stm instructions.
// NOTE(review): this listing elides several original lines inside the struct
// (e.g. 'static char ID;', a 'bool LiveRegsValid;' member that moveLiveRegsBefore
// reads, the 'MachineInstr *MI;' field of MemOpQueueEntry, the 'unsigned
// InsertPos;' member of MergeCandidate, and the closing braces) -- confirm
// against the full source before editing.
66 struct ARMLoadStoreOpt : public MachineFunctionPass {
68 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function state, initialized in runOnMachineFunction.
70 const MachineFunction *MF;
71 const TargetInstrInfo *TII;
72 const TargetRegisterInfo *TRI;
73 const MachineRegisterInfo *MRI;
74 const ARMSubtarget *STI;
75 const TargetLowering *TL;
// Liveness tracking used to find scratch registers; LiveRegPos is the block
// position LiveRegs currently describes (see moveLiveRegsBefore).
77 LivePhysRegs LiveRegs;
78 RegisterClassInfo RegClassInfo;
79 MachineBasicBlock::const_iterator LiveRegPos;
// Lazily-computed validity flag for RegClassInfo (see findFreeReg).
81 bool RegClassInfoValid;
82 bool isThumb1, isThumb2;
84 bool runOnMachineFunction(MachineFunction &Fn) override;
86 const char *getPassName() const override {
87 return "ARM load / store optimization pass";
91 /// A set of load/store MachineInstrs with same base register sorted by
93 struct MemOpQueueEntry {
95 int Offset; ///< Load/Store offset.
96 unsigned Position; ///< Position as counted from end of basic block.
97 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
98 : MI(MI), Offset(Offset), Position(Position) {}
100 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
102 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
103 /// merged into a LDM/STM.
104 struct MergeCandidate {
105 /// List of instructions ordered by load/store offset.
106 SmallVector<MachineInstr*, 4> Instrs;
107 /// Index in Instrs of the instruction being latest in the schedule.
108 unsigned LatestMIIdx;
109 /// Index in Instrs of the instruction being earliest in the schedule.
110 unsigned EarliestMIIdx;
111 /// Index into the basic block where the merged instruction will be
112 /// inserted. (See MemOpQueueEntry.Position)
114 /// Whether the instructions can be merged into a ldm/stm instruction.
115 bool CanMergeToLSMulti;
116 /// Whether the instructions can be merged into a ldrd/strd instruction.
117 bool CanMergeToLSDouble;
// Arena for MergeCandidate objects plus the per-block candidate lists
// consumed by MergeOpsUpdate / MergeBaseUpdateLSMultiple.
119 SpecificBumpPtrAllocator<MergeCandidate> Allocator;
120 SmallVector<const MergeCandidate*,4> Candidates;
121 SmallVector<MachineInstr*,4> MergeBaseCandidates;
// Internal helpers; see the out-of-line definitions below for details.
123 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
124 MachineBasicBlock::const_iterator Before);
125 unsigned findFreeReg(const TargetRegisterClass &RegClass);
126 void UpdateBaseRegUses(MachineBasicBlock &MBB,
127 MachineBasicBlock::iterator MBBI,
128 DebugLoc DL, unsigned Base, unsigned WordOffset,
129 ARMCC::CondCodes Pred, unsigned PredReg);
130 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
131 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
132 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
133 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
134 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
135 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
136 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
137 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
138 void FormCandidates(const MemOpQueue &MemOps);
139 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
140 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
141 MachineBasicBlock::iterator &MBBI);
142 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
143 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
144 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
145 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
146 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification: the address of ID uniquely identifies this pass.
148 char ARMLoadStoreOpt::ID = 0;
// Returns true if \p MI has a live (non-dead) definition of the CPSR flags
// register; such an instruction is not safe to fold into a load/store merge.
// NOTE(review): the listing elides this function's return statements and
// closing braces (orig. lines 153-161) -- confirm against the full source.
151 static bool definesCPSR(const MachineInstr *MI) {
152 for (const auto &MO : MI->operands()) {
155 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
156 // If the instruction has live CPSR def, then it's not safe to fold it
157 // into load / store.
// Extracts the immediate memory offset of a single load/store \p MI, in
// bytes. The offset immediate is the third-from-last operand for all opcodes
// handled here.
// NOTE(review): the listing elides the return statements for the i12/i8
// branch, the Thumb1 scaled branch, and the final AM3/AM5 result (orig.
// lines 174-175, 179-180, 187-190) -- confirm against the full source.
164 static int getMemoryOpOffset(const MachineInstr *MI) {
165 unsigned Opcode = MI->getOpcode();
166 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
167 unsigned NumOperands = MI->getDesc().getNumOperands();
168 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// These opcodes encode the offset directly as a byte immediate.
170 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
171 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
172 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
173 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
176 // Thumb1 immediate offsets are scaled by 4
177 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
178 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
// Remaining opcodes use addressing-mode-3 (LDRD/STRD) or addressing-mode-5
// encodings; AM5 offsets are in words, hence the *4.
181 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
182 : ARM_AM::getAM5Offset(OffField) * 4;
183 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
184 : ARM_AM::getAM5Op(OffField);
// A 'sub' addressing op negates the decoded magnitude.
186 if (Op == ARM_AM::sub)
// Returns the base-register operand of a single load/store: operand 1 for
// the immediate-offset forms handled by this pass.
192 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
193 return MI.getOperand(1);
// Returns the transferred-value operand of a single load/store: operand 0
// (destination for a load, source for a store).
196 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
197 return MI.getOperand(0);
// Maps a single load/store opcode plus an addressing submode (ia/ib/da/db)
// to the corresponding non-writeback load/store-multiple opcode. Returns 0
// for VLDM/VSTM in db mode, where only the writeback (_UPD) form exists.
// NOTE(review): the listing elides the outer 'switch (Opcode)' line, the
// 'case ...:' labels selecting each inner switch, and all closing braces --
// confirm against the full source.
200 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
202 default: llvm_unreachable("Unhandled opcode!");
// ARM-mode loads: all four submodes available.
206 default: llvm_unreachable("Unhandled submode!");
207 case ARM_AM::ia: return ARM::LDMIA;
208 case ARM_AM::da: return ARM::LDMDA;
209 case ARM_AM::db: return ARM::LDMDB;
210 case ARM_AM::ib: return ARM::LDMIB;
// ARM-mode stores: all four submodes available.
215 default: llvm_unreachable("Unhandled submode!");
216 case ARM_AM::ia: return ARM::STMIA;
217 case ARM_AM::da: return ARM::STMDA;
218 case ARM_AM::db: return ARM::STMDB;
219 case ARM_AM::ib: return ARM::STMIB;
223 // tLDMIA is writeback-only - unless the base register is in the input
227 default: llvm_unreachable("Unhandled submode!");
228 case ARM_AM::ia: return ARM::tLDMIA;
232 // There is no non-writeback tSTMIA either.
235 default: llvm_unreachable("Unhandled submode!");
236 case ARM_AM::ia: return ARM::tSTMIA_UPD;
// Thumb2: only ia and db submodes exist.
242 default: llvm_unreachable("Unhandled submode!");
243 case ARM_AM::ia: return ARM::t2LDMIA;
244 case ARM_AM::db: return ARM::t2LDMDB;
250 default: llvm_unreachable("Unhandled submode!");
251 case ARM_AM::ia: return ARM::t2STMIA;
252 case ARM_AM::db: return ARM::t2STMDB;
// VFP single/double-precision multiples: db exists only with writeback.
257 default: llvm_unreachable("Unhandled submode!");
258 case ARM_AM::ia: return ARM::VLDMSIA;
259 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
264 default: llvm_unreachable("Unhandled submode!");
265 case ARM_AM::ia: return ARM::VSTMSIA;
266 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
271 default: llvm_unreachable("Unhandled submode!");
272 case ARM_AM::ia: return ARM::VLDMDIA;
273 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
278 default: llvm_unreachable("Unhandled submode!");
279 case ARM_AM::ia: return ARM::VSTMDIA;
280 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// Returns the addressing submode (ia/ib/da/db) encoded by a load/store
// multiple opcode.
// NOTE(review): the listing elides the 'switch' line, many case labels, and
// the 'return ARM_AM::...' statements grouping these labels (orig. lines
// 286-338) -- confirm against the full source.
285 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
287 default: llvm_unreachable("Unhandled opcode!");
// The following labels appear to group into the 'ia' bucket ...
294 case ARM::tLDMIA_UPD:
295 case ARM::tSTMIA_UPD:
296 case ARM::t2LDMIA_RET:
298 case ARM::t2LDMIA_UPD:
300 case ARM::t2STMIA_UPD:
302 case ARM::VLDMSIA_UPD:
304 case ARM::VSTMSIA_UPD:
306 case ARM::VLDMDIA_UPD:
308 case ARM::VSTMDIA_UPD:
// ... and these into the 'db' bucket; the returns themselves are elided.
322 case ARM::t2LDMDB_UPD:
324 case ARM::t2STMDB_UPD:
325 case ARM::VLDMSDB_UPD:
326 case ARM::VSTMSDB_UPD:
327 case ARM::VLDMDDB_UPD:
328 case ARM::VSTMDDB_UPD:
// Returns true if \p Opc is a Thumb1 32-bit integer load handled here.
339 static bool isT1i32Load(unsigned Opc) {
340 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
// Returns true if \p Opc is a Thumb2 32-bit integer load handled here.
343 static bool isT2i32Load(unsigned Opc) {
344 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// Returns true if \p Opc is any 32-bit integer load (ARM, Thumb1 or Thumb2).
347 static bool isi32Load(unsigned Opc) {
348 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
// Returns true if \p Opc is a Thumb1 32-bit integer store handled here.
351 static bool isT1i32Store(unsigned Opc) {
352 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
// Returns true if \p Opc is a Thumb2 32-bit integer store handled here.
355 static bool isT2i32Store(unsigned Opc) {
356 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// Returns true if \p Opc is any 32-bit integer store (ARM, Thumb1 or Thumb2).
359 static bool isi32Store(unsigned Opc) {
360 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
// Returns true if \p Opc is a single (non-multiple) load: any 32-bit integer
// load or a VFP single/double load.
363 static bool isLoadSingle(unsigned Opc) {
364 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
// Returns the scale factor applied to the immediate offset of opcode \p Opc.
// NOTE(review): the listing elides the 'switch' line and every case (orig.
// lines 368, 370-381) -- only the default is visible; confirm the per-opcode
// scale values against the full source.
367 static unsigned getImmScale(unsigned Opc) {
369 default: llvm_unreachable("Unhandled opcode!");
// Returns the number of bytes transferred by \p MI: for load/store multiples
// this is (number of register-list operands) * access size. The register
// list length is the count of operands beyond the fixed operands, plus one.
// NOTE(review): most case labels of the switch are elided in this listing
// (orig. lines 386-411, 414-422) -- confirm which opcodes reach the *4
// (32-bit) vs *8 (64-bit) returns against the full source.
384 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
385 switch (MI->getOpcode()) {
412 case ARM::tLDMIA_UPD:
413 case ARM::tSTMIA_UPD:
420 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
423 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
427 /// Update future uses of the base register with the offset introduced
428 /// due to writeback. This function only works on Thumb1.
// Walks forward from \p MBBI rewriting immediate offsets of instructions
// that read \p Base, or merging the writeback into a following ADDS/SUBS of
// Base; otherwise inserts a compensating tSUBi8. Stops once Base is reset,
// killed or redefined. NOTE(review): several original lines are elided in
// this listing (e.g. the 'void' return-type line 429, assignments at 444-448,
// InsertSub handling, and closing braces) -- confirm against the full source.
430 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
431 MachineBasicBlock::iterator MBBI,
432 DebugLoc DL, unsigned Base,
434 ARMCC::CondCodes Pred, unsigned PredReg) {
435 assert(isThumb1 && "Can only update base register uses for Thumb1!");
436 // Start updating any instructions with immediate offsets. Insert a SUB before
437 // the first non-updateable instruction (if any).
438 for (; MBBI != MBB.end(); ++MBBI) {
439 bool InsertSub = false;
440 unsigned Opc = MBBI->getOpcode();
442 if (MBBI->readsRegister(Base)) {
// Immediate-offset Thumb1 loads/stores whose offset can be adjusted.
445 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
447 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
449 if (IsLoad || IsStore) {
450 // Loads and stores with immediate offsets can be updated, but only if
451 // the new offset isn't negative.
452 // The MachineOperand containing the offset immediate is the last one
453 // before predicates.
455 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
456 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
457 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
459 // If storing the base register, it needs to be reset first.
460 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
462 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
467 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
468 !definesCPSR(MBBI)) {
469 // SUBS/ADDS using this register, with a dead def of the CPSR.
470 // Merge it with the update; if the merged offset is too large,
471 // insert a new sub instead.
473 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
474 Offset = (Opc == ARM::tSUBi8) ?
475 MO.getImm() + WordOffset * 4 :
476 MO.getImm() - WordOffset * 4 ;
477 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
478 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
481 // The base register has now been reset, so exit early.
488 // Can't update the instruction.
492 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
493 // Since SUBS sets the condition flags, we can't place the base reset
494 // after an instruction that has a live CPSR def.
495 // The base register might also contain an argument for a function call.
500 // An instruction above couldn't be updated, so insert a sub.
501 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
502 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
// Once Base is killed or redefined, later instructions no longer need the
// compensation -- stop scanning.
506 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
507 // Register got killed. Stop updating.
511 // End of block was reached.
512 if (MBB.succ_size() > 0) {
513 // FIXME: Because of a bug, live registers are sometimes missing from
514 // the successor blocks' live-in sets. This means we can't trust that
515 // information and *always* have to reset at the end of a block.
517 if (MBBI != MBB.end()) --MBBI;
519 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
520 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
524 /// Return the first register of class \p RegClass that is not in \p Regs.
// Uses RegClassInfo's allocation order, computed lazily on first use, and
// LiveRegs (set up by moveLiveRegsBefore) to skip live registers.
// NOTE(review): the fall-through 'return 0;' for the no-free-register case
// (orig. line ~534) is elided in this listing -- confirm.
525 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
526 if (!RegClassInfoValid) {
527 RegClassInfo.runOnMachineFunction(*MF);
528 RegClassInfoValid = true;
531 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
532 if (!LiveRegs.contains(Reg))
537 /// Compute live registers just before instruction \p Before (in normal schedule
538 /// direction). Computes backwards so multiple queries in the same block must
539 /// come in reverse order.
540 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
541 MachineBasicBlock::const_iterator Before) {
542 // Initialize if we never queried in this block.
543 if (!LiveRegsValid) {
// Seed with the block's live-outs, then step backwards to the query point.
545 LiveRegs.addLiveOuts(&MBB, true);
546 LiveRegPos = MBB.end();
547 LiveRegsValid = true;
549 // Move backward just before the "Before" position.
550 while (LiveRegPos != Before) {
552 LiveRegs.stepBackward(*LiveRegPos);
// Returns whether register list \p Regs contains a given register.
// NOTE(review): the second parameter, the comparison and the returns (orig.
// lines 557, 559-561) are elided in this listing -- confirm.
556 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
558 for (const std::pair<unsigned, bool> &R : Regs)
564 /// Create and insert a LDM or STM with Base as base register and registers in
565 /// Regs as the register operands that would be loaded / stored. It returns
566 /// true if the transformation is done.
// Chooses an addressing submode from the starting Offset, possibly
// materializes a new base register when Offset != 0, handles the Thumb1
// writeback/CPSR restrictions, and finally builds the LDM/STM. Returns the
// new instruction, or nullptr if merging is not profitable/possible.
// NOTE(review): this listing elides numerous lines (early-exit returns,
// 'unsigned NewBase = 0;', 'int BaseOpc = ...' assignments, the Offset<0
// branch, and closing braces) -- confirm against the full source.
567 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
568 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
569 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
570 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
571 unsigned NumRegs = Regs.size();
574 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
575 // Compute liveness information for that register to make the decision.
576 bool SafeToClobberCPSR = !isThumb1 ||
577 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
578 MachineBasicBlock::LQR_Dead);
580 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
582 // Exception: If the base register is in the input reglist, Thumb1 LDM is
584 // It's also not possible to merge an STR of the base register in Thumb1.
585 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
586 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
587 if (Opcode == ARM::tLDRi) {
589 } else if (Opcode == ARM::tSTRi) {
594 ARM_AM::AMSubMode Mode = ARM_AM::ia;
595 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
596 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
597 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
// Pick the submode that makes the run of offsets contiguous from Base.
599 if (Offset == 4 && haveIBAndDA) {
601 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
603 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
604 // VLDM/VSTM do not support DB mode without also updating the base reg.
606 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
607 // Check if this is a supported opcode before inserting instructions to
608 // calculate a new base register.
609 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
611 // If starting offset isn't zero, insert a MI to materialize a new base.
612 // But only do so if it is cost effective, i.e. merging more than two
617 // On Thumb1, it's not worth materializing a new base register without
618 // clobbering the CPSR (i.e. not using ADDS/SUBS).
619 if (!SafeToClobberCPSR)
623 if (isi32Load(Opcode)) {
624 // If it is a load, then just use one of the destination register to
625 // use as the new base.
626 NewBase = Regs[NumRegs-1].first;
628 // Find a free register that we can use as scratch register.
629 moveLiveRegsBefore(MBB, InsertBefore);
630 // The merged instruction does not exist yet but will use several Regs if
632 if (!isLoadSingle(Opcode))
633 for (const std::pair<unsigned, bool> &R : Regs)
634 LiveRegs.addReg(R.first);
636 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
// Select the add/sub opcode used to compute the new base, by target mode
// and immediate range.
642 isThumb2 ? ARM::t2ADDri :
643 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
644 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
645 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
650 isThumb2 ? ARM::t2SUBri :
651 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
652 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
655 if (!TL->isLegalAddImmediate(Offset))
656 // FIXME: Try add with register operand?
657 return nullptr; // Probably not worth it then.
659 // We can only append a kill flag to the add/sub input if the value is not
660 // used in the register list of the stm as well.
661 bool KillOldBase = BaseKill &&
662 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
665 // Thumb1: depending on immediate size, use either
666 // ADDS NewBase, Base, #imm3
669 // ADDS NewBase, #imm8.
670 if (Base != NewBase &&
671 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
672 // Need to insert a MOV to the new base first.
673 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
675 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
676 if (Pred != ARMCC::AL)
678 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
679 .addReg(Base, getKillRegState(KillOldBase));
681 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
682 .addReg(Base, getKillRegState(KillOldBase))
683 .addImm(Pred).addReg(PredReg);
685 // The following ADDS/SUBS becomes an update.
689 if (BaseOpc == ARM::tADDrSPi) {
690 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
691 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
692 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
693 .addImm(Pred).addReg(PredReg);
696 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
697 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
698 .addImm(Pred).addReg(PredReg);
700 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
701 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
702 .addImm(Pred).addReg(PredReg).addReg(0);
705 BaseKill = true; // New base is always killed straight away.
708 bool isDef = isLoadSingle(Opcode);
710 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
711 // base register writeback.
712 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
716 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
717 // - There is no writeback (LDM of base register),
718 // - the base register is killed by the merged instruction,
719 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
720 // to reset the base register.
721 // Otherwise, don't merge.
722 // It's safe to return here since the code to materialize a new base register
723 // above is also conditional on SafeToClobberCPSR.
724 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
727 MachineInstrBuilder MIB;
730 if (Opcode == ARM::tLDMIA)
731 // Update tLDMIA with writeback if necessary.
732 Opcode = ARM::tLDMIA_UPD;
734 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
736 // Thumb1: we might need to set base writeback when building the MI.
737 MIB.addReg(Base, getDefRegState(true))
738 .addReg(Base, getKillRegState(BaseKill));
740 // The base isn't dead after a merged instruction with writeback.
741 // Insert a sub instruction after the newly formed instruction to reset.
743 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
746 // No writeback, simply build the MachineInstr.
747 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
748 MIB.addReg(Base, getKillRegState(BaseKill));
// Append predicate operands and the register list itself.
751 MIB.addImm(Pred).addReg(PredReg);
753 for (const std::pair<unsigned, bool> &R : Regs)
754 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
756 return MIB.getInstr();
// Creates and inserts a Thumb2 t2LDRDi8/t2STRDi8 for exactly two registers,
// preserving the given base, offset and predicate. Kill flags from Regs are
// carried over for stores; loads define both registers.
759 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
760 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
761 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
762 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
763 bool IsLoad = isi32Load(Opcode);
764 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
765 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
767 assert(Regs.size() == 2);
768 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
769 TII->get(LoadStoreOpcode));
// Loads: both destination registers are defs; stores: both are (possibly
// killed) uses. NOTE(review): the 'if (IsLoad) ... else ...' wrapper lines
// (orig. 770/773) are elided in this listing -- confirm.
771 MIB.addReg(Regs[0].first, RegState::Define)
772 .addReg(Regs[1].first, RegState::Define);
774 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
775 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
777 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
778 return MIB.getInstr();
781 /// Call MergeOps and update MemOps and merges accordingly on success.
// Collects the register list and implicit super-register defs from the
// candidate's instructions, attempts the ldrd/strd merge first and the
// ldm/stm merge second, then erases the merged instructions and repairs
// kill flags / implicit defs in the affected range. Returns the merged
// instruction or nullptr. NOTE(review): several lines are elided in this
// listing (the early 'return nullptr' when no merge happened, MI->eraseFromParent(),
// MO.setIsUndef()/ClearKill bodies, and the final return) -- confirm
// against the full source.
782 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
783 const MachineInstr *First = Cand.Instrs.front();
784 unsigned Opcode = First->getOpcode();
785 bool IsLoad = isLoadSingle(Opcode);
786 SmallVector<std::pair<unsigned, bool>, 8> Regs;
787 SmallVector<unsigned, 4> ImpDefs;
788 DenseSet<unsigned> KilledRegs;
789 DenseSet<unsigned> UsedRegs;
790 // Determine list of registers and list of implicit super-register defs.
791 for (const MachineInstr *MI : Cand.Instrs) {
792 const MachineOperand &MO = getLoadStoreRegOp(*MI);
793 unsigned Reg = MO.getReg();
794 bool IsKill = MO.isKill();
796 KilledRegs.insert(Reg);
797 Regs.push_back(std::make_pair(Reg, IsKill));
798 UsedRegs.insert(Reg);
801 // Collect any implicit defs of super-registers, after merging we can't
802 // be sure anymore that we properly preserved these live ranges and must
803 // removed these implicit operands.
804 for (const MachineOperand &MO : MI->implicit_operands()) {
805 if (!MO.isReg() || !MO.isDef() || MO.isDead())
807 assert(MO.isImplicit());
808 unsigned DefReg = MO.getReg();
// Avoid duplicate entries for the same super-register.
810 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
812 // We can ignore cases where the super-reg is read and written.
813 if (MI->readsRegister(DefReg))
815 ImpDefs.push_back(DefReg);
820 // Attempt the merge.
821 typedef MachineBasicBlock::iterator iterator;
822 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
823 iterator InsertBefore = std::next(iterator(LatestMI));
824 MachineBasicBlock &MBB = *LatestMI->getParent();
825 unsigned Offset = getMemoryOpOffset(First);
826 unsigned Base = getLoadStoreBaseOp(*First).getReg();
827 bool BaseKill = LatestMI->killsRegister(Base);
828 unsigned PredReg = 0;
829 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
830 DebugLoc DL = First->getDebugLoc();
831 MachineInstr *Merged = nullptr;
// Prefer ldrd/strd when legal; fall back to ldm/stm.
832 if (Cand.CanMergeToLSDouble)
833 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
834 Opcode, Pred, PredReg, DL, Regs);
835 if (!Merged && Cand.CanMergeToLSMulti)
836 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
837 Opcode, Pred, PredReg, DL, Regs);
841 // Determine earliest instruction that will get removed. We then keep an
842 // iterator just above it so the following erases don't invalidated it.
843 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
844 bool EarliestAtBegin = false;
845 if (EarliestI == MBB.begin()) {
846 EarliestAtBegin = true;
848 EarliestI = std::prev(EarliestI);
851 // Remove instructions which have been merged.
852 for (MachineInstr *MI : Cand.Instrs)
855 // Determine range between the earliest removed instruction and the new one.
857 EarliestI = MBB.begin();
859 EarliestI = std::next(EarliestI);
860 auto FixupRange = make_range(EarliestI, iterator(Merged));
862 if (isLoadSingle(Opcode)) {
863 // If the previous loads defined a super-reg, then we have to mark earlier
864 // operands undef; Replicate the super-reg def on the merged instruction.
865 for (MachineInstr &MI : FixupRange) {
866 for (unsigned &ImpDefReg : ImpDefs) {
867 for (MachineOperand &MO : MI.implicit_operands()) {
868 if (!MO.isReg() || MO.getReg() != ImpDefReg)
// Re-attach each collected implicit super-reg def to the merged load.
878 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
879 for (unsigned ImpDef : ImpDefs)
880 MIB.addReg(ImpDef, RegState::ImplicitDefine);
882 // Remove kill flags: We are possibly storing the values later now.
883 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
884 for (MachineInstr &MI : FixupRange) {
885 for (MachineOperand &MO : MI.uses()) {
886 if (!MO.isReg() || !MO.isKill())
888 if (UsedRegs.count(MO.getReg()))
// Stores should not have collected any implicit super-reg defs.
892 assert(ImpDefs.empty());
// Returns whether \p Offset can be encoded in a t2LDRDi8/t2STRDi8: a
// word-aligned magnitude below 1024 (the 8-bit immediate is scaled by 4).
898 static bool isValidLSDoubleOffset(int Offset) {
899 unsigned Value = abs(Offset);
900 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
902 return (Value % 4) == 0 && Value < 1024;
905 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
// Greedily walks MemOps (same base register, sorted by offset) forming maximal
// runs of consecutive-offset operations that can become an LDM/STM and/or a
// LDRD/STRD, recording each run as a MergeCandidate. NOTE(review): this
// listing elides a few lines (the 'unsigned SIndex = 0;'/'do {' loop opener,
// 'unsigned Count = 1;', Latest/Earliest updates at orig. 977/979, the
// PRegNum/Offset advance at 981-983, the single-instruction guard around
// 992-993, and 'SIndex += Count;') -- confirm against the full source.
906 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
907 const MachineInstr *FirstMI = MemOps[0].MI;
908 unsigned Opcode = FirstMI->getOpcode();
909 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
910 unsigned Size = getLSMultipleTransferSize(FirstMI);
913 unsigned EIndex = MemOps.size();
915 // Look at the first instruction.
916 const MachineInstr *MI = MemOps[SIndex].MI;
917 int Offset = MemOps[SIndex].Offset;
918 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
919 unsigned PReg = PMO.getReg();
920 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
921 unsigned Latest = SIndex;
922 unsigned Earliest = SIndex;
924 bool CanMergeToLSDouble =
925 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
926 // ARM errata 602117: LDRD with base in list may result in incorrect base
927 // register when interrupted or faulted.
928 if (STI->isCortexM3() && isi32Load(Opcode) &&
929 PReg == getLoadStoreBaseOp(*MI).getReg())
930 CanMergeToLSDouble = false;
932 bool CanMergeToLSMulti = true;
933 // On swift vldm/vstm starting with an odd register number as that needs
934 // more uops than single vldrs.
935 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
936 CanMergeToLSMulti = false;
938 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
939 // deprecated; LDM to PC is fine but cannot happen here.
940 if (PReg == ARM::SP || PReg == ARM::PC)
941 CanMergeToLSMulti = CanMergeToLSDouble = false;
943 // Merge following instructions where possible.
944 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
945 int NewOffset = MemOps[I].Offset;
// Offsets must advance by exactly one transfer size to stay mergeable.
946 if (NewOffset != Offset + (int)Size)
948 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
949 unsigned Reg = MO.getReg();
950 if (Reg == ARM::SP || Reg == ARM::PC)
953 // See if the current load/store may be part of a multi load/store.
954 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
955 bool PartOfLSMulti = CanMergeToLSMulti;
957 // Register numbers must be in ascending order.
958 if (RegNum <= PRegNum)
959 PartOfLSMulti = false;
960 // For VFP / NEON load/store multiples, the registers must be
961 // consecutive and within the limit on the number of registers per
963 else if (!isNotVFP && RegNum != PRegNum+1)
964 PartOfLSMulti = false;
966 // See if the current load/store may be part of a double load/store.
967 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
969 if (!PartOfLSMulti && !PartOfLSDouble)
971 CanMergeToLSMulti &= PartOfLSMulti;
972 CanMergeToLSDouble &= PartOfLSDouble;
973 // Track MemOp with latest and earliest position (Positions are
974 // counted in reverse).
975 unsigned Position = MemOps[I].Position;
976 if (Position < MemOps[Latest].Position)
978 else if (Position > MemOps[Earliest].Position)
980 // Prepare for next MemOp.
985 // Form a candidate from the Ops collected so far.
986 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
987 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
988 Candidate->Instrs.push_back(MemOps[C].MI);
989 Candidate->LatestMIIdx = Latest - SIndex;
990 Candidate->EarliestMIIdx = Earliest - SIndex;
991 Candidate->InsertPos = MemOps[Latest].Position;
993 CanMergeToLSMulti = CanMergeToLSDouble = false;
994 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
995 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
996 Candidates.push_back(Candidate);
997 // Continue after the chain.
999 } while (SIndex < EIndex);
// Maps a load/store-multiple opcode plus submode to the corresponding
// writeback (_UPD) variant.
// NOTE(review): the outer 'switch (Opc)' line, the case labels selecting
// each inner switch, and closing braces are elided in this listing --
// confirm against the full source.
1002 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1003 ARM_AM::AMSubMode Mode) {
1005 default: llvm_unreachable("Unhandled opcode!");
// ARM-mode LDM: all four submodes have writeback forms.
1011 default: llvm_unreachable("Unhandled submode!");
1012 case ARM_AM::ia: return ARM::LDMIA_UPD;
1013 case ARM_AM::ib: return ARM::LDMIB_UPD;
1014 case ARM_AM::da: return ARM::LDMDA_UPD;
1015 case ARM_AM::db: return ARM::LDMDB_UPD;
// ARM-mode STM: all four submodes have writeback forms.
1022 default: llvm_unreachable("Unhandled submode!");
1023 case ARM_AM::ia: return ARM::STMIA_UPD;
1024 case ARM_AM::ib: return ARM::STMIB_UPD;
1025 case ARM_AM::da: return ARM::STMDA_UPD;
1026 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2 and VFP variants: only ia and db exist.
1031 default: llvm_unreachable("Unhandled submode!");
1032 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1033 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1038 default: llvm_unreachable("Unhandled submode!");
1039 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1040 case ARM_AM::db: return ARM::t2STMDB_UPD;
1044 default: llvm_unreachable("Unhandled submode!");
1045 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1046 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1050 default: llvm_unreachable("Unhandled submode!");
1051 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1052 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1056 default: llvm_unreachable("Unhandled submode!");
1057 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1058 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1062 default: llvm_unreachable("Unhandled submode!");
1063 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1064 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1069 /// Check if the given instruction increments or decrements a register and
1070 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1071 /// generated by the instruction are possibly read as well.
// Also returns 0 when the instruction is not a matching same-register
// add/sub, or its predicate differs from (Pred, PredReg).
// NOTE(review): local declarations ('int Scale;', 'bool CheckCPSRDef;',
// 'unsigned MIPredReg;'), the default 'return 0;' cases and closing braces
// are elided in this listing -- confirm against the full source.
1072 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1073 ARMCC::CondCodes Pred, unsigned PredReg) {
1076 switch (MI.getOpcode()) {
1077 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1078 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1080 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1082 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1083 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1084 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
// Must be Reg = Reg +/- imm under the same predicate to qualify.
1089 if (MI.getOperand(0).getReg() != Reg ||
1090 MI.getOperand(1).getReg() != Reg ||
1091 getInstrPredicate(&MI, MIPredReg) != Pred ||
1092 MIPredReg != PredReg)
// A live CPSR def disqualifies flag-setting forms.
1095 if (CheckCPSRDef && definesCPSR(&MI))
1097 return MI.getOperand(2).getImm() * Scale;
1100 /// Searches for an increment or decrement of \p Reg before \p MBBI.
1101 static MachineBasicBlock::iterator
1102 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1103 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
// Returns an iterator to the inc/dec instruction immediately preceding MBBI
// (debug values skipped), with the signed byte delta in \p Offset.
// MBB.end() is the "not found" sentinel.
1105 MachineBasicBlock &MBB = *MBBI->getParent();
1106 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1107 MachineBasicBlock::iterator EndMBBI = MBB.end();
1108 if (MBBI == BeginMBBI)
1111 // Skip debug values.
1112 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1113 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
// Offset == 0 means PrevMBBI is not a matching inc/dec of Reg.
1116 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1117 return Offset == 0 ? EndMBBI : PrevMBBI;
1120 /// Searches for a increment or decrement of \p Reg after \p MBBI.
1121 static MachineBasicBlock::iterator
1122 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1123 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
// Mirror of findIncDecBefore: look at the next real (non-debug) instruction
// after MBBI. MBB.end() is the "not found" sentinel.
1125 MachineBasicBlock &MBB = *MBBI->getParent();
1126 MachineBasicBlock::iterator EndMBBI = MBB.end();
1127 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1128 // Skip debug values.
1129 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1131 if (NextMBBI == EndMBBI)
// Offset == 0 means NextMBBI is not a matching inc/dec of Reg.
1134 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1135 return Offset == 0 ? EndMBBI : NextMBBI;
1138 /// Fold proceeding/trailing inc/dec of base register into the
1139 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1141 /// stmia rn, <ra, rb, rc>
1142 /// rn := rn + 4 * 3;
1144 /// stmia rn!, <ra, rb, rc>
1146 /// rn := rn - 4 * 3;
1147 /// ldmia rn, <ra, rb, rc>
1149 /// ldmdb rn!, <ra, rb, rc>
1150 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1151 // Thumb1 is already using updating loads/stores.
1152 if (isThumb1) return false;
1154 const MachineOperand &BaseOP = MI->getOperand(0);
1155 unsigned Base = BaseOP.getReg();
1156 bool BaseKill = BaseOP.isKill();
1157 unsigned PredReg = 0;
1158 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1159 unsigned Opcode = MI->getOpcode();
1160 DebugLoc DL = MI->getDebugLoc();
1162 // Can't use an updating ld/st if the base register is also a dest
1163 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1164 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1165 if (MI->getOperand(i).getReg() == Base)
1168 int Bytes = getLSMultipleTransferSize(MI);
1169 MachineBasicBlock &MBB = *MI->getParent();
1170 MachineBasicBlock::iterator MBBI(MI);
// First try to fold an inc/dec that precedes the memory op; a decrement of
// exactly the transfer size before an ia/ib op converts it to db/da form.
// NOTE(review): the lines applying the Mode conversion are elided here.
1172 MachineBasicBlock::iterator MergeInstr
1173 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1174 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1175 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1177 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
// Otherwise try a trailing inc/dec: it must match the submode direction and
// the exact transfer size, or we bail out.
1180 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1181 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1182 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
// The add/sub is now folded into the writeback; delete it.
1185 MBB.erase(MergeInstr);
1187 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1188 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1189 .addReg(Base, getDefRegState(true)) // WB base register
1190 .addReg(Base, getKillRegState(BaseKill))
1191 .addImm(Pred).addReg(PredReg);
1193 // Transfer the rest of operands.
1194 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1195 MIB.addOperand(MI->getOperand(OpNum));
1197 // Transfer memoperands.
1198 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// Map a single load/store opcode to its pre-indexed (writeback-before)
// variant; for VLDR/VSTR there is no pre-indexed form, so the one-register
// VLDM/VSTM _UPD multiple is used instead, direction chosen by \p Mode.
// NOTE(review): the `switch (Opc)` line and case labels are elided here.
1204 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1205 ARM_AM::AddrOpc Mode) {
1208 return ARM::LDR_PRE_IMM;
1210 return ARM::STR_PRE_IMM;
1212 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1214 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1216 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1218 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1221 return ARM::t2LDR_PRE;
1224 return ARM::t2STR_PRE;
1225 default: llvm_unreachable("Unhandled opcode!");
// Post-indexed (writeback-after) counterpart of
// getPreIndexedLoadStoreOpcode. The VLDM/VSTM _UPD mappings are identical in
// both helpers since a one-register multiple covers either indexing form.
// NOTE(review): the `switch (Opc)` line and case labels are elided here.
1229 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1230 ARM_AM::AddrOpc Mode) {
1233 return ARM::LDR_POST_IMM;
1235 return ARM::STR_POST_IMM;
1237 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1239 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1241 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1243 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1246 return ARM::t2LDR_POST;
1249 return ARM::t2STR_POST;
1250 default: llvm_unreachable("Unhandled opcode!");
1254 /// Fold proceeding/trailing inc/dec of base register into the
1255 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1256 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1257 // Thumb1 doesn't have updating LDR/STR.
1258 // FIXME: Use LDM/STM with single register instead.
1259 if (isThumb1) return false;
1261 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1262 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1263 unsigned Opcode = MI->getOpcode();
1264 DebugLoc DL = MI->getDebugLoc();
// isAM5: VFP load/store (addrmode5); isAM2: integer LDR/STR (addrmode2).
1265 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1266 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1267 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset memory ops can absorb a separate base update.
1268 if (isi32Load(Opcode) || isi32Store(Opcode))
1269 if (MI->getOperand(2).getImm() != 0)
1271 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1274 // Can't do the merge if the destination register is the same as the would-be
1275 // writeback register.
1276 if (MI->getOperand(0).getReg() == Base)
1279 unsigned PredReg = 0;
1280 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1281 int Bytes = getLSMultipleTransferSize(MI);
1282 MachineBasicBlock &MBB = *MI->getParent();
1283 MachineBasicBlock::iterator MBBI(MI);
// A preceding add/sub of the transfer size becomes a pre-indexed access; a
// trailing one becomes post-indexed. AM5 (VFP) has no pre-increment /
// post-decrement forms, hence the !isAM5 guards.
1285 MachineBasicBlock::iterator MergeInstr
1286 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1288 if (!isAM5 && Offset == Bytes) {
1289 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1290 } else if (Offset == -Bytes) {
1291 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1293 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1294 if (Offset == Bytes) {
1295 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1296 } else if (!isAM5 && Offset == -Bytes) {
1297 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// The add/sub is folded into the new instruction; remove it.
1301 MBB.erase(MergeInstr);
1303 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
1305 bool isLd = isLoadSingle(Opcode);
1307 // VLDM[SD]_UPD, VSTM[SD]_UPD
1308 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1309 // updating load/store-multiple instructions can be used with only one
1311 MachineOperand &MO = MI->getOperand(0);
1312 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1313 .addReg(Base, getDefRegState(true)) // WB base register
1314 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1315 .addImm(Pred).addReg(PredReg)
1316 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1317 getKillRegState(MO.isKill())));
1320 // LDR_PRE, LDR_POST
1321 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1322 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1323 .addReg(Base, RegState::Define)
1324 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// LDR_POST_IMM still uses addrmode2, so encode the offset and add a
// vestigial zero offset register (addReg(0)).
1326 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1327 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1328 .addReg(Base, RegState::Define)
1329 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1332 // t2LDR_PRE, t2LDR_POST
1333 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1334 .addReg(Base, RegState::Define)
1335 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1338 MachineOperand &MO = MI->getOperand(0);
1339 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1340 // the vestigal zero-reg offset register. When that's fixed, this clause
1341 // can be removed entirely.
1342 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1343 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1344 // STR_PRE, STR_POST
1345 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1346 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1347 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1349 // t2STR_PRE, t2STR_POST
1350 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1351 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1352 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Fold a preceding/trailing +/-8 base update into a t2LDRDi8/t2STRDi8,
// producing the pre-/post-indexed t2LDRD/t2STRD forms.
1360 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1361 unsigned Opcode = MI.getOpcode();
1362 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1363 "Must have t2STRDi8 or t2LDRDi8");
// Only a zero immediate offset can absorb the base update.
1364 if (MI.getOperand(3).getImm() != 0)
1367 // Behaviour for writeback is undefined if base register is the same as one
1369 const MachineOperand &BaseOp = MI.getOperand(2);
1370 unsigned Base = BaseOp.getReg();
1371 const MachineOperand &Reg0Op = MI.getOperand(0);
1372 const MachineOperand &Reg1Op = MI.getOperand(1);
1373 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1377 ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
1378 MachineBasicBlock::iterator MBBI(MI);
1379 MachineBasicBlock &MBB = *MI.getParent();
// A doubleword transfer is 8 bytes; accept an update of exactly +/-8 either
// before (pre-indexed) or after (post-indexed) the memory op.
1381 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
1384 if (Offset == 8 || Offset == -8) {
1385 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1387 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1388 if (Offset == 8 || Offset == -8) {
1389 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
// The add/sub is folded into the writeback; delete it.
1393 MBB.erase(MergeInstr);
1395 DebugLoc DL = MI.getDebugLoc();
1396 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Loads define the base (writeback) after the two destinations; stores
// define the writeback base first, then take the two sources.
1397 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1398 MIB.addOperand(Reg0Op).addOperand(Reg1Op)
1399 .addReg(BaseOp.getReg(), RegState::Define);
1401 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1402 MIB.addReg(BaseOp.getReg(), RegState::Define)
1403 .addOperand(Reg0Op).addOperand(Reg1Op);
1405 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1406 .addImm(Offset).addImm(Pred).addReg(PredReg);
1407 assert(TII->get(Opcode).getNumOperands() == 6 &&
1408 TII->get(NewOpc).getNumOperands() == 7 &&
1409 "Unexpected number of operands in Opcode specification.");
1411 // Transfer implicit operands.
1412 for (const MachineOperand &MO : MI.implicit_operands())
1414 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
1420 /// Returns true if instruction is a memory operation that this pass is capable
1421 /// of operating on.
1422 static bool isMemoryOp(const MachineInstr *MI) {
1423 // When no memory operands are present, conservatively assume unaligned,
1424 // volatile, unfoldable.
1425 if (!MI->hasOneMemOperand())
1428 const MachineMemOperand *MMO = *MI->memoperands_begin();
1430 // Don't touch volatile memory accesses - we may be changing their order.
1431 if (MMO->isVolatile())
1434 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1436 if (MMO->getAlignment() < 4)
1439 // str <undef> could probably be eliminated entirely, but for now we just want
1440 // to avoid making a mess of it.
1441 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1442 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1443 MI->getOperand(0).isUndef())
1446 // Likewise don't mess with references to undefined addresses.
1447 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1448 MI->getOperand(1).isUndef())
// NOTE(review): the opcode switch's case labels are elided in this excerpt;
// each visible `return` requires the base operand (operand 1) to be a plain
// register, i.e. a register-offset-free addressing form this pass can merge.
1451 unsigned Opcode = MI->getOpcode();
1456 return MI->getOperand(1).isReg();
1459 return MI->getOperand(1).isReg();
1470 return MI->getOperand(1).isReg();
// Emit a single LDR (isDef=true) or STR (isDef=false) before MBBI with the
// given register/base/offset and kill/undef flags. Used by
// FixInvalidRegPairOp when splitting an LDRD/STRD into two accesses.
1475 static void InsertLDR_STR(MachineBasicBlock &MBB,
1476 MachineBasicBlock::iterator &MBBI,
1477 int Offset, bool isDef,
1478 DebugLoc DL, unsigned NewOpc,
1479 unsigned Reg, bool RegDeadKill, bool RegUndef,
1480 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1481 bool OffKill, bool OffUndef,
1482 ARMCC::CondCodes Pred, unsigned PredReg,
1483 const TargetInstrInfo *TII, bool isT2) {
// Load form: Reg is a def (possibly dead).
1485 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1487 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1488 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1489 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is a use (possibly killed/undef).
1491 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1493 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1494 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1495 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Rewrite LDRD/STRD instructions that either hit Cortex-M3 errata 602117 or
// use a register pair that is not even/odd-consecutive (illegal for ARM-mode
// LDRD/STRD). The doubleword op becomes either one LDM/STM (when registers
// ascend and the offset is zero) or two single loads/stores.
1499 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1500 MachineBasicBlock::iterator &MBBI) {
1501 MachineInstr *MI = &*MBBI;
1502 unsigned Opcode = MI->getOpcode();
1503 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1506 const MachineOperand &BaseOp = MI->getOperand(2);
1507 unsigned BaseReg = BaseOp.getReg();
1508 unsigned EvenReg = MI->getOperand(0).getReg();
1509 unsigned OddReg = MI->getOperand(1).getReg();
1510 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1511 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1513 // ARM errata 602117: LDRD with base in list may result in incorrect base
1514 // register when interrupted or faulted.
1515 bool Errata602117 = EvenReg == BaseReg &&
1516 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1517 // ARM LDRD/STRD needs consecutive registers.
1518 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1519 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1521 if (!Errata602117 && !NonConsecutiveRegs)
// Capture all liveness flags before the original instruction is replaced.
1524 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1525 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1526 bool EvenDeadKill = isLd ?
1527 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1528 bool EvenUndef = MI->getOperand(0).isUndef();
1529 bool OddDeadKill = isLd ?
1530 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1531 bool OddUndef = MI->getOperand(1).isUndef();
1532 bool BaseKill = BaseOp.isKill();
1533 bool BaseUndef = BaseOp.isUndef();
1534 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1535 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1536 int OffImm = getMemoryOpOffset(MI);
1537 unsigned PredReg = 0;
1538 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1540 if (OddRegNum > EvenRegNum && OffImm == 0) {
1541 // Ascending register numbers and no offset. It's safe to change it to a
1543 unsigned NewOpc = (isLd)
1544 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1545 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
// Load: register operands are defs with dead markers preserved.
1547 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1548 .addReg(BaseReg, getKillRegState(BaseKill))
1549 .addImm(Pred).addReg(PredReg)
1550 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1551 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store: register operands are uses with kill/undef markers preserved.
1554 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1555 .addReg(BaseReg, getKillRegState(BaseKill))
1556 .addImm(Pred).addReg(PredReg)
1558 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1560 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1564 // Split into two instructions.
1565 unsigned NewOpc = (isLd)
1566 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1567 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1568 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1569 // so adjust and use t2LDRi12 here for that.
1570 unsigned NewOpc2 = (isLd)
1571 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1572 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1573 DebugLoc dl = MBBI->getDebugLoc();
1574 // If this is a load and base register is killed, it may have been
1575 // re-defed by the load, make sure the first load does not clobber it.
1577 (BaseKill || OffKill) &&
1578 (TRI->regsOverlap(EvenReg, BaseReg))) {
// Emit the odd (higher-address) access first so the even access, which
// overlaps the base, goes last and cannot clobber the base prematurely.
1579 assert(!TRI->regsOverlap(OddReg, BaseReg));
1580 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1581 OddReg, OddDeadKill, false,
1582 BaseReg, false, BaseUndef, false, OffUndef,
1583 Pred, PredReg, TII, isT2);
1584 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1585 EvenReg, EvenDeadKill, false,
1586 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1587 Pred, PredReg, TII, isT2);
1589 if (OddReg == EvenReg && EvenDeadKill) {
1590 // If the two source operands are the same, the kill marker is
1591 // probably on the first one. e.g.
1592 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1593 EvenDeadKill = false;
1596 // Never kill the base register in the first instruction.
1597 if (EvenReg == BaseReg)
1598 EvenDeadKill = false;
1599 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1600 EvenReg, EvenDeadKill, EvenUndef,
1601 BaseReg, false, BaseUndef, false, OffUndef,
1602 Pred, PredReg, TII, isT2);
1603 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1604 OddReg, OddDeadKill, OddUndef,
1605 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1606 Pred, PredReg, TII, isT2);
// Remove the original LDRD/STRD and leave MBBI at the erased position so
// the caller's backward scan re-examines the replacement instructions.
1614 MBBI = MBB.erase(MBBI);
1618 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1619 /// incrementing offset into LDM / STM ops.
// Main driver for one basic block: walk backwards collecting chains of
// same-base, same-opcode, same-predicate memory ops into MemOps, turn them
// into MergeCandidates, then perform the merges and fold base updates.
1620 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// CurrBase/CurrOpc/CurrPred describe the chain currently being collected;
// CurrBase == 0 means no chain is open.
1622 unsigned CurrBase = 0;
1623 unsigned CurrOpc = ~0u;
1624 ARMCC::CondCodes CurrPred = ARMCC::AL;
1625 unsigned Position = 0;
1626 assert(Candidates.size() == 0);
1627 assert(MergeBaseCandidates.size() == 0);
1628 LiveRegsValid = false;
1630 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1632 // The instruction in front of the iterator is the one we look at.
1633 MBBI = std::prev(I);
1634 if (FixInvalidRegPairOp(MBB, MBBI))
1638 if (isMemoryOp(MBBI)) {
1639 unsigned Opcode = MBBI->getOpcode();
1640 const MachineOperand &MO = MBBI->getOperand(0);
1641 unsigned Reg = MO.getReg();
1642 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1643 unsigned PredReg = 0;
1644 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1645 int Offset = getMemoryOpOffset(MBBI);
1646 if (CurrBase == 0) {
1647 // Start of a new chain.
1651 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1654 // Note: No need to match PredReg in the next if.
1655 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1657 // r4 := ldr [r0, #8]
1658 // r4 := ldr [r0, #4]
1661 // If a load overrides the base register or a register loaded by
1662 // another load in our chain, we cannot take this instruction.
1663 bool Overlap = false;
1664 if (isLoadSingle(Opcode)) {
1665 Overlap = (Base == Reg);
1667 for (const MemOpQueueEntry &E : MemOps) {
1668 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1677 // Check offset and sort memory operation into the current chain.
1678 if (Offset > MemOps.back().Offset) {
1679 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
// Otherwise do an insertion-sort into the offset-ordered queue.
1682 MemOpQueue::iterator MI, ME;
1683 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1684 if (Offset < MI->Offset) {
1685 // Found a place to insert.
1688 if (Offset == MI->Offset) {
1689 // Collision, abort.
1694 if (MI != MemOps.end()) {
1695 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1702 // Don't advance the iterator; The op will start a new chain next.
1705 // Fallthrough to look into existing chain.
1706 } else if (MBBI->isDebugValue()) {
1708 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1709 MBBI->getOpcode() == ARM::t2STRDi8) {
1710 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1711 // remember them because we may still be able to merge add/sub into them.
1712 MergeBaseCandidates.push_back(MBBI);
1716 // If we are here then the chain is broken; Extract candidates for a merge.
1717 if (MemOps.size() > 0) {
1718 FormCandidates(MemOps);
1719 // Reset for the next chain.
1722 CurrPred = ARMCC::AL;
// Flush any chain still open at the top of the block.
1726 if (MemOps.size() > 0)
1727 FormCandidates(MemOps);
1729 // Sort candidates so they get processed from end to begin of the basic
1730 // block later; This is necessary for liveness calculation.
1731 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1732 return M0->InsertPos < M1->InsertPos;
1734 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1736 // Go through list of candidates and merge.
1737 bool Changed = false;
1738 for (const MergeCandidate *Candidate : Candidates) {
1739 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1740 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1741 // Merge preceding/trailing base inc/dec into the merged op.
1744 unsigned Opcode = Merged->getOpcode();
1745 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1746 MergeBaseUpdateLSDouble(*Merged);
1748 MergeBaseUpdateLSMultiple(Merged);
// Merging failed (or was not possible): still try to fold base updates
// into the individual loads/stores of the candidate.
1750 for (MachineInstr *MI : Candidate->Instrs) {
1751 if (MergeBaseUpdateLoadStore(MI))
1756 assert(Candidate->Instrs.size() == 1);
1757 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1762 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
1763 for (MachineInstr *MI : MergeBaseCandidates)
1764 MergeBaseUpdateLSDouble(*MI);
1765 MergeBaseCandidates.clear();
1770 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1771 /// into the preceding stack restore so it directly restore the value of LR
1773 /// ldmfd sp!, {..., lr}
1776 /// ldmfd sp!, {..., lr}
1779 /// ldmfd sp!, {..., pc}
1780 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1781 // Thumb1 LDM doesn't allow high registers.
1782 if (isThumb1) return false;
1783 if (MBB.empty()) return false;
1785 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
// Only when the block ends in a plain return preceded by a popping LDM.
1786 if (MBBI != MBB.begin() &&
1787 (MBBI->getOpcode() == ARM::BX_RET ||
1788 MBBI->getOpcode() == ARM::tBX_RET ||
1789 MBBI->getOpcode() == ARM::MOVPCLR)) {
1790 MachineInstr *PrevMI = std::prev(MBBI);
1791 unsigned Opcode = PrevMI->getOpcode();
1792 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1793 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1794 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// LR must be the last register in the LDM's list so that rewriting it to
// the _RET form (which reloads into PC) replaces the return.
1795 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1796 if (MO.getReg() != ARM::LR)
1798 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1799 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1800 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1801 PrevMI->setDesc(TII->get(NewOpc));
// Keep implicit uses/defs (e.g. return-value registers) from the erased
// return instruction alive on the combined load-return.
1803 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
// Pass entry point: cache subtarget/function state, then run the
// load/store-multiple and return-merging optimizations on every block.
1811 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1813 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1814 TL = STI->getTargetLowering();
1815 AFI = Fn.getInfo<ARMFunctionInfo>();
1816 TII = STI->getInstrInfo();
1817 TRI = STI->getRegisterInfo();
1818 MRI = &Fn.getRegInfo();
1819 RegClassInfoValid = false;
1820 isThumb2 = AFI->isThumb2Function();
1821 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1823 bool Modified = false;
1824 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1826 MachineBasicBlock &MBB = *MFI;
1827 Modified |= LoadStoreMultipleOpti(MBB);
// Folding the return needs LDM-into-PC, available from ARMv5T onwards.
1828 if (STI->hasV5TOps())
1829 Modified |= MergeReturnIntoLDM(MBB);
// Release the pass-lifetime allocations (e.g. MergeCandidate storage).
1832 Allocator.DestroyAll();
1837 /// Pre- register allocation pass that move load / stores from consecutive
1838 /// locations close to make it more likely they will be combined later.
1839 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1841 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function state, set up in runOnMachineFunction.
1843 const DataLayout *TD;
1844 const TargetInstrInfo *TII;
1845 const TargetRegisterInfo *TRI;
1846 const ARMSubtarget *STI;
1847 MachineRegisterInfo *MRI;
1848 MachineFunction *MF;
1850 bool runOnMachineFunction(MachineFunction &Fn) override;
1852 const char *getPassName() const override {
1853 return "ARM pre- register allocation load / store optimization pass";
// Checks whether Op0/Op1 can be combined into one LDRD/STRD; outputs the
// new opcode, registers, base, offset, and predicate for the pair.
1857 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1858 unsigned &NewOpc, unsigned &EvenReg,
1859 unsigned &OddReg, unsigned &BaseReg,
1861 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves same-base loads/stores next to each other (see RescheduleOps below).
1863 bool RescheduleOps(MachineBasicBlock *MBB,
1864 SmallVectorImpl<MachineInstr *> &Ops,
1865 unsigned Base, bool isLd,
1866 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1867 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification (address used as unique ID by the pass framework).
1869 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pre-RA pass entry point: cache subtarget state and reschedule
// loads/stores in every basic block.
1872 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1873 TD = &Fn.getDataLayout();
1874 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1875 TII = STI->getInstrInfo();
1876 TRI = STI->getRegisterInfo();
1877 MRI = &Fn.getRegInfo();
1880 bool Modified = false;
1881 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1883 Modified |= RescheduleLoadStoreInstrs(MFI);
// Decide whether the memory ops in \p MemOps can be moved together past the
// instructions in [I, E) without changing behavior, and whether doing so is
// worth the register-pressure increase.
1888 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1889 MachineBasicBlock::iterator I,
1890 MachineBasicBlock::iterator E,
1891 SmallPtrSetImpl<MachineInstr*> &MemOps,
1892 SmallSet<unsigned, 4> &MemRegs,
1893 const TargetRegisterInfo *TRI) {
1894 // Are there stores / loads / calls between them?
1895 // FIXME: This is overly conservative. We should make use of alias information
1897 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions already part of the group (and debug values) are skipped.
1899 if (I->isDebugValue() || MemOps.count(&*I))
// Barriers: calls, terminators, and side-effecting instructions.
1901 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
// Moving loads across an intervening store could change the loaded value.
1903 if (isLd && I->mayStore())
1908 // It's not safe to move the first 'str' down.
1911 // str r4, [r0, #+4]
// Scan operands of each in-between instruction: any def that overlaps the
// base register makes the move unsafe; other registers extend the live
// ranges and are counted as added pressure.
1915 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1916 MachineOperand &MO = I->getOperand(j);
1919 unsigned Reg = MO.getReg();
1920 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1922 if (Reg != Base && !MemRegs.count(Reg))
1923 AddedRegPressure.insert(Reg);
1927 // Estimate register pressure increase due to the transformation.
1928 if (MemRegs.size() <= 4)
1929 // Ok if we are moving small number of instructions.
1931 return AddedRegPressure.size() <= MemRegs.size() * 2;
1935 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
1936 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1937 MachineInstr *Op1) {
1938 assert(MI->memoperands_empty() && "expected a new machineinstr");
// Total memoperand count of both source instructions.
1939 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1940 + (Op1->memoperands_end() - Op1->memoperands_begin());
// Allocate the combined array out of the MachineFunction's allocator, copy
// Op0's memoperands followed by Op1's, then attach the array to MI.
1942 MachineFunction *MF = MI->getParent()->getParent();
1943 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1944 MachineSDNode::mmo_iterator MemEnd =
1945 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1947 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1948 MI->setMemRefs(MemBegin, MemEnd);
1952 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1953 DebugLoc &dl, unsigned &NewOpc,
1955 unsigned &SecondReg,
1956 unsigned &BaseReg, int &Offset,
1958 ARMCC::CondCodes &Pred,
1960 // Make sure we're allowed to generate LDRD/STRD.
1961 if (!STI->hasV5TEOps())
1964 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Pick the doubleword opcode from Op0's opcode.
// NOTE(review): the ARM-mode NewOpc assignments (presumably LDRD/STRD) are
// elided in this excerpt; only the Thumb2 assignments are visible.
1966 unsigned Opcode = Op0->getOpcode();
1967 if (Opcode == ARM::LDRi12) {
1969 } else if (Opcode == ARM::STRi12) {
1971 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1972 NewOpc = ARM::t2LDRDi8;
1975 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1976 NewOpc = ARM::t2STRDi8;
1983 // Make sure the base address satisfies i64 ld / st alignment requirement.
1984 // At the moment, we ignore the memoryoperand's value.
1985 // If we want to use AliasAnalysis, we should check it accordingly.
1986 if (!Op0->hasOneMemOperand() ||
1987 (*Op0->memoperands_begin())->isVolatile())
1990 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1991 const Function *Func = MF->getFunction();
1992 unsigned ReqAlign = STI->hasV6Ops()
1993 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1994 : 8; // Pre-v6 need 8-byte align
1995 if (Align < ReqAlign)
1998 // Then make sure the immediate offset fits.
1999 int OffImm = getMemoryOpOffset(Op0);
// Thumb2 path: offset is encoded directly, signed, Scale-aligned.
2001 int Limit = (1 << 8) * Scale;
2002 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM path: negative offsets flip the AddrOpc and are encoded via AM3.
2006 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2008 AddSub = ARM_AM::sub;
2011 int Limit = (1 << 8) * Scale;
2012 if (OffImm >= Limit || (OffImm & (Scale-1)))
2014 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD/STRD needs two distinct transfer registers.
2016 FirstReg = Op0->getOperand(0).getReg();
2017 SecondReg = Op1->getOperand(0).getReg();
2018 if (FirstReg == SecondReg)
2020 BaseReg = Op0->getOperand(1).getReg();
2021 Pred = getInstrPredicate(Op0, PredReg);
2022 dl = Op0->getDebugLoc();
// Given all loads (or stores) off the same base register in a block, move
// runs of consecutive-offset ops next to each other so the post-RA pass can
// merge them, and directly form LDRD/STRD for suitable pairs.
2026 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2027 SmallVectorImpl<MachineInstr *> &Ops,
2028 unsigned Base, bool isLd,
2029 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2030 bool RetVal = false;
2032 // Sort by offset (in reverse order).
2033 std::sort(Ops.begin(), Ops.end(),
2034 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2035 int LOffset = getMemoryOpOffset(LHS);
2036 int ROffset = getMemoryOpOffset(RHS);
2037 assert(LHS == RHS || LOffset != ROffset);
2038 return LOffset > ROffset;
2041 // The loads / stores of the same base are in order. Scan them from first to
2042 // last and check for the following:
2043 // 1. Any def of base.
2045 while (Ops.size() > 1) {
2046 unsigned FirstLoc = ~0U;
2047 unsigned LastLoc = 0;
2048 MachineInstr *FirstOp = nullptr;
2049 MachineInstr *LastOp = nullptr;
2051 unsigned LastOpcode = 0;
2052 unsigned LastBytes = 0;
2053 unsigned NumMove = 0;
// Walk Ops from the back (lowest offset first after the reverse sort),
// growing a run of same-opcode, strictly consecutive-offset ops.
2054 for (int i = Ops.size() - 1; i >= 0; --i) {
2055 MachineInstr *Op = Ops[i];
2056 unsigned Loc = MI2LocMap[Op];
2057 if (Loc <= FirstLoc) {
2061 if (Loc >= LastLoc) {
// All ops in a run must map to the same LDM/STM family opcode.
2067 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2068 if (LastOpcode && LSMOpcode != LastOpcode)
// Offsets must be consecutive: each op starts where the previous ended.
2071 int Offset = getMemoryOpOffset(Op);
2072 unsigned Bytes = getLSMultipleTransferSize(Op);
2074 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2077 LastOffset = Offset;
2079 LastOpcode = LSMOpcode;
2080 if (++NumMove == 8) // FIXME: Tune this limit.
2087 SmallPtrSet<MachineInstr*, 4> MemOps;
2088 SmallSet<unsigned, 4> MemRegs;
2089 for (int i = NumMove-1; i >= 0; --i) {
2090 MemOps.insert(Ops[i]);
2091 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2094 // Be conservative, if the instructions are too far apart, don't
2095 // move them. We want to limit the increase of register pressure.
2096 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2098 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2099 MemOps, MemRegs, TRI);
2101 for (unsigned i = 0; i != NumMove; ++i)
2104 // This is the new location for the loads / stores.
// Loads gather at the first op's position, stores at the last op's.
2105 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2106 while (InsertPos != MBB->end()
2107 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2110 // If we are moving a pair of loads / stores, see if it makes sense
2111 // to try to allocate a pair of registers that can form register pairs.
2112 MachineInstr *Op0 = Ops.back();
2113 MachineInstr *Op1 = Ops[Ops.size()-2];
2114 unsigned FirstReg = 0, SecondReg = 0;
2115 unsigned BaseReg = 0, PredReg = 0;
2116 ARMCC::CondCodes Pred = ARMCC::AL;
2118 unsigned NewOpc = 0;
2121 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2122 FirstReg, SecondReg, BaseReg,
2123 Offset, PredReg, Pred, isT2)) {
// Constrain both registers to the class LDRD/STRD requires (e.g. GPRPair
// constraints via the instruction description).
2127 const MCInstrDesc &MCID = TII->get(NewOpc);
2128 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2129 MRI->constrainRegClass(FirstReg, TRC);
2130 MRI->constrainRegClass(SecondReg, TRC);
2132 // Form the pair instruction.
// Load pair: both transfer registers are defs.
2134 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2135 .addReg(FirstReg, RegState::Define)
2136 .addReg(SecondReg, RegState::Define)
2138 // FIXME: We're converting from LDRi12 to an insn that still
2139 // uses addrmode2, so we need an explicit offset reg. It should
2140 // always by reg0 since we're transforming LDRi12s.
2143 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2144 concatenateMemOperands(MIB, Op0, Op1);
2145 DEBUG(dbgs() << "Formed " << *MIB << "\n");
// Store pair: both transfer registers are uses.
2148 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2152 // FIXME: We're converting from LDRi12 to an insn that still
2153 // uses addrmode2, so we need an explicit offset reg. It should
2154 // always by reg0 since we're transforming STRi12s.
2157 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2158 concatenateMemOperands(MIB, Op0, Op1);
2159 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2166 // Add register allocation hints to form register pairs.
2167 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2168 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// No LDRD/STRD formed: just splice the run of ops to InsertPos so the
// post-RA pass can merge them into LDM/STM.
2171 for (unsigned i = 0; i != NumMove; ++i) {
2172 MachineInstr *Op = Ops.back();
2174 MBB->splice(InsertPos, MBB, Op);
2178 NumLdStMoved += NumMove;
// Scan \p MBB for unpredicated single loads/stores, bucket them by base
// register, and hand each bucket to RescheduleOps so nearby accesses can be
// moved together (enabling later ldm/stm or ldrd/strd formation).
// Returns true if any instruction was moved.
// NOTE(review): this listing is elided — several guards, `continue`s and
// closing braces (e.g. the StopHere flush path) are not visible here.
2188 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2189 bool RetVal = false;
// Maps each non-debug instruction to a monotonically increasing position,
// used later to judge how far apart two memory ops are.
2191 DenseMap<MachineInstr*, unsigned> MI2LocMap;
// Loads (resp. stores) grouped by their base register.
2192 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2193 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
// Base registers in first-seen order, so rescheduling order is deterministic.
2194 SmallVector<unsigned, 4> LdBases;
2195 SmallVector<unsigned, 4> StBases;
2198 MachineBasicBlock::iterator MBBI = MBB->begin();
2199 MachineBasicBlock::iterator E = MBB->end();
2201 for (; MBBI != E; ++MBBI) {
2202 MachineInstr *MI = MBBI;
// Calls and terminators act as scheduling barriers: presumably the groups
// collected so far are processed and cleared here (handling elided).
2203 if (MI->isCall() || MI->isTerminator()) {
2204 // Stop at barriers.
// Debug values get no location so they don't inflate distances.
2209 if (!MI->isDebugValue())
2210 MI2LocMap[MI] = ++Loc;
// Only plain memory ops are candidates...
2212 if (!isMemoryOp(MI))
2214 unsigned PredReg = 0;
// ...and only unpredicated ones (ARMCC::AL).
2215 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2218 int Opc = MI->getOpcode();
2219 bool isLd = isLoadSingle(Opc);
// Operand 1 is the base register of a single load/store in these forms.
2220 unsigned Base = MI->getOperand(1).getReg();
2221 int Offset = getMemoryOpOffset(MI);
// Set when this base+offset duplicates an earlier entry; the duplicate
// handling (flush/stop) is elided from this view — verify against full file.
2223 bool StopHere = false;
2225 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2226 Base2LdsMap.find(Base);
2227 if (BI != Base2LdsMap.end()) {
// Reject a load whose base+offset matches one already recorded for this base.
2228 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2229 if (Offset == getMemoryOpOffset(BI->second[i])) {
2235 BI->second.push_back(MI);
// First load seen with this base: start a new bucket and remember the base.
2237 Base2LdsMap[Base].push_back(MI);
2238 LdBases.push_back(Base);
// Same bookkeeping for stores.
2241 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2242 Base2StsMap.find(Base);
2243 if (BI != Base2StsMap.end()) {
2244 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2245 if (Offset == getMemoryOpOffset(BI->second[i])) {
2251 BI->second.push_back(MI);
2253 Base2StsMap[Base].push_back(MI);
2254 StBases.push_back(Base);
2259 // Found a duplicate (a base+offset combination that's seen earlier).
2266 // Re-schedule loads.
2267 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2268 unsigned Base = LdBases[i];
2269 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
// `true` = these are loads; RetVal accumulates whether anything moved.
2271 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2274 // Re-schedule stores.
2275 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2276 unsigned Base = StBases[i];
2277 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2279 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state before scanning the next region of the block.
2283 Base2LdsMap.clear();
2284 Base2StsMap.clear();
2294 /// Returns an instance of the load / store optimization pass.
// \p PreAlloc selects the pre-register-allocation variant (which reschedules
// single loads/stores and forms ldrd/strd pairs); otherwise the post-RA
// ldm/stm-forming pass is created.
2295 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// BUGFIX: the guard on PreAlloc was missing (embedded numbering skipped
// line 2296), leaving the parameter dead and the second return unreachable —
// the caller always got the pre-alloc pass regardless of PreAlloc.
2296 if (PreAlloc)
2297 return new ARMPreAllocLoadStoreOpt();
2298 return new ARMLoadStoreOpt();