1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
49 #define DEBUG_TYPE "arm-ldst-opt"
51 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
52 STATISTIC(NumSTMGened , "Number of stm instructions generated");
53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
64 void initializeARMLoadStoreOptPass(PassRegistry &);
67 #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
70 /// Post- register allocation pass the combine load / store instructions to
71 /// form ldm / stm instructions.
// NOTE(review): this struct is truncated relative to upstream LLVM — the gaps
// in the fused original line numbers show dropped lines (e.g. the constructor's
// closing brace, MemOpQueueEntry's `MachineInstr *MI;` field used by its
// constructor, the `InsertPos` member described at old lines 118-119, and the
// closing `};` of each struct). Restore from upstream before building.
72 struct ARMLoadStoreOpt : public MachineFunctionPass {
74 ARMLoadStoreOpt() : MachineFunctionPass(ID) {
75 initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
// Cached per-function pointers, set up in runOnMachineFunction (not visible
// in this chunk — TODO confirm against the full file).
78 const MachineFunction *MF;
79 const TargetInstrInfo *TII;
80 const TargetRegisterInfo *TRI;
81 const ARMSubtarget *STI;
82 const TargetLowering *TL;
// Liveness tracking state used by moveLiveRegsBefore/findFreeReg.
84 LivePhysRegs LiveRegs;
85 RegisterClassInfo RegClassInfo;
86 MachineBasicBlock::const_iterator LiveRegPos;
88 bool RegClassInfoValid;
89 bool isThumb1, isThumb2;
91 bool runOnMachineFunction(MachineFunction &Fn) override;
93 const char *getPassName() const override {
94 return ARM_LOAD_STORE_OPT_NAME;
98 /// A set of load/store MachineInstrs with same base register sorted by
// (continuation of the doc comment appears to be lost to truncation)
100 struct MemOpQueueEntry {
102 int Offset; ///< Load/Store offset.
103 unsigned Position; ///< Position as counted from end of basic block.
104 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
105 : MI(MI), Offset(Offset), Position(Position) {}
107 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
109 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
110 /// merged into a LDM/STM.
111 struct MergeCandidate {
112 /// List of instructions ordered by load/store offset.
113 SmallVector<MachineInstr*, 4> Instrs;
114 /// Index in Instrs of the instruction being latest in the schedule.
115 unsigned LatestMIIdx;
116 /// Index in Instrs of the instruction being earliest in the schedule.
117 unsigned EarliestMIIdx;
118 /// Index into the basic block where the merged instruction will be
119 /// inserted. (See MemOpQueueEntry.Position)
// NOTE(review): the `unsigned InsertPos;` declaration the comment above
// documents is missing here; FormCandidates assigns Candidate->InsertPos.
121 /// Whether the instructions can be merged into a ldm/stm instruction.
122 bool CanMergeToLSMulti;
123 /// Whether the instructions can be merged into a ldrd/strd instruction.
124 bool CanMergeToLSDouble;
// Candidate storage: bump-allocated, collected per function.
126 SpecificBumpPtrAllocator<MergeCandidate> Allocator;
127 SmallVector<const MergeCandidate*,4> Candidates;
128 SmallVector<MachineInstr*,4> MergeBaseCandidates;
// Worker methods — definitions appear later in the file.
130 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
131 MachineBasicBlock::const_iterator Before);
132 unsigned findFreeReg(const TargetRegisterClass &RegClass);
133 void UpdateBaseRegUses(MachineBasicBlock &MBB,
134 MachineBasicBlock::iterator MBBI,
135 DebugLoc DL, unsigned Base, unsigned WordOffset,
136 ARMCC::CondCodes Pred, unsigned PredReg);
137 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
138 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
139 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
140 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
141 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
142 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
143 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
144 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
145 void FormCandidates(const MemOpQueue &MemOps);
146 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
147 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
148 MachineBasicBlock::iterator &MBBI);
149 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
150 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
151 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
152 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
153 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
155 char ARMLoadStoreOpt::ID = 0;
158 INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
160 static bool definesCPSR(const MachineInstr *MI) {
161 for (const auto &MO : MI->operands()) {
164 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
165 // If the instruction has live CPSR def, then it's not safe to fold it
166 // into load / store.
173 static int getMemoryOpOffset(const MachineInstr *MI) {
174 unsigned Opcode = MI->getOpcode();
175 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
176 unsigned NumOperands = MI->getDesc().getNumOperands();
177 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
179 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
180 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
181 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
182 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
185 // Thumb1 immediate offsets are scaled by 4
186 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
187 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
190 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
191 : ARM_AM::getAM5Offset(OffField) * 4;
192 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
193 : ARM_AM::getAM5Op(OffField);
195 if (Op == ARM_AM::sub)
201 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
202 return MI.getOperand(1);
205 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
206 return MI.getOperand(0);
209 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
211 default: llvm_unreachable("Unhandled opcode!");
215 default: llvm_unreachable("Unhandled submode!");
216 case ARM_AM::ia: return ARM::LDMIA;
217 case ARM_AM::da: return ARM::LDMDA;
218 case ARM_AM::db: return ARM::LDMDB;
219 case ARM_AM::ib: return ARM::LDMIB;
224 default: llvm_unreachable("Unhandled submode!");
225 case ARM_AM::ia: return ARM::STMIA;
226 case ARM_AM::da: return ARM::STMDA;
227 case ARM_AM::db: return ARM::STMDB;
228 case ARM_AM::ib: return ARM::STMIB;
232 // tLDMIA is writeback-only - unless the base register is in the input
236 default: llvm_unreachable("Unhandled submode!");
237 case ARM_AM::ia: return ARM::tLDMIA;
241 // There is no non-writeback tSTMIA either.
244 default: llvm_unreachable("Unhandled submode!");
245 case ARM_AM::ia: return ARM::tSTMIA_UPD;
251 default: llvm_unreachable("Unhandled submode!");
252 case ARM_AM::ia: return ARM::t2LDMIA;
253 case ARM_AM::db: return ARM::t2LDMDB;
259 default: llvm_unreachable("Unhandled submode!");
260 case ARM_AM::ia: return ARM::t2STMIA;
261 case ARM_AM::db: return ARM::t2STMDB;
266 default: llvm_unreachable("Unhandled submode!");
267 case ARM_AM::ia: return ARM::VLDMSIA;
268 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
273 default: llvm_unreachable("Unhandled submode!");
274 case ARM_AM::ia: return ARM::VSTMSIA;
275 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
280 default: llvm_unreachable("Unhandled submode!");
281 case ARM_AM::ia: return ARM::VLDMDIA;
282 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
287 default: llvm_unreachable("Unhandled submode!");
288 case ARM_AM::ia: return ARM::VSTMDIA;
289 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// Maps a load/store-multiple opcode back to its addressing submode
// (ia/ib/da/db).
// NOTE(review): truncated — the `switch (Opcode) {` line, the non-writeback
// case labels, the `return ARM_AM::ia;` / `return ARM_AM::db;` statements and
// the closing braces are missing (gaps in the fused line numbers); only a
// subset of the case labels survived. Restore from upstream LLVM.
294 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
296 default: llvm_unreachable("Unhandled opcode!");
// The labels below (through VSTMDIA_UPD) all name *IA opcodes — presumably
// this group returned ARM_AM::ia upstream; verify against the full file.
303 case ARM::tLDMIA_UPD:
304 case ARM::tSTMIA_UPD:
305 case ARM::t2LDMIA_RET:
307 case ARM::t2LDMIA_UPD:
309 case ARM::t2STMIA_UPD:
311 case ARM::VLDMSIA_UPD:
313 case ARM::VSTMSIA_UPD:
315 case ARM::VLDMDIA_UPD:
317 case ARM::VSTMDIA_UPD:
// The labels below all name *DB opcodes — presumably the ARM_AM::db group.
331 case ARM::t2LDMDB_UPD:
333 case ARM::t2STMDB_UPD:
334 case ARM::VLDMSDB_UPD:
335 case ARM::VSTMSDB_UPD:
336 case ARM::VLDMDDB_UPD:
337 case ARM::VSTMDDB_UPD:
348 static bool isT1i32Load(unsigned Opc) {
349 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
352 static bool isT2i32Load(unsigned Opc) {
353 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
356 static bool isi32Load(unsigned Opc) {
357 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
360 static bool isT1i32Store(unsigned Opc) {
361 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
364 static bool isT2i32Store(unsigned Opc) {
365 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
368 static bool isi32Store(unsigned Opc) {
369 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
372 static bool isLoadSingle(unsigned Opc) {
373 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
376 static unsigned getImmScale(unsigned Opc) {
378 default: llvm_unreachable("Unhandled opcode!");
// Returns the number of bytes transferred by load/store instruction \p MI.
// NOTE(review): truncated — the fused line numbers jump 394->421 and 422->429,
// so most case labels (the single load/store opcodes returning fixed sizes and
// the remaining LDM/STM/VLDM/VSTM labels) and the closing braces are missing.
// Restore from upstream LLVM.
393 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
394 switch (MI->getOpcode()) {
421 case ARM::tLDMIA_UPD:
422 case ARM::tSTMIA_UPD:
// The register list is the variadic operand tail: total operands minus the
// fixed (described) operands, plus one; 4 bytes per listed register here ...
429 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
// ... and 8 bytes per listed register for the (missing) D-register cases.
432 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
436 /// Update future uses of the base register with the offset introduced
437 /// due to writeback. This function only works on Thumb1.
// NOTE(review): truncated — the `void` return-type line, several statements
// (e.g. the IsLoad/IsStore initializers' left-hand sides, `InsertSub = true;`
// assignments, `return;` statements, AddDefaultT1CC wrapper pieces and many
// closing braces) are missing per the gaps in the fused line numbers. The
// control flow below is therefore incomplete; restore from upstream LLVM.
439 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
440 MachineBasicBlock::iterator MBBI,
441 DebugLoc DL, unsigned Base,
443 ARMCC::CondCodes Pred, unsigned PredReg) {
444 assert(isThumb1 && "Can only update base register uses for Thumb1!");
445 // Start updating any instructions with immediate offsets. Insert a SUB before
446 // the first non-updateable instruction (if any).
447 for (; MBBI != MBB.end(); ++MBBI) {
448 bool InsertSub = false;
449 unsigned Opc = MBBI->getOpcode();
451 if (MBBI->readsRegister(Base)) {
// Classify the reader: Thumb1 immediate-offset loads/stores can absorb the
// writeback offset into their immediate.
454 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
456 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
458 if (IsLoad || IsStore) {
459 // Loads and stores with immediate offsets can be updated, but only if
460 // the new offset isn't negative.
461 // The MachineOperand containing the offset immediate is the last one
462 // before predicates.
464 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
465 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
466 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
468 // If storing the base register, it needs to be reset first.
469 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
471 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
476 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
477 !definesCPSR(MBBI)) {
478 // SUBS/ADDS using this register, with a dead def of the CPSR.
479 // Merge it with the update; if the merged offset is too large,
480 // insert a new sub instead.
482 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
483 Offset = (Opc == ARM::tSUBi8) ?
484 MO.getImm() + WordOffset * 4 :
485 MO.getImm() - WordOffset * 4 ;
486 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
487 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
490 // The base register has now been reset, so exit early.
497 // Can't update the instruction.
501 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
502 // Since SUBS sets the condition flags, we can't place the base reset
503 // after an instruction that has a live CPSR def.
504 // The base register might also contain an argument for a function call.
509 // An instruction above couldn't be updated, so insert a sub.
510 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
511 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
// Once the base is killed or redefined, later uses see a fresh value and
// need no adjustment.
515 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
516 // Register got killed. Stop updating.
520 // End of block was reached.
521 if (MBB.succ_size() > 0) {
522 // FIXME: Because of a bug, live registers are sometimes missing from
523 // the successor blocks' live-in sets. This means we can't trust that
524 // information and *always* have to reset at the end of a block.
526 if (MBBI != MBB.end()) --MBBI;
528 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
529 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
533 /// Return the first register of class \p RegClass that is not in \p Regs.
534 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
535 if (!RegClassInfoValid) {
536 RegClassInfo.runOnMachineFunction(*MF);
537 RegClassInfoValid = true;
540 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
541 if (!LiveRegs.contains(Reg))
546 /// Compute live registers just before instruction \p Before (in normal schedule
547 /// direction). Computes backwards so multiple queries in the same block must
548 /// come in reverse order.
549 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
550 MachineBasicBlock::const_iterator Before) {
551 // Initialize if we never queried in this block.
552 if (!LiveRegsValid) {
554 LiveRegs.addLiveOuts(&MBB, true);
555 LiveRegPos = MBB.end();
556 LiveRegsValid = true;
558 // Move backward just before the "Before" position.
559 while (LiveRegPos != Before) {
561 LiveRegs.stepBackward(*LiveRegPos);
565 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
567 for (const std::pair<unsigned, bool> &R : Regs)
573 /// Create and insert a LDM or STM with Base as base register and registers in
574 /// Regs as the register operands that would be loaded / stored. It returns
575 /// true if the transformation is done.
// NOTE(review): truncated — per the gaps in the fused line numbers this body
// is missing many statements (e.g. the `if (NumRegs <= 1) return nullptr;`
// guard, Writeback assignments, `Mode = ...` submode selections, the
// `unsigned NewBase` declaration and BaseOpc assignments, several if/else and
// closing braces, and the AddDefaultT1CC/AddDefaultCC wrappers around some
// BuildMI calls at old lines 704-711). The control flow below is incomplete;
// restore from upstream LLVM before relying on it.
576 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
577 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
578 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
579 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
580 unsigned NumRegs = Regs.size();
583 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
584 // Compute liveness information for that register to make the decision.
585 bool SafeToClobberCPSR = !isThumb1 ||
586 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
587 MachineBasicBlock::LQR_Dead);
589 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
591 // Exception: If the base register is in the input reglist, Thumb1 LDM is
593 // It's also not possible to merge an STR of the base register in Thumb1.
594 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
595 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
596 if (Opcode == ARM::tLDRi) {
598 } else if (Opcode == ARM::tSTRi) {
// Select the addressing submode from the starting offset.
603 ARM_AM::AMSubMode Mode = ARM_AM::ia;
604 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
605 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
606 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
608 if (Offset == 4 && haveIBAndDA) {
610 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
612 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
613 // VLDM/VSTM do not support DB mode without also updating the base reg.
615 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
616 // Check if this is a supported opcode before inserting instructions to
617 // calculate a new base register.
618 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
620 // If starting offset isn't zero, insert a MI to materialize a new base.
621 // But only do so if it is cost effective, i.e. merging more than two
626 // On Thumb1, it's not worth materializing a new base register without
627 // clobbering the CPSR (i.e. not using ADDS/SUBS).
628 if (!SafeToClobberCPSR)
632 if (isi32Load(Opcode)) {
633 // If it is a load, then just use one of the destination register to
634 // use as the new base.
635 NewBase = Regs[NumRegs-1].first;
637 // Find a free register that we can use as scratch register.
638 moveLiveRegsBefore(MBB, InsertBefore);
639 // The merged instruction does not exist yet but will use several Regs if
641 if (!isLoadSingle(Opcode))
642 for (const std::pair<unsigned, bool> &R : Regs)
643 LiveRegs.addReg(R.first);
645 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
// Pick the add/sub opcode used to materialize the new base.
651 isThumb2 ? ARM::t2ADDri :
652 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
653 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
654 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
659 isThumb2 ? ARM::t2SUBri :
660 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
661 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
664 if (!TL->isLegalAddImmediate(Offset))
665 // FIXME: Try add with register operand?
666 return nullptr; // Probably not worth it then.
668 // We can only append a kill flag to the add/sub input if the value is not
669 // used in the register list of the stm as well.
670 bool KillOldBase = BaseKill &&
671 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
674 // Thumb1: depending on immediate size, use either
675 // ADDS NewBase, Base, #imm3
678 // ADDS NewBase, #imm8.
679 if (Base != NewBase &&
680 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
681 // Need to insert a MOV to the new base first.
682 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
684 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
685 if (Pred != ARMCC::AL)
687 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
688 .addReg(Base, getKillRegState(KillOldBase));
690 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
691 .addReg(Base, getKillRegState(KillOldBase))
692 .addImm(Pred).addReg(PredReg);
694 // The following ADDS/SUBS becomes an update.
698 if (BaseOpc == ARM::tADDrSPi) {
699 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
700 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
701 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
702 .addImm(Pred).addReg(PredReg);
705 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
706 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
707 .addImm(Pred).addReg(PredReg);
709 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
710 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
711 .addImm(Pred).addReg(PredReg).addReg(0);
714 BaseKill = true; // New base is always killed straight away.
717 bool isDef = isLoadSingle(Opcode);
719 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
720 // base register writeback.
721 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
725 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
726 // - There is no writeback (LDM of base register),
727 // - the base register is killed by the merged instruction,
728 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
729 // to reset the base register.
730 // Otherwise, don't merge.
731 // It's safe to return here since the code to materialize a new base register
732 // above is also conditional on SafeToClobberCPSR.
733 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
736 MachineInstrBuilder MIB;
739 if (Opcode == ARM::tLDMIA)
740 // Update tLDMIA with writeback if necessary.
741 Opcode = ARM::tLDMIA_UPD;
743 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
745 // Thumb1: we might need to set base writeback when building the MI.
746 MIB.addReg(Base, getDefRegState(true))
747 .addReg(Base, getKillRegState(BaseKill));
749 // The base isn't dead after a merged instruction with writeback.
750 // Insert a sub instruction after the newly formed instruction to reset.
752 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
755 // No writeback, simply build the MachineInstr.
756 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
757 MIB.addReg(Base, getKillRegState(BaseKill));
760 MIB.addImm(Pred).addReg(PredReg);
// Append the register list: defs for loads, uses (with kill state) for stores.
762 for (const std::pair<unsigned, bool> &R : Regs)
763 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
765 return MIB.getInstr();
768 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
769 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
770 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
771 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
772 bool IsLoad = isi32Load(Opcode);
773 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
774 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
776 assert(Regs.size() == 2);
777 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
778 TII->get(LoadStoreOpcode));
780 MIB.addReg(Regs[0].first, RegState::Define)
781 .addReg(Regs[1].first, RegState::Define);
783 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
784 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
786 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
787 return MIB.getInstr();
790 /// Call MergeOps and update MemOps and merges accordingly on success.
// NOTE(review): truncated — per the gaps in the fused line numbers this body
// is missing statements (e.g. the `if (IsKill)` guard before KilledRegs.insert,
// `continue;` statements in the implicit-def scan, `if (!Merged) return
// nullptr;`, the `MBB.erase(MI);` in the removal loop, the `if/else` around
// restoring EarliestI, and several closing braces). Control flow below is
// incomplete; restore from upstream LLVM.
791 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
792 const MachineInstr *First = Cand.Instrs.front();
793 unsigned Opcode = First->getOpcode();
794 bool IsLoad = isLoadSingle(Opcode);
795 SmallVector<std::pair<unsigned, bool>, 8> Regs;
796 SmallVector<unsigned, 4> ImpDefs;
797 DenseSet<unsigned> KilledRegs;
798 DenseSet<unsigned> UsedRegs;
799 // Determine list of registers and list of implicit super-register defs.
800 for (const MachineInstr *MI : Cand.Instrs) {
801 const MachineOperand &MO = getLoadStoreRegOp(*MI);
802 unsigned Reg = MO.getReg();
803 bool IsKill = MO.isKill();
805 KilledRegs.insert(Reg);
806 Regs.push_back(std::make_pair(Reg, IsKill));
807 UsedRegs.insert(Reg);
810 // Collect any implicit defs of super-registers, after merging we can't
811 // be sure anymore that we properly preserved these live ranges and must
812 // removed these implicit operands.
813 for (const MachineOperand &MO : MI->implicit_operands()) {
814 if (!MO.isReg() || !MO.isDef() || MO.isDead())
816 assert(MO.isImplicit());
817 unsigned DefReg = MO.getReg();
819 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
821 // We can ignore cases where the super-reg is read and written.
822 if (MI->readsRegister(DefReg))
824 ImpDefs.push_back(DefReg);
829 // Attempt the merge.
830 typedef MachineBasicBlock::iterator iterator;
831 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
832 iterator InsertBefore = std::next(iterator(LatestMI));
833 MachineBasicBlock &MBB = *LatestMI->getParent();
834 unsigned Offset = getMemoryOpOffset(First);
835 unsigned Base = getLoadStoreBaseOp(*First).getReg();
836 bool BaseKill = LatestMI->killsRegister(Base);
837 unsigned PredReg = 0;
838 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
839 DebugLoc DL = First->getDebugLoc();
840 MachineInstr *Merged = nullptr;
// Prefer LDRD/STRD, fall back to LDM/STM.
841 if (Cand.CanMergeToLSDouble)
842 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
843 Opcode, Pred, PredReg, DL, Regs);
844 if (!Merged && Cand.CanMergeToLSMulti)
845 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
846 Opcode, Pred, PredReg, DL, Regs);
850 // Determine earliest instruction that will get removed. We then keep an
851 // iterator just above it so the following erases don't invalidated it.
852 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
853 bool EarliestAtBegin = false;
854 if (EarliestI == MBB.begin()) {
855 EarliestAtBegin = true;
857 EarliestI = std::prev(EarliestI);
860 // Remove instructions which have been merged.
861 for (MachineInstr *MI : Cand.Instrs)
864 // Determine range between the earliest removed instruction and the new one.
866 EarliestI = MBB.begin();
868 EarliestI = std::next(EarliestI);
869 auto FixupRange = make_range(EarliestI, iterator(Merged));
871 if (isLoadSingle(Opcode)) {
872 // If the previous loads defined a super-reg, then we have to mark earlier
873 // operands undef; Replicate the super-reg def on the merged instruction.
874 for (MachineInstr &MI : FixupRange) {
875 for (unsigned &ImpDefReg : ImpDefs) {
876 for (MachineOperand &MO : MI.implicit_operands()) {
877 if (!MO.isReg() || MO.getReg() != ImpDefReg)
// Re-attach the collected super-register defs to the merged instruction.
887 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
888 for (unsigned ImpDef : ImpDefs)
889 MIB.addReg(ImpDef, RegState::ImplicitDefine);
891 // Remove kill flags: We are possibly storing the values later now.
892 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
893 for (MachineInstr &MI : FixupRange) {
894 for (MachineOperand &MO : MI.uses()) {
895 if (!MO.isReg() || !MO.isKill())
897 if (UsedRegs.count(MO.getReg()))
901 assert(ImpDefs.empty());
/// Returns true if \p Offset can be encoded in a t2LDRDi8/t2STRDi8: the
/// instruction takes an 8-bit immediate that is internally multiplied by 4,
/// so the offset must be word-aligned and |Offset| < 1024.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
914 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
// NOTE(review): truncated — per the gaps in the fused line numbers the
// `unsigned SIndex = 0;`, the `do {` opening the chain loop, the `unsigned
// Count = 1;` initialization, several `break;` statements inside the inner
// loop, the Latest/Earliest assignments, the Offset/PReg/PRegNum rollover at
// the end of the inner loop, the single-instruction special case around old
// line 1002, the `SIndex += Count;` advance, and assorted braces are missing.
// Restore from upstream LLVM.
915 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
916 const MachineInstr *FirstMI = MemOps[0].MI;
917 unsigned Opcode = FirstMI->getOpcode();
918 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
919 unsigned Size = getLSMultipleTransferSize(FirstMI);
922 unsigned EIndex = MemOps.size();
924 // Look at the first instruction.
925 const MachineInstr *MI = MemOps[SIndex].MI;
926 int Offset = MemOps[SIndex].Offset;
927 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
928 unsigned PReg = PMO.getReg();
// Undef transfer registers get UINT_MAX so the ascending-order check below
// never accepts a successor for them.
929 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
930 unsigned Latest = SIndex;
931 unsigned Earliest = SIndex;
933 bool CanMergeToLSDouble =
934 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
935 // ARM errata 602117: LDRD with base in list may result in incorrect base
936 // register when interrupted or faulted.
937 if (STI->isCortexM3() && isi32Load(Opcode) &&
938 PReg == getLoadStoreBaseOp(*MI).getReg())
939 CanMergeToLSDouble = false;
941 bool CanMergeToLSMulti = true;
942 // On swift vldm/vstm starting with an odd register number as that needs
943 // more uops than single vldrs.
944 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
945 CanMergeToLSMulti = false;
947 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
948 // deprecated; LDM to PC is fine but cannot happen here.
949 if (PReg == ARM::SP || PReg == ARM::PC)
950 CanMergeToLSMulti = CanMergeToLSDouble = false;
952 // Merge following instructions where possible.
953 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
954 int NewOffset = MemOps[I].Offset;
// The chain must be contiguous in memory.
955 if (NewOffset != Offset + (int)Size)
957 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
958 unsigned Reg = MO.getReg();
959 if (Reg == ARM::SP || Reg == ARM::PC)
962 // See if the current load/store may be part of a multi load/store.
963 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
964 bool PartOfLSMulti = CanMergeToLSMulti;
966 // Register numbers must be in ascending order.
967 if (RegNum <= PRegNum)
968 PartOfLSMulti = false;
969 // For VFP / NEON load/store multiples, the registers must be
970 // consecutive and within the limit on the number of registers per
972 else if (!isNotVFP && RegNum != PRegNum+1)
973 PartOfLSMulti = false;
975 // See if the current load/store may be part of a double load/store.
976 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
978 if (!PartOfLSMulti && !PartOfLSDouble)
980 CanMergeToLSMulti &= PartOfLSMulti;
981 CanMergeToLSDouble &= PartOfLSDouble;
982 // Track MemOp with latest and earliest position (Positions are
983 // counted in reverse).
984 unsigned Position = MemOps[I].Position;
985 if (Position < MemOps[Latest].Position)
987 else if (Position > MemOps[Earliest].Position)
989 // Prepare for next MemOp.
994 // Form a candidate from the Ops collected so far.
995 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
996 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
997 Candidate->Instrs.push_back(MemOps[C].MI);
998 Candidate->LatestMIIdx = Latest - SIndex;
999 Candidate->EarliestMIIdx = Earliest - SIndex;
1000 Candidate->InsertPos = MemOps[Latest].Position;
1002 CanMergeToLSMulti = CanMergeToLSDouble = false;
1003 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1004 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1005 Candidates.push_back(Candidate);
1006 // Continue after the chain.
1008 } while (SIndex < EIndex);
// Maps a non-writeback load/store-multiple opcode plus a submode to the
// corresponding base-register-updating (_UPD) opcode.
// NOTE(review): truncated — the outer `switch (Opc) {` line, the outer case
// labels (per the visible submode groups, presumably ARM::LDMIA/..., STMIA/...,
// t2LDMIA/t2LDMDB, t2STMIA/t2STMDB and the VLDM/VSTM S/D variants — confirm
// against upstream), the inner `switch (Mode) {` lines and all closing braces
// are missing. Restore from upstream LLVM.
1011 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1012 ARM_AM::AMSubMode Mode) {
1014 default: llvm_unreachable("Unhandled opcode!");
1020 default: llvm_unreachable("Unhandled submode!");
1021 case ARM_AM::ia: return ARM::LDMIA_UPD;
1022 case ARM_AM::ib: return ARM::LDMIB_UPD;
1023 case ARM_AM::da: return ARM::LDMDA_UPD;
1024 case ARM_AM::db: return ARM::LDMDB_UPD;
1031 default: llvm_unreachable("Unhandled submode!");
1032 case ARM_AM::ia: return ARM::STMIA_UPD;
1033 case ARM_AM::ib: return ARM::STMIB_UPD;
1034 case ARM_AM::da: return ARM::STMDA_UPD;
1035 case ARM_AM::db: return ARM::STMDB_UPD;
1040 default: llvm_unreachable("Unhandled submode!");
1041 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1042 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1047 default: llvm_unreachable("Unhandled submode!");
1048 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1049 case ARM_AM::db: return ARM::t2STMDB_UPD;
1053 default: llvm_unreachable("Unhandled submode!");
1054 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1055 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1059 default: llvm_unreachable("Unhandled submode!");
1060 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1061 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1065 default: llvm_unreachable("Unhandled submode!");
1066 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1067 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1071 default: llvm_unreachable("Unhandled submode!");
1072 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1073 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1078 /// Check if the given instruction increments or decrements a register and
1079 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1080 /// generated by the instruction are possibly read as well.
///
/// \param MI      the candidate add/sub instruction.
/// \param Reg     the register that must be both destination and first source.
/// \param Pred    the predicate the inc/dec must execute under.
/// \param PredReg the predicate register that must match as well.
/// \return the signed byte delta applied to \p Reg, or 0 if not a match.
1081 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1082 ARMCC::CondCodes Pred, unsigned PredReg) {
// Scale converts the instruction's immediate operand into a signed byte
// delta (sign distinguishes increment from decrement).  CheckCPSRDef marks
// forms that may define CPSR.
// NOTE(review): the Thumb1 tADDi8/tSUBi8 cases use |Scale| == 4 — presumably
// the delta is meant in words for those forms; confirm against the encoding.
1085 switch (MI.getOpcode()) {
1086 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1087 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1089 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1091 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
// SP-relative Thumb adds/subs never set flags, so no CPSR check is needed.
1092 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1093 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
// Only a plain "Reg = Reg +/- imm" executed under the same predicate
// (and predicate register) qualifies as a foldable base update.
1098 if (MI.getOperand(0).getReg() != Reg ||
1099 MI.getOperand(1).getReg() != Reg ||
1100 getInstrPredicate(&MI, MIPredReg) != Pred ||
1101 MIPredReg != PredReg)
// Reject flag-setting forms that actually define CPSR: the flags might be
// consumed by a later instruction, so the add/sub cannot be removed.
1104 if (CheckCPSRDef && definesCPSR(&MI))
// Positive result = increment, negative = decrement.
1106 return MI.getOperand(2).getImm() * Scale;
1109 /// Searches for an increment or decrement of \p Reg before \p MBBI.
///
/// On success, returns the iterator of the matching add/sub instruction and
/// sets \p Offset to its signed byte delta.  On failure, returns the block's
/// end() iterator (with \p Offset left as 0 by isIncrementOrDecrement).
1110 static MachineBasicBlock::iterator
1111 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1112 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1114 MachineBasicBlock &MBB = *MBBI->getParent();
1115 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1116 MachineBasicBlock::iterator EndMBBI = MBB.end();
// Nothing can precede the first instruction of the block.
1117 if (MBBI == BeginMBBI)
1120 // Skip debug values.
1121 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1122 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
// Offset == 0 means PrevMBBI is not a suitable inc/dec of Reg; signal
// "not found" with the end iterator.
1125 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1126 return Offset == 0 ? EndMBBI : PrevMBBI;
1129 /// Searches for a increment or decrement of \p Reg after \p MBBI.
///
/// Mirror of findIncDecBefore: on success, returns the iterator of the
/// matching add/sub and sets \p Offset to its signed byte delta; on failure,
/// returns end() (with \p Offset left as 0 by isIncrementOrDecrement).
1130 static MachineBasicBlock::iterator
1131 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1132 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1134 MachineBasicBlock &MBB = *MBBI->getParent();
1135 MachineBasicBlock::iterator EndMBBI = MBB.end();
1136 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1137 // Skip debug values.
1138 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
// Reached the end of the block without finding a real instruction.
1140 if (NextMBBI == EndMBBI)
// Offset == 0 means NextMBBI is not a suitable inc/dec of Reg.
1143 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1144 return Offset == 0 ? EndMBBI : NextMBBI;
1147 /// Fold proceeding/trailing inc/dec of base register into the
1148 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1150 /// stmia rn, <ra, rb, rc>
1151 /// rn := rn + 4 * 3;
1153 /// stmia rn!, <ra, rb, rc>
1155 /// rn := rn - 4 * 3;
1156 /// ldmia rn, <ra, rb, rc>
1158 /// ldmdb rn!, <ra, rb, rc>
///
/// Returns true when \p MI was replaced by a writeback (_UPD) form and the
/// folded add/sub was erased.
1159 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1160 // Thumb1 is already using updating loads/stores.
1161 if (isThumb1) return false;
// Operand 0 of a load/store-multiple is the base register.
1163 const MachineOperand &BaseOP = MI->getOperand(0);
1164 unsigned Base = BaseOP.getReg();
1165 bool BaseKill = BaseOP.isKill();
1166 unsigned PredReg = 0;
1167 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1168 unsigned Opcode = MI->getOpcode();
1169 DebugLoc DL = MI->getDebugLoc();
1171 // Can't use an updating ld/st if the base register is also a dest
1172 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1173 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1174 if (MI->getOperand(i).getReg() == Base)
// Total number of bytes transferred by the multiple; the folded add/sub
// must match this amount exactly.
1177 int Bytes = getLSMultipleTransferSize(MI);
1178 MachineBasicBlock &MBB = *MI->getParent();
1179 MachineBasicBlock::iterator MBBI(MI);
// First try an inc/dec immediately before the multiple.
1181 MachineBasicBlock::iterator MergeInstr
1182 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1183 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
// A preceding decrement by the transfer size converts an ia/ib multiple
// into the corresponding pre-decremented writeback form.
1184 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1186 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
// Otherwise look for a matching inc/dec after the multiple: ia/ib need a
// following increment by Bytes, da/db a following decrement by Bytes.
1189 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1190 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1191 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
// The add/sub is now represented by the writeback; remove it.
1194 MBB.erase(MergeInstr);
// Rebuild the multiple with the base-updating opcode.  The writeback base
// is a def; the incoming base keeps its original kill state.
1196 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1197 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1198 .addReg(Base, getDefRegState(true)) // WB base register
1199 .addReg(Base, getKillRegState(BaseKill))
1200 .addImm(Pred).addReg(PredReg);
1202 // Transfer the rest of operands.
1203 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1204 MIB.addOperand(MI->getOperand(OpNum));
1206 // Transfer memoperands.
1207 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// Map a plain load/store opcode to its pre-indexed (base updated before the
// access) equivalent.  For VLDR/VSTR there is no true pre-indexed form, so a
// one-register VLDM/VSTM writeback opcode is used instead, selected by the
// add/sub direction.  Unhandled opcodes abort via llvm_unreachable.
1213 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1214 ARM_AM::AddrOpc Mode) {
1217 return ARM::LDR_PRE_IMM;
1219 return ARM::STR_PRE_IMM;
// VFP loads/stores: pick the IA (add) or DB (sub) multiple-writeback form.
1221 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1223 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1225 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1227 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
// Thumb2 integer loads/stores have native pre-indexed forms.
1230 return ARM::t2LDR_PRE;
1233 return ARM::t2STR_PRE;
1234 default: llvm_unreachable("Unhandled opcode!");
// Map a plain load/store opcode to its post-indexed (base updated after the
// access) equivalent.  As with the pre-indexed mapping, VLDR/VSTR fall back
// to one-register VLDM/VSTM writeback opcodes selected by the add/sub
// direction.  Unhandled opcodes abort via llvm_unreachable.
1238 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1239 ARM_AM::AddrOpc Mode) {
1242 return ARM::LDR_POST_IMM;
1244 return ARM::STR_POST_IMM;
// VFP loads/stores: IA (add) or DB (sub) multiple-writeback form.
1246 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1248 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1250 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1252 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
// Thumb2 integer loads/stores have native post-indexed forms.
1255 return ARM::t2LDR_POST;
1258 return ARM::t2STR_POST;
1259 default: llvm_unreachable("Unhandled opcode!");
1263 /// Fold proceeding/trailing inc/dec of base register into the
1264 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
///
/// Rewrites a single load/store plus an adjacent add/sub of its base into a
/// pre- or post-indexed form and erases the add/sub.  Returns true on success.
1265 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1266 // Thumb1 doesn't have updating LDR/STR.
1267 // FIXME: Use LDM/STM with single register instead.
1268 if (isThumb1) return false;
1270 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1271 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1272 unsigned Opcode = MI->getOpcode();
1273 DebugLoc DL = MI->getDebugLoc();
// isAM5: VFP loads/stores (addressing mode 5); isAM2: ARM-mode integer
// LDR/STR with immediate (addressing mode 2).
1274 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1275 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1276 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb a base update.
1277 if (isi32Load(Opcode) || isi32Store(Opcode))
1278 if (MI->getOperand(2).getImm() != 0)
1280 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1283 // Can't do the merge if the destination register is the same as the would-be
1284 // writeback register.
1285 if (MI->getOperand(0).getReg() == Base)
1288 unsigned PredReg = 0;
1289 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1290 int Bytes = getLSMultipleTransferSize(MI);
1291 MachineBasicBlock &MBB = *MI->getParent();
1292 MachineBasicBlock::iterator MBBI(MI);
// Prefer folding an add/sub found before the access (pre-indexed form).
1294 MachineBasicBlock::iterator MergeInstr
1295 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
// AM5 (VFP) has no pre-increment single form here, hence the !isAM5 guard.
1297 if (!isAM5 && Offset == Bytes) {
1298 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1299 } else if (Offset == -Bytes) {
1300 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// Otherwise try an add/sub after the access (post-indexed form).
1302 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1303 if (Offset == Bytes) {
1304 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1305 } else if (!isAM5 && Offset == -Bytes) {
1306 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// The add/sub is subsumed by the writeback; remove it.
1310 MBB.erase(MergeInstr);
1312 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
1314 bool isLd = isLoadSingle(Opcode);
1316 // VLDM[SD]_UPD, VSTM[SD]_UPD
1317 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1318 // updating load/store-multiple instructions can be used with only one
// register.)
1320 MachineOperand &MO = MI->getOperand(0);
1321 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1322 .addReg(Base, getDefRegState(true)) // WB base register
1323 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1324 .addImm(Pred).addReg(PredReg)
1325 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1326 getKillRegState(MO.isKill())));
1329 // LDR_PRE, LDR_POST
1330 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1331 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1332 .addReg(Base, RegState::Define)
1333 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// AM2 post-indexed forms encode direction+amount+shift in a single
// immediate via getAM2Opc.
1335 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1336 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1337 .addReg(Base, RegState::Define)
1338 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1341 // t2LDR_PRE, t2LDR_POST
1342 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1343 .addReg(Base, RegState::Define)
1344 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Store forms: the updated base is the instruction's def, the stored
// register is a use.
1347 MachineOperand &MO = MI->getOperand(0);
1348 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1349 // the vestigal zero-reg offset register. When that's fixed, this clause
1350 // can be removed entirely.
1351 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1352 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1353 // STR_PRE, STR_POST
1354 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1355 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1356 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1358 // t2STR_PRE, t2STR_POST
1359 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1360 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1361 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// Fold an adjacent inc/dec of the base register into a t2LDRDi8/t2STRDi8,
/// producing the pre- or post-indexed LDRD/STRD form and erasing the add/sub.
/// Returns true on success.
1369 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1370 unsigned Opcode = MI.getOpcode();
1371 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1372 "Must have t2STRDi8 or t2LDRDi8");
// Only a zero immediate offset can absorb a base update.
1373 if (MI.getOperand(3).getImm() != 0)
1376 // Behaviour for writeback is undefined if base register is the same as one
// of the transferred registers.
1378 const MachineOperand &BaseOp = MI.getOperand(2);
1379 unsigned Base = BaseOp.getReg();
1380 const MachineOperand &Reg0Op = MI.getOperand(0);
1381 const MachineOperand &Reg1Op = MI.getOperand(1);
1382 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1386 ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
1387 MachineBasicBlock::iterator MBBI(MI);
1388 MachineBasicBlock &MBB = *MI.getParent();
// Prefer a preceding +/-8 (the doubleword transfer size) -> pre-indexed.
1390 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
1393 if (Offset == 8 || Offset == -8) {
1394 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
// Otherwise a following +/-8 -> post-indexed.
1396 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1397 if (Offset == 8 || Offset == -8) {
1398 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
// The add/sub is subsumed by the writeback; remove it.
1402 MBB.erase(MergeInstr);
1404 DebugLoc DL = MI.getDebugLoc();
1405 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Loads define the two data registers then the updated base; stores define
// the updated base first, then use the two data registers.
1406 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1407 MIB.addOperand(Reg0Op).addOperand(Reg1Op)
1408 .addReg(BaseOp.getReg(), RegState::Define);
1410 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1411 MIB.addReg(BaseOp.getReg(), RegState::Define)
1412 .addOperand(Reg0Op).addOperand(Reg1Op);
1414 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1415 .addImm(Offset).addImm(Pred).addReg(PredReg);
1416 assert(TII->get(Opcode).getNumOperands() == 6 &&
1417 TII->get(NewOpc).getNumOperands() == 7 &&
1418 "Unexpected number of operands in Opcode specification.");
1420 // Transfer implicit operands.
1421 for (const MachineOperand &MO : MI.implicit_operands())
1423 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
1429 /// Returns true if instruction is a memory operation that this pass is capable
1430 /// of operating on.
1431 static bool isMemoryOp(const MachineInstr *MI) {
1432 // When no memory operands are present, conservatively assume unaligned,
1433 // volatile, unfoldable.
1434 if (!MI->hasOneMemOperand())
1437 const MachineMemOperand *MMO = *MI->memoperands_begin();
1439 // Don't touch volatile memory accesses - we may be changing their order.
1440 if (MMO->isVolatile())
1443 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not, so require at least 4-byte alignment before merging.
1445 if (MMO->getAlignment() < 4)
1448 // str <undef> could probably be eliminated entirely, but for now we just want
1449 // to avoid making a mess of it.
1450 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1451 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1452 MI->getOperand(0).isUndef())
1455 // Likewise don't mess with references to undefined addresses.
1456 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1457 MI->getOperand(1).isUndef())
// Finally, only accept the specific load/store opcodes handled below, and
// only when they address through a register base (operand 1).
1460 unsigned Opcode = MI->getOpcode();
1465 return MI->getOperand(1).isReg();
1468 return MI->getOperand(1).isReg();
1479 return MI->getOperand(1).isReg();
// Emit a single LDR (isDef == true) or STR (isDef == false) at MBBI with the
// given opcode, data/base registers, immediate offset, and predicate.  The
// RegDeadKill/RegUndef/BaseKill/BaseUndef flags carry the liveness state of
// the corresponding operands on the original paired instruction.
// NOTE(review): the OffKill/OffUndef parameters and the DL/isT2 parameters
// are not referenced in the visible body — presumably consumed elsewhere or
// vestigial; confirm before relying on them.
1484 static void InsertLDR_STR(MachineBasicBlock &MBB,
1485 MachineBasicBlock::iterator &MBBI,
1486 int Offset, bool isDef,
1487 DebugLoc DL, unsigned NewOpc,
1488 unsigned Reg, bool RegDeadKill, bool RegUndef,
1489 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1490 bool OffKill, bool OffUndef,
1491 ARMCC::CondCodes Pred, unsigned PredReg,
1492 const TargetInstrInfo *TII, bool isT2) {
// Load: data register is a def (possibly dead).
1494 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1496 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1497 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1498 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store: data register is a use (possibly killed/undef).
1500 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1502 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1503 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1504 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
/// Rewrite an LDRD/STRD whose register pair is invalid (non-consecutive or
/// tripping Cortex-M3 erratum 602117) into an equivalent LDM/STM or a pair of
/// single loads/stores, erasing the original.  Returns true when a rewrite
/// happened; \p MBBI is advanced past the erased instruction.
1508 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1509 MachineBasicBlock::iterator &MBBI) {
1510 MachineInstr *MI = &*MBBI;
1511 unsigned Opcode = MI->getOpcode();
1512 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1515 const MachineOperand &BaseOp = MI->getOperand(2);
1516 unsigned BaseReg = BaseOp.getReg();
1517 unsigned EvenReg = MI->getOperand(0).getReg();
1518 unsigned OddReg = MI->getOperand(1).getReg();
// Dwarf numbers give a target-independent way to test consecutiveness.
1519 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1520 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1522 // ARM errata 602117: LDRD with base in list may result in incorrect base
1523 // register when interrupted or faulted.
1524 bool Errata602117 = EvenReg == BaseReg &&
1525 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1526 // ARM LDRD/STRD needs consecutive registers.
1527 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1528 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
// A valid pair needs no fixing.
1530 if (!Errata602117 && !NonConsecutiveRegs)
1533 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1534 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// Capture liveness flags from the original operands before it is erased;
// loads track dead defs, stores track killed uses.
1535 bool EvenDeadKill = isLd ?
1536 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1537 bool EvenUndef = MI->getOperand(0).isUndef();
1538 bool OddDeadKill = isLd ?
1539 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1540 bool OddUndef = MI->getOperand(1).isUndef();
1541 bool BaseKill = BaseOp.isKill();
1542 bool BaseUndef = BaseOp.isUndef();
// t2 forms have no separate offset-register operand (operand 3 is the imm).
1543 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1544 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1545 int OffImm = getMemoryOpOffset(MI);
1546 unsigned PredReg = 0;
1547 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1549 if (OddRegNum > EvenRegNum && OffImm == 0) {
1550 // Ascending register numbers and no offset. It's safe to change it to a
// load/store-multiple.
1552 unsigned NewOpc = (isLd)
1553 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1554 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1556 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1557 .addReg(BaseReg, getKillRegState(BaseKill))
1558 .addImm(Pred).addReg(PredReg)
1559 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1560 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1563 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1564 .addReg(BaseReg, getKillRegState(BaseKill))
1565 .addImm(Pred).addReg(PredReg)
1567 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1569 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
1573 // Split into two instructions.
1574 unsigned NewOpc = (isLd)
1575 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1576 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1577 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1578 // so adjust and use t2LDRi12 here for that.
1579 unsigned NewOpc2 = (isLd)
1580 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1581 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1582 DebugLoc dl = MBBI->getDebugLoc();
1583 // If this is a load and base register is killed, it may have been
1584 // re-defed by the load, make sure the first load does not clobber it.
// If the even (first) register would clobber the base, emit the odd half
// first so the base is still valid for the second access.
1586 (BaseKill || OffKill) &&
1587 (TRI->regsOverlap(EvenReg, BaseReg))) {
1588 assert(!TRI->regsOverlap(OddReg, BaseReg));
1589 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1590 OddReg, OddDeadKill, false,
1591 BaseReg, false, BaseUndef, false, OffUndef,
1592 Pred, PredReg, TII, isT2);
1593 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1594 EvenReg, EvenDeadKill, false,
1595 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1596 Pred, PredReg, TII, isT2);
1598 if (OddReg == EvenReg && EvenDeadKill) {
1599 // If the two source operands are the same, the kill marker is
1600 // probably on the first one. e.g.
1601 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1602 EvenDeadKill = false;
1605 // Never kill the base register in the first instruction.
1606 if (EvenReg == BaseReg)
1607 EvenDeadKill = false;
1608 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1609 EvenReg, EvenDeadKill, EvenUndef,
1610 BaseReg, false, BaseUndef, false, OffUndef,
1611 Pred, PredReg, TII, isT2);
1612 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1613 OddReg, OddDeadKill, OddUndef,
1614 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1615 Pred, PredReg, TII, isT2);
// Remove the original LDRD/STRD and advance the caller's iterator.
1623 MBBI = MBB.erase(MBBI);
1627 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1628 /// incrementing offset into LDM / STM ops.
///
/// Walks the block backwards collecting chains of same-base/same-opcode
/// loads or stores, forms merge candidates from each chain, then performs
/// the merges and folds base inc/dec updates into the results.
1629 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// Current chain state: base register, opcode, and predicate that every
// member of the in-progress chain must share.
1631 unsigned CurrBase = 0;
1632 unsigned CurrOpc = ~0u;
1633 ARMCC::CondCodes CurrPred = ARMCC::AL;
1634 unsigned Position = 0;
1635 assert(Candidates.size() == 0);
1636 assert(MergeBaseCandidates.size() == 0);
1637 LiveRegsValid = false;
// Iterate back-to-front; I stays one past the instruction under inspection
// so erasures performed by the helpers don't invalidate the walk.
1639 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1641 // The instruction in front of the iterator is the one we look at.
1642 MBBI = std::prev(I);
1643 if (FixInvalidRegPairOp(MBB, MBBI))
1647 if (isMemoryOp(MBBI)) {
1648 unsigned Opcode = MBBI->getOpcode();
1649 const MachineOperand &MO = MBBI->getOperand(0);
1650 unsigned Reg = MO.getReg();
1651 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1652 unsigned PredReg = 0;
1653 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1654 int Offset = getMemoryOpOffset(MBBI);
1655 if (CurrBase == 0) {
1656 // Start of a new chain.
1660 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1663 // Note: No need to match PredReg in the next if.
1664 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1666 // r4 := ldr [r0, #8]
1667 // r4 := ldr [r0, #4]
1670 // If a load overrides the base register or a register loaded by
1671 // another load in our chain, we cannot take this instruction.
1672 bool Overlap = false;
1673 if (isLoadSingle(Opcode)) {
1674 Overlap = (Base == Reg);
1676 for (const MemOpQueueEntry &E : MemOps) {
1677 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1686 // Check offset and sort memory operation into the current chain.
1687 if (Offset > MemOps.back().Offset) {
1688 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
// Linear scan to keep MemOps sorted by ascending offset; identical
// offsets mean two accesses to the same slot, which aborts the chain.
1691 MemOpQueue::iterator MI, ME;
1692 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1693 if (Offset < MI->Offset) {
1694 // Found a place to insert.
1697 if (Offset == MI->Offset) {
1698 // Collision, abort.
1703 if (MI != MemOps.end()) {
1704 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1711 // Don't advance the iterator; The op will start a new chain next.
1714 // Fallthrough to look into existing chain.
1715 } else if (MBBI->isDebugValue()) {
1717 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1718 MBBI->getOpcode() == ARM::t2STRDi8) {
1719 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1720 // remember them because we may still be able to merge add/sub into them.
1721 MergeBaseCandidates.push_back(MBBI);
1725 // If we are here then the chain is broken; Extract candidates for a merge.
1726 if (MemOps.size() > 0) {
1727 FormCandidates(MemOps);
1728 // Reset for the next chain.
1731 CurrPred = ARMCC::AL;
// Flush the final chain at the start of the block.
1735 if (MemOps.size() > 0)
1736 FormCandidates(MemOps);
1738 // Sort candidates so they get processed from end to begin of the basic
1739 // block later; This is necessary for liveness calculation.
1740 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1741 return M0->InsertPos < M1->InsertPos;
1743 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1745 // Go through list of candidates and merge.
1746 bool Changed = false;
1747 for (const MergeCandidate *Candidate : Candidates) {
1748 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1749 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1750 // Merge preceding/trailing base inc/dec into the merged op.
1753 unsigned Opcode = Merged->getOpcode();
1754 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1755 MergeBaseUpdateLSDouble(*Merged);
1757 MergeBaseUpdateLSMultiple(Merged);
// The merge itself failed; still try to fold base updates into the
// individual loads/stores of the candidate.
1759 for (MachineInstr *MI : Candidate->Instrs) {
1760 if (MergeBaseUpdateLoadStore(MI))
// Single-instruction candidate: only a base-update fold is possible.
1765 assert(Candidate->Instrs.size() == 1);
1766 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1771 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
1772 for (MachineInstr *MI : MergeBaseCandidates)
1773 MergeBaseUpdateLSDouble(*MI);
1774 MergeBaseCandidates.clear();
1779 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1780 /// into the preceding stack restore so it directly restore the value of LR
// into pc.
1782 /// ldmfd sp!, {..., lr}
// bx lr
// or
1785 /// ldmfd sp!, {..., lr}
// mov pc, lr
/// =>
1788 /// ldmfd sp!, {..., pc}
1789 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1790 // Thumb1 LDM doesn't allow high registers.
1791 if (isThumb1) return false;
1792 if (MBB.empty()) return false;
1794 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
// Only act on a block ending in a plain return preceded by an LDM-writeback.
1795 if (MBBI != MBB.begin() &&
1796 (MBBI->getOpcode() == ARM::BX_RET ||
1797 MBBI->getOpcode() == ARM::tBX_RET ||
1798 MBBI->getOpcode() == ARM::MOVPCLR)) {
1799 MachineInstr *PrevMI = std::prev(MBBI);
1800 unsigned Opcode = PrevMI->getOpcode();
1801 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1802 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1803 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last register of the load-multiple must be LR for the fold to
// be a simple "load pc instead of lr".
1804 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1805 if (MO.getReg() != ARM::LR)
1807 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1808 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1809 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1810 PrevMI->setDesc(TII->get(NewOpc));
// Keep the return's implicit operands (e.g. the return value) alive on
// the combined instruction.
1812 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
/// Pass entry point: cache subtarget/target-info pointers, then run the
/// load/store-multiple merger (and, on v5T+, the return-into-LDM fold) over
/// every block.  Returns true if the function was modified.
1820 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1822 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1823 TL = STI->getTargetLowering();
1824 AFI = Fn.getInfo<ARMFunctionInfo>();
1825 TII = STI->getInstrInfo();
1826 TRI = STI->getRegisterInfo();
// Lazily-computed register-class info must be refreshed per function.
1828 RegClassInfoValid = false;
1829 isThumb2 = AFI->isThumb2Function();
1830 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1832 bool Modified = false;
1833 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1835 MachineBasicBlock &MBB = *MFI;
1836 Modified |= LoadStoreMultipleOpti(MBB);
// LDMIA_RET-style folds need v5T's ability to load into pc as a return.
1837 if (STI->hasV5TOps())
1838 Modified |= MergeReturnIntoLDM(MBB);
// Release all per-function allocations made through the pass allocator.
1841 Allocator.DestroyAll();
1846 /// Pre- register allocation pass that move load / stores from consecutive
1847 /// locations close to make it more likely they will be combined later.
1848 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1850 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function pointers, (re)initialized in runOnMachineFunction.
1852 const DataLayout *TD;
1853 const TargetInstrInfo *TII;
1854 const TargetRegisterInfo *TRI;
1855 const ARMSubtarget *STI;
1856 MachineRegisterInfo *MRI;
1857 MachineFunction *MF;
1859 bool runOnMachineFunction(MachineFunction &Fn) override;
1861 const char *getPassName() const override {
1862 return "ARM pre- register allocation load / store optimization pass";
// Decide whether two adjacent loads/stores can be combined into an
// LDRD/STRD; on success fills in the new opcode, registers, and offset.
1866 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1867 unsigned &NewOpc, unsigned &EvenReg,
1868 unsigned &OddReg, unsigned &BaseReg,
1870 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Move same-base loads/stores next to each other within a block.
1872 bool RescheduleOps(MachineBasicBlock *MBB,
1873 SmallVectorImpl<MachineInstr *> &Ops,
1874 unsigned Base, bool isLd,
1875 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1876 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification token required by MachineFunctionPass.
1878 char ARMPreAllocLoadStoreOpt::ID = 0;
/// Pass entry point: cache data-layout/subtarget pointers, then reschedule
/// the loads/stores of every block.  Returns true if anything moved.
1881 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1882 TD = &Fn.getDataLayout();
1883 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1884 TII = STI->getInstrInfo();
1885 TRI = STI->getRegisterInfo();
1886 MRI = &Fn.getRegInfo();
1889 bool Modified = false;
1890 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1892 Modified |= RescheduleLoadStoreInstrs(MFI);
/// Decide whether the memory operations in \p MemOps can be moved together
/// past the instructions in [I, E) without changing behavior, and whether
/// doing so is worthwhile given the estimated register-pressure increase.
///
/// \param isLd   true when moving loads (stores in between then block the move).
/// \param Base   the common base register of the operations being moved.
/// \param MemRegs the data registers already used by the operations.
1897 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1898 MachineBasicBlock::iterator I,
1899 MachineBasicBlock::iterator E,
1900 SmallPtrSetImpl<MachineInstr*> &MemOps,
1901 SmallSet<unsigned, 4> &MemRegs,
1902 const TargetRegisterInfo *TRI) {
1903 // Are there stores / loads / calls between them?
1904 // FIXME: This is overly conservative. We should make use of alias information
// if possible.
1906 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions already part of the group (and debug values) are skipped.
1908 if (I->isDebugValue() || MemOps.count(&*I))
// Calls, terminators, and side-effecting instructions are hard barriers.
1910 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1912 if (isLd && I->mayStore())
1917 // It's not safe to move the first 'str' down.
// e.g.
// str r1, [r0]
1920 // str r4, [r0, #+4]
// Scan operands: a def of the base kills the transformation; any other
// register that is live across the move adds register pressure.
1924 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1925 MachineOperand &MO = I->getOperand(j);
1928 unsigned Reg = MO.getReg();
1929 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1931 if (Reg != Base && !MemRegs.count(Reg))
1932 AddedRegPressure.insert(Reg);
1936 // Estimate register pressure increase due to the transformation.
1937 if (MemRegs.size() <= 4)
1938 // Ok if we are moving small number of instructions.
// Otherwise allow at most a 2x pressure increase relative to group size.
1940 return AddedRegPressure.size() <= MemRegs.size() * 2;
1944 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
///
/// Used when two loads/stores are fused into one LDRD/STRD: the combined
/// instruction must carry both original memory operands so later passes see
/// the full set of accessed locations.
1945 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1946 MachineInstr *Op1) {
1947 assert(MI->memoperands_empty() && "expected a new machineinstr");
1948 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1949 + (Op1->memoperands_end() - Op1->memoperands_begin());
// Allocate one array in the function's allocator and copy both ranges in.
1951 MachineFunction *MF = MI->getParent()->getParent();
1952 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1953 MachineSDNode::mmo_iterator MemEnd =
1954 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1956 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1957 MI->setMemRefs(MemBegin, MemEnd);
// Determine whether the two loads/stores Op0/Op1 can be merged into a single
// LDRD/STRD.  On success, fills in the output parameters (new opcode, the two
// data registers, base register, encoded offset, predicate, debug location)
// and returns true.
1961 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1962 DebugLoc &dl, unsigned &NewOpc,
1964 unsigned &SecondReg,
1965 unsigned &BaseReg, int &Offset,
1967 ARMCC::CondCodes &Pred,
1969 // Make sure we're allowed to generate LDRD/STRD.
1970 if (!STI->hasV5TEOps())
1973 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Select the paired opcode from the first instruction's opcode.
1975 unsigned Opcode = Op0->getOpcode();
1976 if (Opcode == ARM::LDRi12) {
1978 } else if (Opcode == ARM::STRi12) {
1980 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1981 NewOpc = ARM::t2LDRDi8;
1984 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1985 NewOpc = ARM::t2STRDi8;
1992 // Make sure the base address satisfies i64 ld / st alignment requirement.
1993 // At the moment, we ignore the memoryoperand's value.
1994 // If we want to use AliasAnalysis, we should check it accordingly.
1995 if (!Op0->hasOneMemOperand() ||
1996 (*Op0->memoperands_begin())->isVolatile())
1999 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
2000 const Function *Func = MF->getFunction();
// v6+ can use the ABI's i64 alignment; pre-v6 LDRD strictly needs 8 bytes.
2001 unsigned ReqAlign = STI->hasV6Ops()
2002 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
2003 : 8; // Pre-v6 need 8-byte align
2004 if (Align < ReqAlign)
2007 // Then make sure the immediate offset fits.
2008 int OffImm = getMemoryOpOffset(Op0);
// Thumb2 path: signed 8-bit scaled immediate, sign carried by OffImm itself.
2010 int Limit = (1 << 8) * Scale;
2011 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM path: addressing mode 3 encodes magnitude plus an add/sub flag.
2015 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2017 AddSub = ARM_AM::sub;
2020 int Limit = (1 << 8) * Scale;
2021 if (OffImm >= Limit || (OffImm & (Scale-1)))
2023 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD with identical destination registers is not formable.
2025 FirstReg = Op0->getOperand(0).getReg();
2026 SecondReg = Op1->getOperand(0).getReg();
2027 if (FirstReg == SecondReg)
2029 BaseReg = Op0->getOperand(1).getReg();
2030 Pred = getInstrPredicate(Op0, PredReg);
2031 dl = Op0->getDebugLoc();
/// Try to reschedule the loads (isLd) or stores in Ops, which all use the
/// same base register \p Base, so that consecutive accesses sit next to each
/// other and can be combined into a wider access (LDRD/STRD) or become
/// LDM/STM candidates. \p MI2LocMap gives each instruction's original
/// position in the block, used to bound how far instructions may move.
/// Returns true if any instruction was moved or rewritten.
/// NOTE(review): this excerpt is elided — several original lines between the
/// numbered statements below are missing from view.
2035 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2036 SmallVectorImpl<MachineInstr *> &Ops,
2037 unsigned Base, bool isLd,
2038 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2039 bool RetVal = false;
2041 // Sort by offset (in reverse order).
2042 std::sort(Ops.begin(), Ops.end(),
2043 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2044 int LOffset = getMemoryOpOffset(LHS);
2045 int ROffset = getMemoryOpOffset(RHS);
// Duplicate base+offset pairs were filtered out by the caller, so offsets
// of distinct instructions must differ; assert that invariant here.
2046 assert(LHS == RHS || LOffset != ROffset);
2047 return LOffset > ROffset;
2050 // The loads / stores of the same base are in order. Scan them from first to
2051 // last and check for the following:
2052 // 1. Any def of base.
// Repeatedly peel off the longest runnable suffix of Ops (descending offset
// order) and try to move that run together.
2054 while (Ops.size() > 1) {
2055 unsigned FirstLoc = ~0U;
2056 unsigned LastLoc = 0;
2057 MachineInstr *FirstOp = nullptr;
2058 MachineInstr *LastOp = nullptr;
2060 unsigned LastOpcode = 0;
2061 unsigned LastBytes = 0;
2062 unsigned NumMove = 0;
// Walk Ops from the back (lowest offset first, given the reverse sort),
// growing the candidate run while the instructions stay compatible.
2063 for (int i = Ops.size() - 1; i >= 0; --i) {
2064 MachineInstr *Op = Ops[i];
2065 unsigned Loc = MI2LocMap[Op];
// Track the earliest / latest original positions of the run so we can
// later limit how far apart the moved instructions were.
2066 if (Loc <= FirstLoc) {
2070 if (Loc >= LastLoc) {
// All instructions in a run must map to the same load/store-multiple
// opcode, otherwise they cannot be merged later.
2076 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2077 if (LastOpcode && LSMOpcode != LastOpcode)
2080 int Offset = getMemoryOpOffset(Op);
2081 unsigned Bytes = getLSMultipleTransferSize(Op);
// Require equal access sizes and exactly contiguous offsets
// (each access starts where the previous one ended).
2083 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2086 LastOffset = Offset;
2088 LastOpcode = LSMOpcode;
2089 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and their transfer registers for the
// safety/profitability analysis below.
2096 SmallPtrSet<MachineInstr*, 4> MemOps;
2097 SmallSet<unsigned, 4> MemRegs;
2098 for (int i = NumMove-1; i >= 0; --i) {
2099 MemOps.insert(Ops[i]);
2100 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2103 // Be conservative, if the instructions are too far apart, don't
2104 // move them. We want to limit the increase of register pressure.
2105 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2107 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2108 MemOps, MemRegs, TRI);
2110 for (unsigned i = 0; i != NumMove; ++i)
2113 // This is the new location for the loads / stores.
// Loads cluster at the first op, stores at the last; skip over members
// of the run itself and any debug values when choosing the slot.
2114 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2115 while (InsertPos != MBB->end()
2116 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2119 // If we are moving a pair of loads / stores, see if it makes sense
2120 // to try to allocate a pair of registers that can form register pairs.
2121 MachineInstr *Op0 = Ops.back();
2122 MachineInstr *Op1 = Ops[Ops.size()-2];
2123 unsigned FirstReg = 0, SecondReg = 0;
2124 unsigned BaseReg = 0, PredReg = 0;
2125 ARMCC::CondCodes Pred = ARMCC::AL;
2127 unsigned NewOpc = 0;
// Exactly two contiguous same-size accesses may be fused into a single
// doubleword LDRD/STRD (or t2 variant) if CanFormLdStDWord agrees.
2130 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2131 FirstReg, SecondReg, BaseReg,
2132 Offset, PredReg, Pred, isT2)) {
// Constrain both transfer registers to the register class the new
// doubleword opcode requires.
2136 const MCInstrDesc &MCID = TII->get(NewOpc);
2137 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2138 MRI->constrainRegClass(FirstReg, TRC);
2139 MRI->constrainRegClass(SecondReg, TRC);
2141 // Form the pair instruction.
// Load form: both destination registers are defs.
2143 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2144 .addReg(FirstReg, RegState::Define)
2145 .addReg(SecondReg, RegState::Define)
2147 // FIXME: We're converting from LDRi12 to an insn that still
2148 // uses addrmode2, so we need an explicit offset reg. It should
2149 // always by reg0 since we're transforming LDRi12s.
2152 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Merge the memory operands of both originals onto the new pair.
2153 concatenateMemOperands(MIB, Op0, Op1);
2154 DEBUG(dbgs() << "Formed " << *MIB << "\n");
// Store form of the same fusion (sources instead of defs).
2157 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2161 // FIXME: We're converting from LDRi12 to an insn that still
2162 // uses addrmode2, so we need an explicit offset reg. It should
2163 // always by reg0 since we're transforming STRi12s.
2166 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2167 concatenateMemOperands(MIB, Op0, Op1);
2168 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2175 // Add register allocation hints to form register pairs.
2176 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2177 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// Fallback path: no fusion — just splice each instruction of the run to
// the chosen insertion point so they end up adjacent.
2180 for (unsigned i = 0; i != NumMove; ++i) {
2181 MachineInstr *Op = Ops.back();
2183 MBB->splice(InsertPos, MBB, Op);
2187 NumLdStMoved += NumMove;
/// Scan \p MBB once, bucketing predicable single loads and stores by their
/// base register (loads and stores kept in separate maps), then invoke
/// RescheduleOps on every bucket to try to move same-base accesses next to
/// each other. Returns true if anything was changed.
/// NOTE(review): this excerpt is elided — the return-type line and several
/// interior lines (continue/break statements, brace closers) are missing.
2197 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2198 bool RetVal = false;
// Per-region state: instruction -> sequence number, and base-register
// buckets for loads and stores. LdBases/StBases remember insertion order.
2200 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2201 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2202 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2203 SmallVector<unsigned, 4> LdBases;
2204 SmallVector<unsigned, 4> StBases;
2207 MachineBasicBlock::iterator MBBI = MBB->begin();
2208 MachineBasicBlock::iterator E = MBB->end();
2210 for (; MBBI != E; ++MBBI) {
2211 MachineInstr *MI = MBBI;
// Calls and terminators end the current scheduling region.
2212 if (MI->isCall() || MI->isTerminator()) {
2213 // Stop at barriers.
// Assign a monotonically increasing location to every real (non-debug)
// instruction; RescheduleOps uses these to bound movement distance.
2218 if (!MI->isDebugValue())
2219 MI2LocMap[MI] = ++Loc;
2221 if (!isMemoryOp(MI))
// Only unconditional (AL) memory ops are candidates.
2223 unsigned PredReg = 0;
2224 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2227 int Opc = MI->getOpcode();
2228 bool isLd = isLoadSingle(Opc);
2229 unsigned Base = MI->getOperand(1).getReg();
2230 int Offset = getMemoryOpOffset(MI);
2232 bool StopHere = false;
// Load path: append to this base's bucket unless the same base+offset
// was already seen (a duplicate ends the region — see below).
2234 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2235 Base2LdsMap.find(Base);
2236 if (BI != Base2LdsMap.end()) {
2237 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2238 if (Offset == getMemoryOpOffset(BI->second[i])) {
2244 BI->second.push_back(MI);
// First load seen for this base: start a new bucket.
2246 Base2LdsMap[Base].push_back(MI);
2247 LdBases.push_back(Base);
// Store path: mirror of the load path, using the store maps.
2250 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2251 Base2StsMap.find(Base);
2252 if (BI != Base2StsMap.end()) {
2253 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2254 if (Offset == getMemoryOpOffset(BI->second[i])) {
2260 BI->second.push_back(MI);
2262 Base2StsMap[Base].push_back(MI);
2263 StBases.push_back(Base);
2268 // Found a duplicate (a base+offset combination that's seen earlier).
2275 // Re-schedule loads.
2276 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2277 unsigned Base = LdBases[i];
2278 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2280 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2283 // Re-schedule stores.
2284 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2285 unsigned Base = StBases[i];
2286 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2288 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state before processing the next region of the block.
2292 Base2LdsMap.clear();
2293 Base2StsMap.clear();
2303 /// Returns an instance of the load / store optimization pass.
/// \param PreAlloc selects the pre-register-allocation rescheduling pass;
/// otherwise the post-RA load/store merging pass is created.
/// NOTE(review): the conditional between the two returns (original line
/// 2305) is elided in this excerpt — presumably `if (PreAlloc)`; confirm
/// against the full file.
2304 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2306 return new ARMPreAllocLoadStoreOpt();
2307 return new ARMLoadStoreOpt();