1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
49 #define DEBUG_TYPE "arm-ldst-opt"
// Pass statistics, reported with -stats.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
// Forward declaration of the pass initializer (defined via the
// INITIALIZE_PASS macro below); normally declared inside namespace llvm.
void initializeARMLoadStoreOptPass(PassRegistry &);
// Human-readable pass name, shared by getPassName() and pass registration.
#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
/// Post-register-allocation pass that combines load / store instructions to
/// form ldm / stm instructions.
// NOTE(review): this excerpt elides several lines of the class (e.g. the
// "static char ID;" member, the MemOpQueueEntry MI field, an InsertPos
// member following its doc comment, and the closing braces). Visible code
// is kept as-is.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {
    initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
  // Per-function cached state, presumably set up in runOnMachineFunction —
  // TODO confirm, the setup code is outside this excerpt.
  const MachineFunction *MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  const TargetLowering *TL;
  // Scratch liveness tracker; see moveLiveRegsBefore/findFreeReg.
  LivePhysRegs LiveRegs;
  // Lazily initialized allocation-order info; see findFreeReg.
  RegisterClassInfo RegClassInfo;
  // Position that LiveRegs is currently valid at.
  MachineBasicBlock::const_iterator LiveRegPos;
  bool RegClassInfoValid;
  // Subtarget mode flags.
  bool isThumb1, isThumb2;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  const char *getPassName() const override {
    return ARM_LOAD_STORE_OPT_NAME;
  /// A set of load/store MachineInstrs with same base register sorted by
  /// offset.
  struct MemOpQueueEntry {
    int Offset; ///< Load/Store offset.
    unsigned Position; ///< Position as counted from end of basic block.
    MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  /// merged into a LDM/STM.
  struct MergeCandidate {
    /// List of instructions ordered by load/store offset.
    SmallVector<MachineInstr*, 4> Instrs;
    /// Index in Instrs of the instruction being latest in the schedule.
    unsigned LatestMIIdx;
    /// Index in Instrs of the instruction being earliest in the schedule.
    unsigned EarliestMIIdx;
    /// Index into the basic block where the merged instruction will be
    /// inserted. (See MemOpQueueEntry.Position)
    /// Whether the instructions can be merged into a ldm/stm instruction.
    bool CanMergeToLSMulti;
    /// Whether the instructions can be merged into a ldrd/strd instruction.
    bool CanMergeToLSDouble;
  // Arena for MergeCandidate objects; freed wholesale when the pass is done.
  SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  SmallVector<const MergeCandidate*,4> Candidates;
  SmallVector<MachineInstr*,4> MergeBaseCandidates;
  void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                          MachineBasicBlock::const_iterator Before);
  unsigned findFreeReg(const TargetRegisterClass &RegClass);
  void UpdateBaseRegUses(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         DebugLoc DL, unsigned Base, unsigned WordOffset,
                         ARMCC::CondCodes Pred, unsigned PredReg);
  MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
      bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
      DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
  MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
      bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
      DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
  void FormCandidates(const MemOpQueue &MemOps);
  MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Static pass ID; its address uniquely identifies this pass.
char ARMLoadStoreOpt::ID = 0;
// Register the pass under the "arm-load-store-opt" command-line name.
INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
/// Apparently returns true iff \p MI has a non-dead def of CPSR, i.e. leaves
/// the condition flags live.
// NOTE(review): the loop body's continue/return lines are elided in this
// excerpt — confirm against the full source.
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
/// Decode the byte offset of a load/store from its addressing-mode immediate
/// operand (the third-from-last operand); negative for "subtract" modes.
// NOTE(review): the return statements of the i8/i12 and Thumb1 branches are
// elided in this excerpt.
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
  // These opcodes carry the byte offset directly in the immediate field.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
  // Remaining opcodes use addrmode3 (byte offset) or addrmode5 (word offset).
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);
  // Negate the magnitude for subtracting address modes.
  if (Op == ARM_AM::sub)
201 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
202 return MI.getOperand(1);
205 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
206 return MI.getOperand(0);
/// Map a single load/store opcode plus an addressing submode onto the
/// corresponding load/store-multiple opcode; 0 where no such encoding exists.
// NOTE(review): the outer switch-on-Opcode statement and the per-opcode case
// labels are elided in this excerpt; only the inner submode switches remain.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
  // tLDMIA is writeback-only - unless the base register is in the input
  // reglist.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
  // There is no non-writeback tSTMIA either.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
/// Return the addressing submode (ia/ib/da/db) encoded by a load/store
/// multiple opcode.
// NOTE(review): the "return ARM_AM::ia/db" result lines and many case labels
// are elided in this excerpt; the visible cases group by submode suffix.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  // Increment-after (IA) forms.
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA_UPD:
  // Decrement-before (DB) forms.
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
348 static bool isT1i32Load(unsigned Opc) {
349 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
352 static bool isT2i32Load(unsigned Opc) {
353 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
356 static bool isi32Load(unsigned Opc) {
357 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
360 static bool isT1i32Store(unsigned Opc) {
361 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
364 static bool isT2i32Store(unsigned Opc) {
365 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
368 static bool isi32Store(unsigned Opc) {
369 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
372 static bool isLoadSingle(unsigned Opc) {
373 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
/// Scale factor that converts a Thumb1 load/store immediate-offset field into
/// "word offset" units; used by UpdateBaseRegUses.
// NOTE(review): the switch-on-Opc line and the per-opcode cases are elided in
// this excerpt.
static unsigned getImmScale(unsigned Opc) {
  default: llvm_unreachable("Unhandled opcode!");
/// Number of bytes transferred by a load/store(-multiple) instruction.
// NOTE(review): most case labels and the single-load/store cases are elided
// in this excerpt.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
    // The register list is the variadic tail of the operand list: 4 bytes per
    // core register...
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
    // ...and 8 bytes per double-precision VFP register.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
// NOTE(review): this excerpt elides the return type line, the WordOffset
// parameter, several local declarations (IsLoad/IsStore/Offset/MO), braces,
// and the early "return"/"break" lines — keep that in mind when reading.
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();
    if (MBBI->readsRegister(Base)) {
      // Immediate-offset Thumb1 loads/stores of any width.
      Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);
        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // The base register has now been reset, so exit early.
      // Can't update the instruction.
    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
    // An instruction above couldn't be updated, so insert a sub.
    AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
/// Return the first register of class \p RegClass that is not in \p Regs.
// NOTE(review): the final "return Reg;"/"return 0;" lines and closing braces
// are elided in this excerpt.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  // Compute the allocation order lazily, once per function.
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  // Scan in allocation order; pick the first register not currently live.
  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
// NOTE(review): the LiveRegs initialization line and the iterator decrement
// inside the while loop are elided in this excerpt.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    LiveRegs.stepBackward(*LiveRegPos);
565 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
567 for (const std::pair<unsigned, bool> &R : Regs)
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// new instruction, or nullptr if the transformation is not done.
// NOTE(review): this excerpt elides many interior lines (braces, early
// returns, the Mode assignments in the offset classification, the NewBase /
// BaseOpc declarations, and several if/else connectors). Visible code is
// kept as-is.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);
  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // handled specially below.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
    } else if (Opcode == ARM::tSTRi) {
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
  if (Offset == 4 && haveIBAndDA) {
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);
      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      // Add-immediate opcode choice, by subtarget and immediate size.
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;
      // Sub-immediate opcode choice for negative offsets.
      isThumb2 ? ARM::t2SUBri :
      (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
      isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.
    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);
        // The following ADDS/SUBS becomes an update.
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    BaseKill = true; // New base is always killed straight away.
  bool isDef = isLoadSingle(Opcode);
  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
  MachineInstrBuilder MIB;
    assert(isThumb1 && "expected Writeback only inThumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));
    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  MIB.addImm(Pred).addReg(PredReg);
  // Append the register list: defs for loads, uses (with kill state) for
  // stores.
  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
  return MIB.getInstr();
/// Create and insert a t2LDRDi8/t2STRDi8 transferring the two registers in
/// \p Regs, with base register \p Base and byte offset \p Offset.
// NOTE(review): the if/else structure selecting between the load (Define) and
// store (kill-state) operand forms is elided in this excerpt.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  // Loads define the two destination registers...
  MIB.addReg(Regs[0].first, RegState::Define)
     .addReg(Regs[1].first, RegState::Define);
  // ...stores read the two source registers with their kill state.
  MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
     .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
/// Call MergeOps and update MemOps and merges accordingly on success.
// NOTE(review): this excerpt elides several interior lines (braces,
// continue/early-return statements, the erase of merged instructions, and
// some conditionals). Visible code is kept as-is.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);
    // Collect any implicit defs of super-registers, after merging we can't
    // be sure anymore that we properly preserved these live ranges and must
    // remove these implicit operands.
    for (const MachineOperand &MO : MI->implicit_operands()) {
      if (!MO.isReg() || !MO.isDef() || MO.isDead())
      assert(MO.isImplicit());
      unsigned DefReg = MO.getReg();
      // Skip duplicates already recorded.
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
      // We can ignore cases where the super-reg is read and written.
      if (MI->readsRegister(DefReg))
      ImpDefs.push_back(DefReg);
  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  // Prefer the LDRD/STRD form when the candidate allows it.
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
    EarliestI = std::prev(EarliestI);
  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
  // Determine range between the earliest removed instruction and the new one.
    EarliestI = MBB.begin();
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));
  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
    // Re-attach the collected super-register defs to the merged instruction.
    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
        if (UsedRegs.count(MO.getReg()))
  assert(ImpDefs.empty());
/// t2LDRDi8/t2STRDi8 encode their offset as an 8-bit immediate that is
/// implicitly multiplied by 4, so a byte offset is only representable when it
/// is a multiple of 4 with magnitude strictly below 1024.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Magnitude = abs(Offset);
  return Magnitude < 1024 && (Magnitude & 3) == 0;
}
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
// NOTE(review): this excerpt elides the SIndex/Count declarations, the
// enclosing do { } loop header, and the break/continue and index-update
// statements inside the inner loop. Visible code is kept as-is.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);
  unsigned EIndex = MemOps.size();
  // Look at the first instruction.
  const MachineInstr *MI = MemOps[SIndex].MI;
  int Offset = MemOps[SIndex].Offset;
  const MachineOperand &PMO = getLoadStoreRegOp(*MI);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Latest = SIndex;
  unsigned Earliest = SIndex;
  bool CanMergeToLSDouble =
    STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  if (STI->isCortexM3() && isi32Load(Opcode) &&
      PReg == getLoadStoreBaseOp(*MI).getReg())
    CanMergeToLSDouble = false;
  bool CanMergeToLSMulti = true;
  // On swift vldm/vstm starting with an odd register number as that needs
  // more uops than single vldrs.
  if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
    CanMergeToLSMulti = false;
  // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
  // deprecated; LDM to PC is fine but cannot happen here.
  if (PReg == ARM::SP || PReg == ARM::PC)
    CanMergeToLSMulti = CanMergeToLSDouble = false;
  // Merge following instructions where possible.
  for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
    int NewOffset = MemOps[I].Offset;
    // Offsets must form a contiguous ascending run.
    if (NewOffset != Offset + (int)Size)
    const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
    unsigned Reg = MO.getReg();
    if (Reg == ARM::SP || Reg == ARM::PC)
    // See if the current load/store may be part of a multi load/store.
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    bool PartOfLSMulti = CanMergeToLSMulti;
      // Register numbers must be in ascending order.
      if (RegNum <= PRegNum)
        PartOfLSMulti = false;
      // For VFP / NEON load/store multiples, the registers must be
      // consecutive and within the limit on the number of registers per
      // instruction.
      else if (!isNotVFP && RegNum != PRegNum+1)
        PartOfLSMulti = false;
    // See if the current load/store may be part of a double load/store.
    bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
    if (!PartOfLSMulti && !PartOfLSDouble)
    CanMergeToLSMulti &= PartOfLSMulti;
    CanMergeToLSDouble &= PartOfLSDouble;
    // Track MemOp with latest and earliest position (Positions are
    // counted in reverse).
    unsigned Position = MemOps[I].Position;
    if (Position < MemOps[Latest].Position)
    else if (Position > MemOps[Earliest].Position)
    // Prepare for next MemOp.
  // Form a candidate from the Ops collected so far.
  MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
  for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
    Candidate->Instrs.push_back(MemOps[C].MI);
  Candidate->LatestMIIdx = Latest - SIndex;
  Candidate->EarliestMIIdx = Earliest - SIndex;
  Candidate->InsertPos = MemOps[Latest].Position;
    CanMergeToLSMulti = CanMergeToLSDouble = false;
  Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
  Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
  Candidates.push_back(Candidate);
  // Continue after the chain.
} while (SIndex < EIndex);
/// Map a non-writeback load/store-multiple opcode plus an addressing submode
/// onto its base-register-updating (_UPD) form.
// NOTE(review): the outer switch statement and the per-opcode case labels are
// elided in this excerpt; only the inner submode switches remain.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
1082 /// Check if the given instruction increments or decrements a register and
1083 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1084 /// generated by the instruction are possibly read as well.
1085 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1086 ARMCC::CondCodes Pred, unsigned PredReg) {
// Scale converts the instruction's immediate operand into a signed byte
// delta; a negative Scale marks the subtracting forms. CheckCPSRDef marks
// flag-setting forms whose CPSR def must be inspected before folding.
1089 switch (MI.getOpcode()) {
1090 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1091 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1093 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1095 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1096 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1097 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
// Only a plain "Reg = Reg op imm" under the same predicate qualifies.
1102 if (MI.getOperand(0).getReg() != Reg ||
1103 MI.getOperand(1).getReg() != Reg ||
1104 getInstrPredicate(&MI, MIPredReg) != Pred ||
1105 MIPredReg != PredReg)
// Reject flag-setting forms that define CPSR (the flags might be consumed).
1108 if (CheckCPSRDef && definesCPSR(&MI))
1110 return MI.getOperand(2).getImm() * Scale;
1113 /// Searches for an increment or decrement of \p Reg before \p MBBI.
/// On success \p Offset receives the signed byte delta and the matching
/// instruction's iterator is returned; otherwise MBB.end() is returned
/// (with \p Offset left at 0) as a "not found" sentinel.
1114 static MachineBasicBlock::iterator
1115 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1116 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1118 MachineBasicBlock &MBB = *MBBI->getParent();
1119 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1120 MachineBasicBlock::iterator EndMBBI = MBB.end();
// Nothing precedes the first instruction of the block.
1121 if (MBBI == BeginMBBI)
1124 // Skip debug values.
1125 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1126 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
1129 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1130 return Offset == 0 ? EndMBBI : PrevMBBI;
1133 /// Searches for a increment or decrement of \p Reg after \p MBBI.
/// Mirror of findIncDecBefore: on success \p Offset receives the signed
/// byte delta and the matching instruction is returned; otherwise
/// MBB.end() is returned as a "not found" sentinel.
1134 static MachineBasicBlock::iterator
1135 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1136 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1138 MachineBasicBlock &MBB = *MBBI->getParent();
1139 MachineBasicBlock::iterator EndMBBI = MBB.end();
1140 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1141 // Skip debug values.
1142 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1144 if (NextMBBI == EndMBBI)
1147 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1148 return Offset == 0 ? EndMBBI : NextMBBI;
1151 /// Fold proceeding/trailing inc/dec of base register into the
1152 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1154 /// stmia rn, <ra, rb, rc>
1155 /// rn := rn + 4 * 3;
1157 /// stmia rn!, <ra, rb, rc>
1159 /// rn := rn - 4 * 3;
1160 /// ldmia rn, <ra, rb, rc>
1162 /// ldmdb rn!, <ra, rb, rc>
1163 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1164 // Thumb1 is already using updating loads/stores.
1165 if (isThumb1) return false;
1167 const MachineOperand &BaseOP = MI->getOperand(0);
1168 unsigned Base = BaseOP.getReg();
1169 bool BaseKill = BaseOP.isKill();
1170 unsigned PredReg = 0;
1171 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1172 unsigned Opcode = MI->getOpcode();
1173 DebugLoc DL = MI->getDebugLoc();
1175 // Can't use an updating ld/st if the base register is also a dest
1176 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1177 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1178 if (MI->getOperand(i).getReg() == Base)
1181 int Bytes = getLSMultipleTransferSize(MI);
1182 MachineBasicBlock &MBB = *MI->getParent();
1183 MachineBasicBlock::iterator MBBI(MI);
// First try to fold a decrement found immediately before the LDM/STM
// (which can flip the submode, e.g. ia + preceding "rn -= Bytes" -> db).
1185 MachineBasicBlock::iterator MergeInstr
1186 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1187 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1188 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1190 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
// Otherwise look for a matching inc/dec after the instruction; the
// increment/decrement direction must agree with the submode.
1193 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1194 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1195 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
// The add/sub is now folded into the writeback; remove it.
1198 MBB.erase(MergeInstr);
1200 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1201 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1202 .addReg(Base, getDefRegState(true)) // WB base register
1203 .addReg(Base, getKillRegState(BaseKill))
1204 .addImm(Pred).addReg(PredReg);
1206 // Transfer the rest of operands.
1207 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1208 MIB.addOperand(MI->getOperand(OpNum));
1210 // Transfer memoperands.
1211 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
/// Return the pre-indexed (writeback before access) form of a single
/// load/store opcode. VLDR/VSTR have no pre-indexed variant, so a
/// one-register VLDM/VSTM writeback form is returned for them instead,
/// with \p Mode selecting the IA (add) or DB (sub) flavor.
1217 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1218 ARM_AM::AddrOpc Mode) {
1221 return ARM::LDR_PRE_IMM;
1223 return ARM::STR_PRE_IMM;
1225 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1227 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1229 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1231 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1234 return ARM::t2LDR_PRE;
1237 return ARM::t2STR_PRE;
1238 default: llvm_unreachable("Unhandled opcode!");
/// Return the post-indexed (writeback after access) form of a single
/// load/store opcode. As with the pre-indexed mapping, VLDR/VSTR fall
/// back to the one-register VLDM/VSTM writeback forms, selected by \p Mode.
1242 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1243 ARM_AM::AddrOpc Mode) {
1246 return ARM::LDR_POST_IMM;
1248 return ARM::STR_POST_IMM;
1250 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1252 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1254 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1256 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1259 return ARM::t2LDR_POST;
1262 return ARM::t2STR_POST;
1263 default: llvm_unreachable("Unhandled opcode!");
1267 /// Fold proceeding/trailing inc/dec of base register into the
1268 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1269 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1270 // Thumb1 doesn't have updating LDR/STR.
1271 // FIXME: Use LDM/STM with single register instead.
1272 if (isThumb1) return false;
1274 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1275 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1276 unsigned Opcode = MI->getOpcode();
1277 DebugLoc DL = MI->getDebugLoc();
// Classify by addressing mode: AM5 = VFP VLDR/VSTR, AM2 = ARM LDR/STRi12.
1278 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1279 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1280 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb the inc/dec into writeback.
1281 if (isi32Load(Opcode) || isi32Store(Opcode))
1282 if (MI->getOperand(2).getImm() != 0)
1284 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1287 // Can't do the merge if the destination register is the same as the would-be
1288 // writeback register.
1289 if (MI->getOperand(0).getReg() == Base)
1292 unsigned PredReg = 0;
1293 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1294 int Bytes = getLSMultipleTransferSize(MI);
1295 MachineBasicBlock &MBB = *MI->getParent();
1296 MachineBasicBlock::iterator MBBI(MI);
// Prefer a preceding inc/dec (-> pre-indexed form); fall back to a
// following one (-> post-indexed form). The AM5 fallback forms only
// support one direction each, hence the !isAM5 guards.
1298 MachineBasicBlock::iterator MergeInstr
1299 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1301 if (!isAM5 && Offset == Bytes) {
1302 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1303 } else if (Offset == -Bytes) {
1304 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1306 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1307 if (Offset == Bytes) {
1308 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1309 } else if (!isAM5 && Offset == -Bytes) {
1310 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// The add/sub is absorbed into the new instruction's writeback.
1314 MBB.erase(MergeInstr);
1316 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
1318 bool isLd = isLoadSingle(Opcode);
1320 // VLDM[SD]_UPD, VSTM[SD]_UPD
1321 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1322 // updating load/store-multiple instructions can be used with only one
1324 MachineOperand &MO = MI->getOperand(0);
1325 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1326 .addReg(Base, getDefRegState(true)) // WB base register
1327 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1328 .addImm(Pred).addReg(PredReg)
1329 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1330 getKillRegState(MO.isKill())));
1333 // LDR_PRE, LDR_POST
1334 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1335 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1336 .addReg(Base, RegState::Define)
1337 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Post-indexed ARM loads encode the offset via the AM2 opcode field.
1339 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1340 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1341 .addReg(Base, RegState::Define)
1342 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1345 // t2LDR_PRE, t2LDR_POST
1346 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1347 .addReg(Base, RegState::Define)
1348 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1351 MachineOperand &MO = MI->getOperand(0);
1352 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1353 // the vestigal zero-reg offset register. When that's fixed, this clause
1354 // can be removed entirely.
1355 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1356 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1357 // STR_PRE, STR_POST
1358 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1359 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1360 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1362 // t2STR_PRE, t2STR_POST
1363 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1364 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1365 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// Fold a preceding or trailing +/-8 adjustment of the base register into a
/// t2LDRDi8/t2STRDi8, producing the pre- or post-indexed t2LDRD/t2STRD form.
1373 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1374 unsigned Opcode = MI.getOpcode();
1375 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1376 "Must have t2STRDi8 or t2LDRDi8");
// Only a zero immediate offset can be absorbed into writeback.
1377 if (MI.getOperand(3).getImm() != 0)
1380 // Behaviour for writeback is undefined if base register is the same as one
1382 const MachineOperand &BaseOp = MI.getOperand(2);
1383 unsigned Base = BaseOp.getReg();
1384 const MachineOperand &Reg0Op = MI.getOperand(0);
1385 const MachineOperand &Reg1Op = MI.getOperand(1);
1386 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1390 ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
1391 MachineBasicBlock::iterator MBBI(MI);
1392 MachineBasicBlock &MBB = *MI.getParent();
// A +/-8 adjustment before the access becomes a pre-indexed form; one
// after it becomes a post-indexed form (8 = two 4-byte registers).
1394 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
1397 if (Offset == 8 || Offset == -8) {
1398 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1400 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1401 if (Offset == 8 || Offset == -8) {
1402 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
// The add/sub is now folded into the writeback; remove it.
1406 MBB.erase(MergeInstr);
1408 DebugLoc DL = MI.getDebugLoc();
1409 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Loads define the two data registers then the updated base; stores
// define the updated base first. Operand order differs between the two.
1410 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1411 MIB.addOperand(Reg0Op).addOperand(Reg1Op)
1412 .addReg(BaseOp.getReg(), RegState::Define);
1414 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1415 MIB.addReg(BaseOp.getReg(), RegState::Define)
1416 .addOperand(Reg0Op).addOperand(Reg1Op);
1418 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1419 .addImm(Offset).addImm(Pred).addReg(PredReg);
1420 assert(TII->get(Opcode).getNumOperands() == 6 &&
1421 TII->get(NewOpc).getNumOperands() == 7 &&
1422 "Unexpected number of operands in Opcode specification.");
1424 // Transfer implicit operands.
1425 for (const MachineOperand &MO : MI.implicit_operands())
1427 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
1433 /// Returns true if instruction is a memory operation that this pass is capable
1434 /// of operating on.
1435 static bool isMemoryOp(const MachineInstr *MI) {
1436 // When no memory operands are present, conservatively assume unaligned,
1437 // volatile, unfoldable.
1438 if (!MI->hasOneMemOperand())
1441 const MachineMemOperand *MMO = *MI->memoperands_begin();
1443 // Don't touch volatile memory accesses - we may be changing their order.
1444 if (MMO->isVolatile())
1447 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not, so require at least 4-byte alignment before considering a merge.
1449 if (MMO->getAlignment() < 4)
1452 // str <undef> could probably be eliminated entirely, but for now we just want
1453 // to avoid making a mess of it.
1454 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1455 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1456 MI->getOperand(0).isUndef())
1459 // Likewise don't mess with references to undefined addresses.
1460 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1461 MI->getOperand(1).isUndef())
// Finally, only register-base addressing is handled (not FI/global bases).
1464 unsigned Opcode = MI->getOpcode();
1469 return MI->getOperand(1).isReg();
1472 return MI->getOperand(1).isReg();
1483 return MI->getOperand(1).isReg();
/// Emit a single immediate-offset load (\p isDef true) or store at \p MBBI
/// with the given register/base/offset operands and kill/undef flags.
/// Helper for FixInvalidRegPairOp when splitting an LDRD/STRD in two.
1488 static void InsertLDR_STR(MachineBasicBlock &MBB,
1489 MachineBasicBlock::iterator &MBBI,
1490 int Offset, bool isDef,
1491 DebugLoc DL, unsigned NewOpc,
1492 unsigned Reg, bool RegDeadKill, bool RegUndef,
1493 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1494 bool OffKill, bool OffUndef,
1495 ARMCC::CondCodes Pred, unsigned PredReg,
1496 const TargetInstrInfo *TII, bool isT2) {
// Load: Reg is a def (possibly dead).
1498 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1500 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1501 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1502 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store: Reg is a use (possibly killed/undef).
1504 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1506 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1507 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1508 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
/// Rewrite an LDRD/STRD whose register pair is illegal for the encoding
/// (non-consecutive or odd-first registers), or that trips Cortex-M3
/// errata 602117, into an equivalent LDM/STM or a pair of single
/// loads/stores. The original instruction is erased and \p MBBI updated.
1512 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1513 MachineBasicBlock::iterator &MBBI) {
1514 MachineInstr *MI = &*MBBI;
1515 unsigned Opcode = MI->getOpcode();
1516 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1519 const MachineOperand &BaseOp = MI->getOperand(2);
1520 unsigned BaseReg = BaseOp.getReg();
1521 unsigned EvenReg = MI->getOperand(0).getReg();
1522 unsigned OddReg = MI->getOperand(1).getReg();
1523 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1524 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1526 // ARM errata 602117: LDRD with base in list may result in incorrect base
1527 // register when interrupted or faulted.
1528 bool Errata602117 = EvenReg == BaseReg &&
1529 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1530 // ARM LDRD/STRD needs consecutive registers.
1531 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1532 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
// Instruction is fine as-is; nothing to do.
1534 if (!Errata602117 && !NonConsecutiveRegs)
1537 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1538 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the data regs are defs (dead flag); for stores they are
// uses (kill flag).
1539 bool EvenDeadKill = isLd ?
1540 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1541 bool EvenUndef = MI->getOperand(0).isUndef();
1542 bool OddDeadKill = isLd ?
1543 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1544 bool OddUndef = MI->getOperand(1).isUndef();
1545 bool BaseKill = BaseOp.isKill();
1546 bool BaseUndef = BaseOp.isUndef();
1547 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1548 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1549 int OffImm = getMemoryOpOffset(MI);
1550 unsigned PredReg = 0;
1551 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1553 if (OddRegNum > EvenRegNum && OffImm == 0) {
1554 // Ascending register numbers and no offset. It's safe to change it to a
1556 unsigned NewOpc = (isLd)
1557 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1558 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1560 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1561 .addReg(BaseReg, getKillRegState(BaseKill))
1562 .addImm(Pred).addReg(PredReg)
1563 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1564 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1567 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1568 .addReg(BaseReg, getKillRegState(BaseKill))
1569 .addImm(Pred).addReg(PredReg)
1571 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1573 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1577 // Split into two instructions.
1578 unsigned NewOpc = (isLd)
1579 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1580 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1581 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1582 // so adjust and use t2LDRi12 here for that.
1583 unsigned NewOpc2 = (isLd)
1584 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1585 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1586 DebugLoc dl = MBBI->getDebugLoc();
1587 // If this is a load and base register is killed, it may have been
1588 // re-defed by the load, make sure the first load does not clobber it.
1590 (BaseKill || OffKill) &&
1591 (TRI->regsOverlap(EvenReg, BaseReg))) {
1592 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd half first so the even half (which clobbers the base)
// comes last.
1593 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1594 OddReg, OddDeadKill, false,
1595 BaseReg, false, BaseUndef, false, OffUndef,
1596 Pred, PredReg, TII, isT2);
1597 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1598 EvenReg, EvenDeadKill, false,
1599 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1600 Pred, PredReg, TII, isT2);
1602 if (OddReg == EvenReg && EvenDeadKill) {
1603 // If the two source operands are the same, the kill marker is
1604 // probably on the first one. e.g.
1605 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1606 EvenDeadKill = false;
1609 // Never kill the base register in the first instruction.
1610 if (EvenReg == BaseReg)
1611 EvenDeadKill = false;
1612 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1613 EvenReg, EvenDeadKill, EvenUndef,
1614 BaseReg, false, BaseUndef, false, OffUndef,
1615 Pred, PredReg, TII, isT2);
1616 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1617 OddReg, OddDeadKill, OddUndef,
1618 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1619 Pred, PredReg, TII, isT2);
// Remove the original LDRD/STRD and advance the caller's iterator.
1627 MBBI = MBB.erase(MBBI);
1631 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1632 /// incrementing offset into LDM / STM ops.
1633 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// Current chain state: same base register, same opcode, same predicate.
1635 unsigned CurrBase = 0;
1636 unsigned CurrOpc = ~0u;
1637 ARMCC::CondCodes CurrPred = ARMCC::AL;
1638 unsigned Position = 0;
1639 assert(Candidates.size() == 0);
1640 assert(MergeBaseCandidates.size() == 0);
1641 LiveRegsValid = false;
// Walk the block backwards, collecting chains of mergeable mem ops.
1643 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1645 // The instruction in front of the iterator is the one we look at.
1646 MBBI = std::prev(I);
1647 if (FixInvalidRegPairOp(MBB, MBBI))
1651 if (isMemoryOp(MBBI)) {
1652 unsigned Opcode = MBBI->getOpcode();
1653 const MachineOperand &MO = MBBI->getOperand(0);
1654 unsigned Reg = MO.getReg();
1655 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1656 unsigned PredReg = 0;
1657 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1658 int Offset = getMemoryOpOffset(MBBI);
1659 if (CurrBase == 0) {
1660 // Start of a new chain.
1664 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1667 // Note: No need to match PredReg in the next if.
1668 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1670 // r4 := ldr [r0, #8]
1671 // r4 := ldr [r0, #4]
1674 // If a load overrides the base register or a register loaded by
1675 // another load in our chain, we cannot take this instruction.
1676 bool Overlap = false;
1677 if (isLoadSingle(Opcode)) {
1678 Overlap = (Base == Reg);
1680 for (const MemOpQueueEntry &E : MemOps) {
1681 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1690 // Check offset and sort memory operation into the current chain.
1691 if (Offset > MemOps.back().Offset) {
1692 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
// Keep MemOps sorted by ascending offset; duplicate offsets abort.
1695 MemOpQueue::iterator MI, ME;
1696 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1697 if (Offset < MI->Offset) {
1698 // Found a place to insert.
1701 if (Offset == MI->Offset) {
1702 // Collision, abort.
1707 if (MI != MemOps.end()) {
1708 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1715 // Don't advance the iterator; The op will start a new chain next.
1718 // Fallthrough to look into existing chain.
1719 } else if (MBBI->isDebugValue()) {
1721 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1722 MBBI->getOpcode() == ARM::t2STRDi8) {
1723 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1724 // remember them because we may still be able to merge add/sub into them.
1725 MergeBaseCandidates.push_back(MBBI);
1729 // If we are here then the chain is broken; Extract candidates for a merge.
1730 if (MemOps.size() > 0) {
1731 FormCandidates(MemOps);
1732 // Reset for the next chain.
1735 CurrPred = ARMCC::AL;
// Flush any chain still open at the top of the block.
1739 if (MemOps.size() > 0)
1740 FormCandidates(MemOps);
1742 // Sort candidates so they get processed from end to begin of the basic
1743 // block later; This is necessary for liveness calculation.
1744 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1745 return M0->InsertPos < M1->InsertPos;
1747 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1749 // Go through list of candidates and merge.
1750 bool Changed = false;
1751 for (const MergeCandidate *Candidate : Candidates) {
1752 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1753 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1754 // Merge preceding/trailing base inc/dec into the merged op.
1757 unsigned Opcode = Merged->getOpcode();
1758 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1759 MergeBaseUpdateLSDouble(*Merged);
1761 MergeBaseUpdateLSMultiple(Merged);
// No multi-op merge happened; try folding inc/dec into each single op.
1763 for (MachineInstr *MI : Candidate->Instrs) {
1764 if (MergeBaseUpdateLoadStore(MI))
1769 assert(Candidate->Instrs.size() == 1);
1770 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1775 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
1776 for (MachineInstr *MI : MergeBaseCandidates)
1777 MergeBaseUpdateLSDouble(*MI);
1778 MergeBaseCandidates.clear();
1783 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1784 /// into the preceding stack restore so it directly restore the value of LR
1786 /// ldmfd sp!, {..., lr}
1789 /// ldmfd sp!, {..., lr}
1792 /// ldmfd sp!, {..., pc}
1793 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1794 // Thumb1 LDM doesn't allow high registers.
1795 if (isThumb1) return false;
1796 if (MBB.empty()) return false;
1798 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
// Only applies when the block ends in a bare return instruction.
1799 if (MBBI != MBB.begin() &&
1800 (MBBI->getOpcode() == ARM::BX_RET ||
1801 MBBI->getOpcode() == ARM::tBX_RET ||
1802 MBBI->getOpcode() == ARM::MOVPCLR)) {
1803 MachineInstr *PrevMI = std::prev(MBBI);
1804 unsigned Opcode = PrevMI->getOpcode();
1805 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1806 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1807 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The register list's last operand must be LR for PC rewriting to work.
1808 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1809 if (MO.getReg() != ARM::LR)
1811 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1812 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1813 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1814 PrevMI->setDesc(TII->get(NewOpc));
// Keep implicit uses/defs of the return instruction on the new LDM_RET.
1816 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
/// Pass entry point: run the load/store-multiple merging on every block,
/// then (on v5T+ where LDM into PC is a valid return) fold returns into a
/// preceding stack restore.
1824 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1826 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1827 TL = STI->getTargetLowering();
1828 AFI = Fn.getInfo<ARMFunctionInfo>();
1829 TII = STI->getInstrInfo();
1830 TRI = STI->getRegisterInfo();
1832 RegClassInfoValid = false;
1833 isThumb2 = AFI->isThumb2Function();
1834 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1836 bool Modified = false;
1837 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1839 MachineBasicBlock &MBB = *MFI;
1840 Modified |= LoadStoreMultipleOpti(MBB);
1841 if (STI->hasV5TOps())
1842 Modified |= MergeReturnIntoLDM(MBB);
// Release all MergeCandidate objects allocated during this function.
1845 Allocator.DestroyAll();
// Forward declaration of the pass-registry initializer used by the ctor.
1850 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
1853 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
1854 "ARM pre- register allocation load / store optimization pass"
1857 /// Pre- register allocation pass that move load / stores from consecutive
1858 /// locations close to make it more likely they will be combined later.
1859 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1861 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
1862 initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
// Cached per-function context, set up in runOnMachineFunction.
1865 const DataLayout *TD;
1866 const TargetInstrInfo *TII;
1867 const TargetRegisterInfo *TRI;
1868 const ARMSubtarget *STI;
1869 MachineRegisterInfo *MRI;
1870 MachineFunction *MF;
1872 bool runOnMachineFunction(MachineFunction &Fn) override;
1874 const char *getPassName() const override {
1875 return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
// Checks whether two adjacent loads/stores can be combined into one
// LDRD/STRD; on success fills in the new opcode and operand details.
1879 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1880 unsigned &NewOpc, unsigned &EvenReg,
1881 unsigned &OddReg, unsigned &BaseReg,
1883 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves same-base loads/stores next to each other within a block.
1885 bool RescheduleOps(MachineBasicBlock *MBB,
1886 SmallVectorImpl<MachineInstr *> &Ops,
1887 unsigned Base, bool isLd,
1888 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1889 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1891 char ARMPreAllocLoadStoreOpt::ID = 0;
1894 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
1895 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
/// Pre-RA pass entry point: cache the per-function context and reschedule
/// load/store instructions in every basic block.
1897 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1898 TD = &Fn.getDataLayout();
1899 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1900 TII = STI->getInstrInfo();
1901 TRI = STI->getRegisterInfo();
1902 MRI = &Fn.getRegInfo();
1905 bool Modified = false;
1906 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1908 Modified |= RescheduleLoadStoreInstrs(MFI);
/// Decide whether the mem ops in \p MemOps can be moved together across the
/// instructions in [I, E): rejects intervening calls/terminators/side
/// effects (and stores when moving loads), and limits the estimated
/// register-pressure increase.
1913 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1914 MachineBasicBlock::iterator I,
1915 MachineBasicBlock::iterator E,
1916 SmallPtrSetImpl<MachineInstr*> &MemOps,
1917 SmallSet<unsigned, 4> &MemRegs,
1918 const TargetRegisterInfo *TRI) {
1919 // Are there stores / loads / calls between them?
1920 // FIXME: This is overly conservative. We should make use of alias information
1922 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions being moved themselves (and debug values) are skipped.
1924 if (I->isDebugValue() || MemOps.count(&*I))
1926 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1928 if (isLd && I->mayStore())
1933 // It's not safe to move the first 'str' down.
1936 // str r4, [r0, #+4]
// Track registers whose live ranges would be stretched by the move, and
// bail if anything in between redefines the base register.
1940 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1941 MachineOperand &MO = I->getOperand(j);
1944 unsigned Reg = MO.getReg();
1945 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1947 if (Reg != Base && !MemRegs.count(Reg))
1948 AddedRegPressure.insert(Reg);
1952 // Estimate register pressure increase due to the transformation.
1953 if (MemRegs.size() <= 4)
1954 // Ok if we are moving small number of instructions.
1956 return AddedRegPressure.size() <= MemRegs.size() * 2;
1960 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
/// Used when two loads/stores are fused into one LDRD/STRD so the combined
/// instruction keeps both memory operands for later alias analysis.
1961 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1962 MachineInstr *Op1) {
1963 assert(MI->memoperands_empty() && "expected a new machineinstr");
1964 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1965 + (Op1->memoperands_end() - Op1->memoperands_begin());
// Allocate the combined array in the function's allocator and copy both
// instructions' memoperand lists into it back-to-back.
1967 MachineFunction *MF = MI->getParent()->getParent();
1968 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1969 MachineSDNode::mmo_iterator MemEnd =
1970 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1972 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1973 MI->setMemRefs(MemBegin, MemEnd);
/// Check whether \p Op0 and \p Op1 can be combined into an LDRD/STRD.
/// On success, fills in the new opcode, the two data registers, the base
/// register, the encoded offset, the predicate, and the debug location.
1977 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1978 DebugLoc &dl, unsigned &NewOpc,
1980 unsigned &SecondReg,
1981 unsigned &BaseReg, int &Offset,
1983 ARMCC::CondCodes &Pred,
1985 // Make sure we're allowed to generate LDRD/STRD.
1986 if (!STI->hasV5TEOps())
1989 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Select the paired opcode from the first op's opcode (ARM or Thumb2).
1991 unsigned Opcode = Op0->getOpcode();
1992 if (Opcode == ARM::LDRi12) {
1994 } else if (Opcode == ARM::STRi12) {
1996 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1997 NewOpc = ARM::t2LDRDi8;
2000 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2001 NewOpc = ARM::t2STRDi8;
2008 // Make sure the base address satisfies i64 ld / st alignment requirement.
2009 // At the moment, we ignore the memoryoperand's value.
2010 // If we want to use AliasAnalysis, we should check it accordingly.
2011 if (!Op0->hasOneMemOperand() ||
2012 (*Op0->memoperands_begin())->isVolatile())
2015 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
2016 const Function *Func = MF->getFunction();
2017 unsigned ReqAlign = STI->hasV6Ops()
2018 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
2019 : 8; // Pre-v6 need 8-byte align
2020 if (Align < ReqAlign)
2023 // Then make sure the immediate offset fits.
2024 int OffImm = getMemoryOpOffset(Op0);
2026 int Limit = (1 << 8) * Scale;
2027 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM-mode LDRD/STRD encodes sign and magnitude via the AM3 opcode field.
2031 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2033 AddSub = ARM_AM::sub;
2036 int Limit = (1 << 8) * Scale;
2037 if (OffImm >= Limit || (OffImm & (Scale-1)))
2039 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD with identical destination registers is rejected.
2041 FirstReg = Op0->getOperand(0).getReg();
2042 SecondReg = Op1->getOperand(0).getReg();
2043 if (FirstReg == SecondReg)
2045 BaseReg = Op0->getOperand(1).getReg();
2046 Pred = getInstrPredicate(Op0, PredReg);
2047 dl = Op0->getDebugLoc();
/// Attempt to reschedule the loads / stores in Ops (all using the same base
/// register \p Base) so that consecutive-offset accesses become adjacent,
/// enabling later LDM/STM formation; pairs of 32-bit accesses may be fused
/// directly into LDRD/STRD here. Returns true if anything was moved/rewritten.
///
/// NOTE(review): this chunk is an elided extract — the embedded original line
/// numbers jump (e.g. 2063 -> 2066), so closing braces, `continue`/`break`
/// statements and several assignments are missing between the lines below.
2051 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2052 SmallVectorImpl<MachineInstr *> &Ops,
2053 unsigned Base, bool isLd,
2054 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2055 bool RetVal = false;
2057 // Sort by offset (in reverse order).
// Descending offset order: the scan below walks Ops back-to-front, so it
// visits ascending offsets, which is what LDM/STM (ia mode) requires.
2058 std::sort(Ops.begin(), Ops.end(),
2059 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2060 int LOffset = getMemoryOpOffset(LHS);
2061 int ROffset = getMemoryOpOffset(RHS);
// Offsets are unique per base here — the caller filtered duplicate
// base+offset combinations before handing Ops to us.
2062 assert(LHS == RHS || LOffset != ROffset);
2063 return LOffset > ROffset;
2066 // The loads / stores of the same base are in order. Scan them from first to
2067 // last and check for the following:
2068 // 1. Any def of base.
// Greedily peel off a run of combinable ops each iteration until fewer than
// two candidates remain.
2070 while (Ops.size() > 1) {
// FirstLoc/LastLoc track the program-order extent (MI2LocMap positions) of
// the candidate run; FirstOp/LastOp are the corresponding instructions.
2071 unsigned FirstLoc = ~0U;
2072 unsigned LastLoc = 0;
2073 MachineInstr *FirstOp = nullptr;
2074 MachineInstr *LastOp = nullptr;
2076 unsigned LastOpcode = 0;
2077 unsigned LastBytes = 0;
2078 unsigned NumMove = 0;
// Back-to-front over Ops == ascending offsets (see sort above).
2079 for (int i = Ops.size() - 1; i >= 0; --i) {
2080 MachineInstr *Op = Ops[i];
2081 unsigned Loc = MI2LocMap[Op];
2082 if (Loc <= FirstLoc) {
2086 if (Loc >= LastLoc) {
// All ops in a run must map to the same load/store-multiple opcode.
2092 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2093 if (LastOpcode && LSMOpcode != LastOpcode)
2096 int Offset = getMemoryOpOffset(Op);
2097 unsigned Bytes = getLSMultipleTransferSize(Op);
// Run must be contiguous in memory: equal transfer sizes and each offset
// exactly one transfer past the previous.
2099 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2102 LastOffset = Offset;
2104 LastOpcode = LSMOpcode;
2105 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and their data registers for the
// safety/profitability query below.
2112 SmallPtrSet<MachineInstr*, 4> MemOps;
2113 SmallSet<unsigned, 4> MemRegs;
2114 for (int i = NumMove-1; i >= 0; --i) {
2115 MemOps.insert(Ops[i]);
2116 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2119 // Be conservative, if the instructions are too far apart, don't
2120 // move them. We want to limit the increase of register pressure.
2121 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2123 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2124 MemOps, MemRegs, TRI);
2126 for (unsigned i = 0; i != NumMove; ++i)
2129 // This is the new location for the loads / stores.
// Loads sink to the first op's position, stores to the last op's; skip
// past members of the run and DBG_VALUEs when choosing the insert point.
2130 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2131 while (InsertPos != MBB->end()
2132 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2135 // If we are moving a pair of loads / stores, see if it makes sense
2136 // to try to allocate a pair of registers that can form register pairs.
2137 MachineInstr *Op0 = Ops.back();
2138 MachineInstr *Op1 = Ops[Ops.size()-2];
2139 unsigned FirstReg = 0, SecondReg = 0;
2140 unsigned BaseReg = 0, PredReg = 0;
2141 ARMCC::CondCodes Pred = ARMCC::AL;
2143 unsigned NewOpc = 0;
// Exactly two consecutive 32-bit accesses may become a single LDRD/STRD.
2146 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2147 FirstReg, SecondReg, BaseReg,
2148 Offset, PredReg, Pred, isT2)) {
// Constrain both data registers to the class the paired opcode needs
// (e.g. LDRD requires an even/odd GPR pair on ARM mode).
2152 const MCInstrDesc &MCID = TII->get(NewOpc);
2153 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2154 MRI->constrainRegClass(FirstReg, TRC);
2155 MRI->constrainRegClass(SecondReg, TRC);
2157 // Form the pair instruction.
2159 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2160 .addReg(FirstReg, RegState::Define)
2161 .addReg(SecondReg, RegState::Define)
2163 // FIXME: We're converting from LDRi12 to an insn that still
2164 // uses addrmode2, so we need an explicit offset reg. It should
2165 // always by reg0 since we're transforming LDRi12s.
2168 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Merge the two original memory operands so aliasing info survives.
2169 concatenateMemOperands(MIB, Op0, Op1);
2170 DEBUG(dbgs() << "Formed " << *MIB << "\n");
// Store case (STRD): same construction, data registers are uses.
2173 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2177 // FIXME: We're converting from LDRi12 to an insn that still
2178 // uses addrmode2, so we need an explicit offset reg. It should
2179 // always by reg0 since we're transforming STRi12s.
2182 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2183 concatenateMemOperands(MIB, Op0, Op1);
2184 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2191 // Add register allocation hints to form register pairs.
2192 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2193 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// Fallback: no pairing — just splice each op of the run to InsertPos so
// the post-RA pass can turn the now-adjacent ops into LDM/STM.
2196 for (unsigned i = 0; i != NumMove; ++i) {
2197 MachineInstr *Op = Ops.back();
2199 MBB->splice(InsertPos, MBB, Op);
2203 NumLdStMoved += NumMove;
/// Scan \p MBB, bucketing single loads and stores by their base register,
/// then invoke RescheduleOps on each bucket to bring same-base accesses
/// together. Returns true if anything changed.
///
/// NOTE(review): elided extract — the embedded line numbers jump, so some
/// statements (the `bool` return type on line 2212, `continue`s, closing
/// braces, `unsigned Loc = 0;`, the StopHere flush, `return RetVal;`) are
/// missing between the lines below.
2213 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2214 bool RetVal = false;
// MI2LocMap gives each instruction a monotonically increasing position so
// RescheduleOps can reason about program-order distance between ops.
2216 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2217 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2218 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
// Bases in discovery order, so rescheduling below is deterministic rather
// than dependent on DenseMap iteration order.
2219 SmallVector<unsigned, 4> LdBases;
2220 SmallVector<unsigned, 4> StBases;
2223 MachineBasicBlock::iterator MBBI = MBB->begin();
2224 MachineBasicBlock::iterator E = MBB->end();
2226 for (; MBBI != E; ++MBBI) {
2227 MachineInstr *MI = MBBI;
2228 if (MI->isCall() || MI->isTerminator()) {
2229 // Stop at barriers.
2234 if (!MI->isDebugValue())
2235 MI2LocMap[MI] = ++Loc;
// Only plain, unpredicated single loads/stores are candidates.
2237 if (!isMemoryOp(MI))
2239 unsigned PredReg = 0;
2240 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2243 int Opc = MI->getOpcode();
2244 bool isLd = isLoadSingle(Opc);
2245 unsigned Base = MI->getOperand(1).getReg();
2246 int Offset = getMemoryOpOffset(MI);
2248 bool StopHere = false;
// Load path: append to this base's bucket unless the same base+offset was
// already seen (a duplicate — see StopHere handling below).
2250 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2251 Base2LdsMap.find(Base);
2252 if (BI != Base2LdsMap.end()) {
2253 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2254 if (Offset == getMemoryOpOffset(BI->second[i])) {
2260 BI->second.push_back(MI);
// First load seen for this base: start a new bucket.
2262 Base2LdsMap[Base].push_back(MI);
2263 LdBases.push_back(Base);
// Store path: mirror of the load path above.
2266 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2267 Base2StsMap.find(Base);
2268 if (BI != Base2StsMap.end()) {
2269 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2270 if (Offset == getMemoryOpOffset(BI->second[i])) {
2276 BI->second.push_back(MI);
2278 Base2StsMap[Base].push_back(MI);
2279 StBases.push_back(Base);
2284 // Found a duplicate (a base+offset combination that's seen earlier).
2291 // Re-schedule loads.
2292 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2293 unsigned Base = LdBases[i];
2294 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2296 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2299 // Re-schedule stores.
2300 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2301 unsigned Base = StBases[i];
2302 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2304 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state; presumably the enclosing (elided) loop repeats
// this for each barrier-delimited region of the block — TODO confirm.
2308 Base2LdsMap.clear();
2309 Base2StsMap.clear();
2319 /// Returns an instance of the load / store optimization pass.
/// \param PreAlloc selects the pre-register-allocation rescheduling pass;
/// otherwise the post-allocation LDM/STM-forming pass is created.
///
/// NOTE(review): the jump in embedded line numbers (2320 -> 2322) shows the
/// `if (PreAlloc)` guard on original line 2321 was elided from this extract;
/// the two returns are NOT both unconditional — do not "fix" dead code here.
2320 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2322 return new ARMPreAllocLoadStoreOpt();
2323 return new ARMLoadStoreOpt();