1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
49 #define DEBUG_TYPE "arm-ldst-opt"
51 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
52 STATISTIC(NumSTMGened , "Number of stm instructions generated");
53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
64 /// Post-register-allocation pass that combines load / store instructions to
65 /// form ldm / stm instructions.
66 struct ARMLoadStoreOpt : public MachineFunctionPass {
67 static char ID;
68 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
70 const MachineFunction *MF;
71 const TargetInstrInfo *TII;
72 const TargetRegisterInfo *TRI;
73 const MachineRegisterInfo *MRI;
74 const ARMSubtarget *STI;
75 const TargetLowering *TL;
76 ARMFunctionInfo *AFI;
77 LivePhysRegs LiveRegs;
78 RegisterClassInfo RegClassInfo;
79 MachineBasicBlock::const_iterator LiveRegPos;
80 bool LiveRegsValid;
81 bool RegClassInfoValid;
82 bool isThumb1, isThumb2;
84 bool runOnMachineFunction(MachineFunction &Fn) override;
86 const char *getPassName() const override {
87 return "ARM load / store optimization pass";
91 /// A set of load/store MachineInstrs with the same base register, sorted by
92 /// offset.
93 struct MemOpQueueEntry {
94 MachineInstr *MI;
95 int Offset; ///< Load/Store offset.
96 unsigned Position; ///< Position as counted from end of basic block.
97 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
98 : MI(MI), Offset(Offset), Position(Position) {}
100 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
102 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
103 /// merged into a LDM/STM.
104 struct MergeCandidate {
105 /// List of instructions ordered by load/store offset.
106 SmallVector<MachineInstr*, 4> Instrs;
107 /// Index in Instrs of the instruction being latest in the schedule.
108 unsigned LatestMIIdx;
109 /// Index in Instrs of the instruction being earliest in the schedule.
110 unsigned EarliestMIIdx;
111 /// Index into the basic block where the merged instruction will be
112 /// inserted. (See MemOpQueueEntry.Position)
113 unsigned InsertPos;
115 BumpPtrAllocator Allocator;
116 SmallVector<const MergeCandidate*,4> Candidates;
118 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
119 MachineBasicBlock::const_iterator Before);
120 unsigned findFreeReg(const TargetRegisterClass &RegClass);
121 void UpdateBaseRegUses(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
123 DebugLoc DL, unsigned Base, unsigned WordOffset,
124 ARMCC::CondCodes Pred, unsigned PredReg);
125 MachineInstr *MergeOps(MachineBasicBlock &MBB,
126 MachineBasicBlock::iterator InsertBefore, int Offset,
127 unsigned Base, bool BaseKill, unsigned Opcode,
128 ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
129 ArrayRef<std::pair<unsigned, bool>> Regs);
130 void FormCandidates(const MemOpQueue &MemOps);
131 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
132 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator &MBBI);
134 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
135 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
136 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
137 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
139 char ARMLoadStoreOpt::ID = 0;
142 static bool definesCPSR(const MachineInstr *MI) {
143 for (const auto &MO : MI->operands()) {
146 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
147 // If the instruction has a live CPSR def, then it's not safe to fold it
148 // into a load / store.
149 return true;
155 static int getMemoryOpOffset(const MachineInstr *MI) {
156 unsigned Opcode = MI->getOpcode();
157 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
158 unsigned NumOperands = MI->getDesc().getNumOperands();
159 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
161 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
162 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
163 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
164 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
167 // Thumb1 immediate offsets are scaled by 4
168 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
169 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
172 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
173 : ARM_AM::getAM5Offset(OffField) * 4;
174 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
175 : ARM_AM::getAM5Op(OffField);
177 if (Op == ARM_AM::sub)
183 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
184 return MI.getOperand(1);
187 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
188 return MI.getOperand(0);
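// Note: for the single load/store instructions handled by this pass, the
// transferred register is operand 0 and the base register is operand 1, and
// the offset immediate is the operand just before the two predicate operands
// (see getMemoryOpOffset above).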
191 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
193 default: llvm_unreachable("Unhandled opcode!");
197 default: llvm_unreachable("Unhandled submode!");
198 case ARM_AM::ia: return ARM::LDMIA;
199 case ARM_AM::da: return ARM::LDMDA;
200 case ARM_AM::db: return ARM::LDMDB;
201 case ARM_AM::ib: return ARM::LDMIB;
206 default: llvm_unreachable("Unhandled submode!");
207 case ARM_AM::ia: return ARM::STMIA;
208 case ARM_AM::da: return ARM::STMDA;
209 case ARM_AM::db: return ARM::STMDB;
210 case ARM_AM::ib: return ARM::STMIB;
214 // tLDMIA is writeback-only - unless the base register is in the input
215 // reglist.
218 default: llvm_unreachable("Unhandled submode!");
219 case ARM_AM::ia: return ARM::tLDMIA;
223 // There is no non-writeback tSTMIA either.
226 default: llvm_unreachable("Unhandled submode!");
227 case ARM_AM::ia: return ARM::tSTMIA_UPD;
233 default: llvm_unreachable("Unhandled submode!");
234 case ARM_AM::ia: return ARM::t2LDMIA;
235 case ARM_AM::db: return ARM::t2LDMDB;
241 default: llvm_unreachable("Unhandled submode!");
242 case ARM_AM::ia: return ARM::t2STMIA;
243 case ARM_AM::db: return ARM::t2STMDB;
248 default: llvm_unreachable("Unhandled submode!");
249 case ARM_AM::ia: return ARM::VLDMSIA;
250 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
255 default: llvm_unreachable("Unhandled submode!");
256 case ARM_AM::ia: return ARM::VSTMSIA;
257 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
262 default: llvm_unreachable("Unhandled submode!");
263 case ARM_AM::ia: return ARM::VLDMDIA;
264 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
269 default: llvm_unreachable("Unhandled submode!");
270 case ARM_AM::ia: return ARM::VSTMDIA;
271 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
276 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
278 default: llvm_unreachable("Unhandled opcode!");
285 case ARM::tLDMIA_UPD:
286 case ARM::tSTMIA_UPD:
287 case ARM::t2LDMIA_RET:
289 case ARM::t2LDMIA_UPD:
291 case ARM::t2STMIA_UPD:
293 case ARM::VLDMSIA_UPD:
295 case ARM::VSTMSIA_UPD:
297 case ARM::VLDMDIA_UPD:
299 case ARM::VSTMDIA_UPD:
313 case ARM::t2LDMDB_UPD:
315 case ARM::t2STMDB_UPD:
316 case ARM::VLDMSDB_UPD:
317 case ARM::VSTMSDB_UPD:
318 case ARM::VLDMDDB_UPD:
319 case ARM::VSTMDDB_UPD:
330 static bool isT1i32Load(unsigned Opc) {
331 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
334 static bool isT2i32Load(unsigned Opc) {
335 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
338 static bool isi32Load(unsigned Opc) {
339 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
342 static bool isT1i32Store(unsigned Opc) {
343 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
346 static bool isT2i32Store(unsigned Opc) {
347 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
350 static bool isi32Store(unsigned Opc) {
351 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
354 static bool isLoadSingle(unsigned Opc) {
355 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
358 static unsigned getImmScale(unsigned Opc) {
360 default: llvm_unreachable("Unhandled opcode!");
375 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
376 switch (MI->getOpcode()) {
403 case ARM::tLDMIA_UPD:
404 case ARM::tSTMIA_UPD:
411 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
414 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
418 /// Update future uses of the base register with the offset introduced
419 /// due to writeback. This function only works on Thumb1.
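/// Illustrative sketch (not from the original comments): after merging
///   ldr r1, [r0] ; ldr r2, [r0, #4]
/// into the writeback form "ldm r0!, {r1, r2}", a later "ldr r3, [r0, #8]"
/// must become "ldr r3, [r0]" because r0 has already advanced by 8 bytes; if a
/// later use cannot be rewritten this way, a SUB is inserted to reset r0.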
421 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
422 MachineBasicBlock::iterator MBBI,
423 DebugLoc DL, unsigned Base,
425 ARMCC::CondCodes Pred, unsigned PredReg) {
426 assert(isThumb1 && "Can only update base register uses for Thumb1!");
427 // Start updating any instructions with immediate offsets. Insert a SUB before
428 // the first non-updateable instruction (if any).
429 for (; MBBI != MBB.end(); ++MBBI) {
430 bool InsertSub = false;
431 unsigned Opc = MBBI->getOpcode();
433 if (MBBI->readsRegister(Base)) {
436 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
438 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
440 if (IsLoad || IsStore) {
441 // Loads and stores with immediate offsets can be updated, but only if
442 // the new offset isn't negative.
443 // The MachineOperand containing the offset immediate is the last one
444 // before predicates.
446 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
447 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
448 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
450 // If storing the base register, it needs to be reset first.
451 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
453 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
458 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
459 !definesCPSR(MBBI)) {
460 // SUBS/ADDS using this register, with a dead def of the CPSR.
461 // Merge it with the update; if the merged offset is too large,
462 // insert a new sub instead.
464 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
465 Offset = (Opc == ARM::tSUBi8) ?
466 MO.getImm() + WordOffset * 4 :
467 MO.getImm() - WordOffset * 4 ;
468 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
469 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
470 // Offset == 0.
472 // The base register has now been reset, so exit early.
479 // Can't update the instruction.
483 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
484 // Since SUBS sets the condition flags, we can't place the base reset
485 // after an instruction that has a live CPSR def.
486 // The base register might also contain an argument for a function call.
491 // An instruction above couldn't be updated, so insert a sub.
492 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
493 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
497 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
498 // Register got killed. Stop updating.
502 // End of block was reached.
503 if (MBB.succ_size() > 0) {
504 // FIXME: Because of a bug, live registers are sometimes missing from
505 // the successor blocks' live-in sets. This means we can't trust that
506 // information and *always* have to reset at the end of a block.
508 if (MBBI != MBB.end()) --MBBI;
509 AddDefaultT1CC(
510 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
511 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
515 /// Return the first register of class \p RegClass that is not currently live.
516 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
517 if (!RegClassInfoValid) {
518 RegClassInfo.runOnMachineFunction(*MF);
519 RegClassInfoValid = true;
522 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
523 if (!LiveRegs.contains(Reg))
528 /// Compute live registers just before instruction \p Before (in normal schedule
529 /// direction). Computes backwards so multiple queries in the same block must
530 /// come in reverse order.
531 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
532 MachineBasicBlock::const_iterator Before) {
533 // Initialize if we never queried in this block.
534 if (!LiveRegsValid) {
536 LiveRegs.addLiveOuts(&MBB, true);
537 LiveRegPos = MBB.end();
538 LiveRegsValid = true;
540 // Move backward just before the "Before" position.
541 while (LiveRegPos != Before) {
543 LiveRegs.stepBackward(*LiveRegPos);
547 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
549 for (const std::pair<unsigned, bool> &R : Regs)
555 /// Create and insert a LDM or STM with Base as base register and registers in
556 /// Regs as the register operands that would be loaded / stored. It returns
557 /// the newly created instruction, or nullptr if the merge is not possible.
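/// Example of the intended transformation (illustration only):
///   ldr r1, [r0] ; ldr r2, [r0, #4] ; ldr r3, [r0, #8]
///     =>  ldmia r0, {r1, r2, r3}
/// When the starting offset is not zero, a new base register may first be
/// materialized with an ADD/SUB, as done below.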
559 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
560 MachineBasicBlock::iterator InsertBefore, int Offset,
561 unsigned Base, bool BaseKill, unsigned Opcode,
562 ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
563 ArrayRef<std::pair<unsigned, bool>> Regs) {
564 unsigned NumRegs = Regs.size();
567 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
568 // Compute liveness information for that register to make the decision.
569 bool SafeToClobberCPSR = !isThumb1 ||
570 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
571 MachineBasicBlock::LQR_Dead);
573 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
575 // Exception: If the base register is in the input reglist, Thumb1 LDM is
576 // non-writeback.
577 // It's also not possible to merge an STR of the base register in Thumb1.
578 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
579 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
580 if (Opcode == ARM::tLDRi) {
582 } else if (Opcode == ARM::tSTRi) {
587 ARM_AM::AMSubMode Mode = ARM_AM::ia;
588 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
589 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
590 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
592 if (Offset == 4 && haveIBAndDA) {
594 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
596 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
597 // VLDM/VSTM do not support DB mode without also updating the base reg.
599 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
600 // Check if this is a supported opcode before inserting instructions to
601 // calculate a new base register.
602 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
604 // If starting offset isn't zero, insert a MI to materialize a new base.
605 // But only do so if it is cost effective, i.e. merging more than two
606 // loads / stores.
607 if (NumRegs <= 2)
608 return nullptr;
610 // On Thumb1, it's not worth materializing a new base register without
611 // clobbering the CPSR (i.e. not using ADDS/SUBS).
612 if (!SafeToClobberCPSR)
616 if (isi32Load(Opcode)) {
617 // If it is a load, then just use one of the destination registers
618 // as the new base.
619 NewBase = Regs[NumRegs-1].first;
621 // Find a free register that we can use as a scratch register.
622 moveLiveRegsBefore(MBB, InsertBefore);
623 // The merged instruction does not exist yet but will use several Regs if
624 // it is a store.
625 if (!isLoadSingle(Opcode))
626 for (const std::pair<unsigned, bool> &R : Regs)
627 LiveRegs.addReg(R.first);
629 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
635 isThumb2 ? ARM::t2ADDri :
636 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
637 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
638 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
643 isThumb2 ? ARM::t2SUBri :
644 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
645 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
648 if (!TL->isLegalAddImmediate(Offset))
649 // FIXME: Try add with register operand?
650 return nullptr; // Probably not worth it then.
652 // We can only append a kill flag to the add/sub input if the value is not
653 // used in the register list of the stm as well.
654 bool KillOldBase = BaseKill &&
655 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
658 // Thumb1: depending on immediate size, use either
659 //   ADDS NewBase, Base, #imm3
660 // or
661 //   MOV  NewBase, Base
662 //   ADDS NewBase, #imm8
663 if (Base != NewBase &&
664 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
665 // Need to insert a MOV to the new base first.
666 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
668 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
669 if (Pred != ARMCC::AL)
671 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
672 .addReg(Base, getKillRegState(KillOldBase));
674 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
675 .addReg(Base, getKillRegState(KillOldBase))
676 .addImm(Pred).addReg(PredReg);
678 // The following ADDS/SUBS becomes an update.
682 if (BaseOpc == ARM::tADDrSPi) {
683 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
684 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
685 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
686 .addImm(Pred).addReg(PredReg);
687 } else
688 AddDefaultT1CC(
689 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
690 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
691 .addImm(Pred).addReg(PredReg);
693 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
694 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
695 .addImm(Pred).addReg(PredReg).addReg(0);
698 BaseKill = true; // New base is always killed straight away.
701 bool isDef = isLoadSingle(Opcode);
703 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
704 // base register writeback.
705 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
709 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
710 // - There is no writeback (LDM of base register),
711 // - the base register is killed by the merged instruction,
712 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
713 // to reset the base register.
714 // Otherwise, don't merge.
715 // It's safe to return here since the code to materialize a new base register
716 // above is also conditional on SafeToClobberCPSR.
717 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
720 MachineInstrBuilder MIB;
723 if (Opcode == ARM::tLDMIA)
724 // Update tLDMIA with writeback if necessary.
725 Opcode = ARM::tLDMIA_UPD;
727 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
729 // Thumb1: we might need to set base writeback when building the MI.
730 MIB.addReg(Base, getDefRegState(true))
731 .addReg(Base, getKillRegState(BaseKill));
733 // The base isn't dead after a merged instruction with writeback.
734 // Insert a sub instruction after the newly formed instruction to reset.
736 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
739 // No writeback, simply build the MachineInstr.
740 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
741 MIB.addReg(Base, getKillRegState(BaseKill));
744 MIB.addImm(Pred).addReg(PredReg);
746 for (const std::pair<unsigned, bool> &R : Regs)
747 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
749 return MIB.getInstr();
752 /// Call MergeOps and update MemOps and merges accordingly on success.
753 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
754 const MachineInstr *First = Cand.Instrs.front();
755 unsigned Opcode = First->getOpcode();
756 bool IsLoad = isLoadSingle(Opcode);
757 SmallVector<std::pair<unsigned, bool>, 8> Regs;
758 SmallVector<unsigned, 4> ImpDefs;
759 DenseSet<unsigned> KilledRegs;
760 // Determine list of registers and list of implicit super-register defs.
761 for (const MachineInstr *MI : Cand.Instrs) {
762 const MachineOperand &MO = getLoadStoreRegOp(*MI);
763 unsigned Reg = MO.getReg();
764 bool IsKill = MO.isKill();
766 KilledRegs.insert(Reg);
767 Regs.push_back(std::make_pair(Reg, IsKill));
770 // Collect any implicit defs of super-registers; after merging we can't
771 // be sure anymore that we properly preserved these live ranges and must
772 // remove these implicit operands.
773 for (const MachineOperand &MO : MI->implicit_operands()) {
774 if (!MO.isReg() || !MO.isDef() || MO.isDead())
776 assert(MO.isImplicit());
777 unsigned DefReg = MO.getReg();
779 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
781 // We can ignore cases where the super-reg is read and written.
782 if (MI->readsRegister(DefReg))
784 ImpDefs.push_back(DefReg);
789 // Attempt the merge.
790 typedef MachineBasicBlock::iterator iterator;
791 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
792 iterator InsertBefore = std::next(iterator(LatestMI));
793 MachineBasicBlock &MBB = *LatestMI->getParent();
794 unsigned Offset = getMemoryOpOffset(First);
795 unsigned Base = getLoadStoreBaseOp(*First).getReg();
796 bool BaseKill = LatestMI->killsRegister(Base);
797 unsigned PredReg = 0;
798 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
799 DebugLoc DL = First->getDebugLoc();
800 MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
801 Opcode, Pred, PredReg, DL, Regs);
805 // Determine earliest instruction that will get removed. We then keep an
806 // iterator just above it so the following erases don't invalidate it.
807 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
808 bool EarliestAtBegin = false;
809 if (EarliestI == MBB.begin()) {
810 EarliestAtBegin = true;
812 EarliestI = std::prev(EarliestI);
815 // Remove instructions which have been merged.
816 for (MachineInstr *MI : Cand.Instrs)
819 // Determine range between the earliest removed instruction and the new one.
821 EarliestI = MBB.begin();
823 EarliestI = std::next(EarliestI);
824 auto FixupRange = make_range(EarliestI, iterator(Merged));
826 if (isLoadSingle(Opcode)) {
827 // If the previous loads defined a super-reg, then we have to mark earlier
828 // operands undef; Replicate the super-reg def on the merged instruction.
829 for (MachineInstr &MI : FixupRange) {
830 for (unsigned &ImpDefReg : ImpDefs) {
831 for (MachineOperand &MO : MI.implicit_operands()) {
832 if (!MO.isReg() || MO.getReg() != ImpDefReg)
842 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
843 for (unsigned ImpDef : ImpDefs)
844 MIB.addReg(ImpDef, RegState::ImplicitDefine);
846 // Remove kill flags: We are possibly storing the values later now.
847 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
848 for (MachineInstr &MI : FixupRange) {
849 for (MachineOperand &MO : MI.uses()) {
850 if (!MO.isReg() || !MO.isKill())
852 if (KilledRegs.count(MO.getReg()))
856 assert(ImpDefs.empty());
862 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
863 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
864 const MachineInstr *FirstMI = MemOps[0].MI;
865 unsigned Opcode = FirstMI->getOpcode();
866 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
867 unsigned Size = getLSMultipleTransferSize(FirstMI);
868 // vldm / vstm limits are 32 for S variants, 16 for D variants.
889 unsigned EIndex = MemOps.size();
891 // Look at the first instruction.
892 const MachineInstr *MI = MemOps[SIndex].MI;
893 int Offset = MemOps[SIndex].Offset;
894 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
895 unsigned PReg = PMO.getReg();
896 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
897 unsigned Latest = SIndex;
898 unsigned Earliest = SIndex;
901 // Merge additional instructions fulfilling LDM/STM constraints.
902 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
903 int NewOffset = MemOps[I].Offset;
904 if (NewOffset != Offset + (int)Size)
906 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
907 unsigned Reg = MO.getReg();
910 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
911 // Register numbers must be in ascending order.
912 if (RegNum <= PRegNum)
914 // For VFP / NEON load/store multiples, the registers must be consecutive
915 // and within the limit on the number of registers per instruction.
916 if (!isNotVFP && RegNum != PRegNum+1)
918 // On Swift we don't want vldm/vstm to start with an odd register number
919 // because Q register unaligned vldm/vstm need more uops.
920 if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
923 // Track MemOp with latest and earliest position (Positions are
924 // counted in reverse).
925 unsigned Position = MemOps[I].Position;
926 if (Position < MemOps[Latest].Position)
928 else if (Position > MemOps[Earliest].Position)
930 // Prepare for next MemOp.
935 // Form a candidate from the Ops collected so far.
936 MergeCandidate *Candidate = new(Allocator) MergeCandidate;
937 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
938 Candidate->Instrs.push_back(MemOps[C].MI);
939 Candidate->LatestMIIdx = Latest - SIndex;
940 Candidate->EarliestMIIdx = Earliest - SIndex;
941 Candidate->InsertPos = MemOps[Latest].Position;
942 Candidates.push_back(Candidate);
943 // Continue after the chain.
945 } while (SIndex < EIndex);
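/// Return true if \p MI subtracts exactly \p Bytes from \p Base (writing the
/// result back to \p Base) under predicate \p Pred, i.e. a base-register
/// decrement that MergeBaseUpdateLSMultiple/MergeBaseUpdateLoadStore can fold
/// into a writeback form. A non-zero \p Limit bounds the accepted offset.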
948 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
949 unsigned Bytes, unsigned Limit,
950 ARMCC::CondCodes Pred, unsigned PredReg) {
951 unsigned MyPredReg = 0;
955 bool CheckCPSRDef = false;
956 switch (MI->getOpcode()) {
957 default: return false;
967 // Make sure the offset fits in 8 bits.
968 if (Bytes == 0 || (Limit && Bytes >= Limit))
971 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
972 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
973 if (!(MI->getOperand(0).getReg() == Base &&
974 MI->getOperand(1).getReg() == Base &&
975 (MI->getOperand(2).getImm() * Scale) == Bytes &&
976 getInstrPredicate(MI, MyPredReg) == Pred &&
977 MyPredReg == PredReg))
980 return CheckCPSRDef ? !definesCPSR(MI) : true;
983 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
984 unsigned Bytes, unsigned Limit,
985 ARMCC::CondCodes Pred, unsigned PredReg) {
986 unsigned MyPredReg = 0;
990 bool CheckCPSRDef = false;
991 switch (MI->getOpcode()) {
992 default: return false;
1002 if (Bytes == 0 || (Limit && Bytes >= Limit))
1003 // Make sure the offset fits in 8 bits.
1006 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
1007 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
1008 if (!(MI->getOperand(0).getReg() == Base &&
1009 MI->getOperand(1).getReg() == Base &&
1010 (MI->getOperand(2).getImm() * Scale) == Bytes &&
1011 getInstrPredicate(MI, MyPredReg) == Pred &&
1012 MyPredReg == PredReg))
1015 return CheckCPSRDef ? !definesCPSR(MI) : true;
1018 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1019 ARM_AM::AMSubMode Mode) {
1021 default: llvm_unreachable("Unhandled opcode!");
1027 default: llvm_unreachable("Unhandled submode!");
1028 case ARM_AM::ia: return ARM::LDMIA_UPD;
1029 case ARM_AM::ib: return ARM::LDMIB_UPD;
1030 case ARM_AM::da: return ARM::LDMDA_UPD;
1031 case ARM_AM::db: return ARM::LDMDB_UPD;
1038 default: llvm_unreachable("Unhandled submode!");
1039 case ARM_AM::ia: return ARM::STMIA_UPD;
1040 case ARM_AM::ib: return ARM::STMIB_UPD;
1041 case ARM_AM::da: return ARM::STMDA_UPD;
1042 case ARM_AM::db: return ARM::STMDB_UPD;
1047 default: llvm_unreachable("Unhandled submode!");
1048 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1049 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1054 default: llvm_unreachable("Unhandled submode!");
1055 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1056 case ARM_AM::db: return ARM::t2STMDB_UPD;
1060 default: llvm_unreachable("Unhandled submode!");
1061 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1062 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1066 default: llvm_unreachable("Unhandled submode!");
1067 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1068 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1072 default: llvm_unreachable("Unhandled submode!");
1073 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1074 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1078 default: llvm_unreachable("Unhandled submode!");
1079 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1080 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1085 /// Fold preceding/trailing inc/dec of base register into the
1086 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1087 ///
1088 /// stmia rn, <ra, rb, rc>
1089 /// rn := rn + 4 * 3;
1090 /// =>
1091 /// stmia rn!, <ra, rb, rc>
1092 ///
1093 /// rn := rn - 4 * 3;
1094 /// ldmia rn, <ra, rb, rc>
1095 /// =>
1096 /// ldmdb rn!, <ra, rb, rc>
1097 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1098 // Thumb1 is already using updating loads/stores.
1099 if (isThumb1) return false;
1101 const MachineOperand &BaseOP = MI->getOperand(0);
1102 unsigned Base = BaseOP.getReg();
1103 bool BaseKill = BaseOP.isKill();
1104 unsigned Bytes = getLSMultipleTransferSize(MI);
1105 unsigned PredReg = 0;
1106 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1107 unsigned Opcode = MI->getOpcode();
1108 DebugLoc DL = MI->getDebugLoc();
1110 // Can't use an updating ld/st if the base register is also a dest
1111 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1112 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1113 if (MI->getOperand(i).getReg() == Base)
1116 bool DoMerge = false;
1117 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1119 // Try merging with the previous instruction.
1120 MachineBasicBlock &MBB = *MI->getParent();
1121 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1122 MachineBasicBlock::iterator MBBI(MI);
1123 if (MBBI != BeginMBBI) {
1124 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1125 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1127 if (Mode == ARM_AM::ia &&
1128 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1131 } else if (Mode == ARM_AM::ib &&
1132 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1137 MBB.erase(PrevMBBI);
1140 // Try merging with the next instruction.
1141 MachineBasicBlock::iterator EndMBBI = MBB.end();
1142 if (!DoMerge && MBBI != EndMBBI) {
1143 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1144 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1146 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1147 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1149 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1150 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1154 MBB.erase(NextMBBI);
1160 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1161 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1162 .addReg(Base, getDefRegState(true)) // WB base register
1163 .addReg(Base, getKillRegState(BaseKill))
1164 .addImm(Pred).addReg(PredReg);
1166 // Transfer the rest of operands.
1167 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1168 MIB.addOperand(MI->getOperand(OpNum));
1170 // Transfer memoperands.
1171 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1177 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1178 ARM_AM::AddrOpc Mode) {
1181 return ARM::LDR_PRE_IMM;
1183 return ARM::STR_PRE_IMM;
1185 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1187 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1189 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1191 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1194 return ARM::t2LDR_PRE;
1197 return ARM::t2STR_PRE;
1198 default: llvm_unreachable("Unhandled opcode!");
1202 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1203 ARM_AM::AddrOpc Mode) {
1206 return ARM::LDR_POST_IMM;
1208 return ARM::STR_POST_IMM;
1210 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1212 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1214 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1216 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1219 return ARM::t2LDR_POST;
1222 return ARM::t2STR_POST;
1223 default: llvm_unreachable("Unhandled opcode!");
1227 /// Fold preceding/trailing inc/dec of base register into the
1228 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
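/// For example (illustration only):
///   ldr r1, [r0] ; add r0, r0, #4   =>  ldr r1, [r0], #4    (post-indexed)
///   sub r0, r0, #4 ; ldr r1, [r0]   =>  ldr r1, [r0, #-4]!  (pre-indexed)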
1229 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1230 // Thumb1 doesn't have updating LDR/STR.
1231 // FIXME: Use LDM/STM with single register instead.
1232 if (isThumb1) return false;
1234 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1235 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1236 unsigned Bytes = getLSMultipleTransferSize(MI);
1237 unsigned Opcode = MI->getOpcode();
1238 DebugLoc DL = MI->getDebugLoc();
1239 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1240 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1241 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1242 if (isi32Load(Opcode) || isi32Store(Opcode))
1243 if (MI->getOperand(2).getImm() != 0)
1245 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1248 bool isLd = isLoadSingle(Opcode);
1249 // Can't do the merge if the destination register is the same as the would-be
1250 // writeback register.
1251 if (MI->getOperand(0).getReg() == Base)
1254 unsigned PredReg = 0;
1255 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1256 bool DoMerge = false;
1257 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1258 unsigned NewOpc = 0;
1259 // AM2 - 12 bits, thumb2 - 8 bits.
1260 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1262 // Try merging with the previous instruction.
1263 MachineBasicBlock &MBB = *MI->getParent();
1264 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1265 MachineBasicBlock::iterator MBBI(MI);
1266 if (MBBI != BeginMBBI) {
1267 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1268 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1270 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1272 AddSub = ARM_AM::sub;
1273 } else if (!isAM5 &&
1274 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1278 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1279 MBB.erase(PrevMBBI);
1283 // Try merging with the next instruction.
1284 MachineBasicBlock::iterator EndMBBI = MBB.end();
1285 if (!DoMerge && MBBI != EndMBBI) {
1286 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1287 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1290 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1292 AddSub = ARM_AM::sub;
1293 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1297 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1298 MBB.erase(NextMBBI);
1306 // VLDM[SD]_UPD, VSTM[SD]_UPD
1307 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1308 // updating load/store-multiple instructions can be used with only one
1309 // register.)
1310 MachineOperand &MO = MI->getOperand(0);
1311 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1312 .addReg(Base, getDefRegState(true)) // WB base register
1313 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1314 .addImm(Pred).addReg(PredReg)
1315 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1316 getKillRegState(MO.isKill())));
1319 // LDR_PRE, LDR_POST
1320 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1321 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1322 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1323 .addReg(Base, RegState::Define)
1324 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1326 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1327 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1328 .addReg(Base, RegState::Define)
1329 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1332 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1333 // t2LDR_PRE, t2LDR_POST
1334 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1335 .addReg(Base, RegState::Define)
1336 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1339 MachineOperand &MO = MI->getOperand(0);
1340 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1341 // the vestigial zero-reg offset register. When that's fixed, this clause
1342 // can be removed entirely.
1343 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1344 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1345 // STR_PRE, STR_POST
1346 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1347 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1348 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1350 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1351 // t2STR_PRE, t2STR_POST
1352 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1353 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1354 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1362 /// Returns true if instruction is a memory operation that this pass is capable
1363 /// of operating on.
1364 static bool isMemoryOp(const MachineInstr *MI) {
1365 // When no memory operands are present, conservatively assume unaligned,
1366 // volatile, unfoldable.
1367 if (!MI->hasOneMemOperand())
1370 const MachineMemOperand *MMO = *MI->memoperands_begin();
1372 // Don't touch volatile memory accesses - we may be changing their order.
1373 if (MMO->isVolatile())
1376 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1377 // not.
1378 if (MMO->getAlignment() < 4)
1381 // str <undef> could probably be eliminated entirely, but for now we just want
1382 // to avoid making a mess of it.
1383 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1384 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1385 MI->getOperand(0).isUndef())
1388 // Likewise don't mess with references to undefined addresses.
1389 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1390 MI->getOperand(1).isUndef())
1393 unsigned Opcode = MI->getOpcode();
1398 return MI->getOperand(1).isReg();
1401 return MI->getOperand(1).isReg();
1412 return MI->getOperand(1).isReg();
1417 static void InsertLDR_STR(MachineBasicBlock &MBB,
1418 MachineBasicBlock::iterator &MBBI,
1419 int Offset, bool isDef,
1420 DebugLoc DL, unsigned NewOpc,
1421 unsigned Reg, bool RegDeadKill, bool RegUndef,
1422 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1423 bool OffKill, bool OffUndef,
1424 ARMCC::CondCodes Pred, unsigned PredReg,
1425 const TargetInstrInfo *TII, bool isT2) {
1427 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1429 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1430 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1431 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1433 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1435 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1436 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1437 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1441 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1442 MachineBasicBlock::iterator &MBBI) {
1443 MachineInstr *MI = &*MBBI;
1444 unsigned Opcode = MI->getOpcode();
1445 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1448 const MachineOperand &BaseOp = MI->getOperand(2);
1449 unsigned BaseReg = BaseOp.getReg();
1450 unsigned EvenReg = MI->getOperand(0).getReg();
1451 unsigned OddReg = MI->getOperand(1).getReg();
1452 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1453 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1455 // ARM errata 602117: LDRD with base in list may result in incorrect base
1456 // register when interrupted or faulted.
1457 bool Errata602117 = EvenReg == BaseReg &&
1458 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1459 // ARM LDRD/STRD needs consecutive registers.
1460 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1461 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1463 if (!Errata602117 && !NonConsecutiveRegs)
1466 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1467 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1468 bool EvenDeadKill = isLd ?
1469 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1470 bool EvenUndef = MI->getOperand(0).isUndef();
1471 bool OddDeadKill = isLd ?
1472 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1473 bool OddUndef = MI->getOperand(1).isUndef();
1474 bool BaseKill = BaseOp.isKill();
1475 bool BaseUndef = BaseOp.isUndef();
1476 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1477 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1478 int OffImm = getMemoryOpOffset(MI);
1479 unsigned PredReg = 0;
1480 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1482 if (OddRegNum > EvenRegNum && OffImm == 0) {
1483 // Ascending register numbers and no offset. It's safe to change it to an
1484 // ldm or stm.
1485 unsigned NewOpc = (isLd)
1486 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1487 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1489 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1490 .addReg(BaseReg, getKillRegState(BaseKill))
1491 .addImm(Pred).addReg(PredReg)
1492 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1493 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1496 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1497 .addReg(BaseReg, getKillRegState(BaseKill))
1498 .addImm(Pred).addReg(PredReg)
1500 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1502 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1506 // Split into two instructions.
1507 unsigned NewOpc = (isLd)
1508 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1509 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1510 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1511 // so adjust and use t2LDRi12 here for that.
1512 unsigned NewOpc2 = (isLd)
1513 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1514 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1515 DebugLoc dl = MBBI->getDebugLoc();
1516 // If this is a load and the base register is killed, it may have been
1517 // re-defed by the load; make sure the first load does not clobber it.
1519 (BaseKill || OffKill) &&
1520 (TRI->regsOverlap(EvenReg, BaseReg))) {
1521 assert(!TRI->regsOverlap(OddReg, BaseReg));
1522 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1523 OddReg, OddDeadKill, false,
1524 BaseReg, false, BaseUndef, false, OffUndef,
1525 Pred, PredReg, TII, isT2);
1526 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1527 EvenReg, EvenDeadKill, false,
1528 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1529 Pred, PredReg, TII, isT2);
1531 if (OddReg == EvenReg && EvenDeadKill) {
1532 // If the two source operands are the same, the kill marker is
1533 // probably on the first one. e.g.
1534 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1535 EvenDeadKill = false;
1538 // Never kill the base register in the first instruction.
1539 if (EvenReg == BaseReg)
1540 EvenDeadKill = false;
1541 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1542 EvenReg, EvenDeadKill, EvenUndef,
1543 BaseReg, false, BaseUndef, false, OffUndef,
1544 Pred, PredReg, TII, isT2);
1545 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1546 OddReg, OddDeadKill, OddUndef,
1547 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1548 Pred, PredReg, TII, isT2);
1556 MBBI = MBB.erase(MBBI);
1560 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1561 /// incrementing offset into LDM / STM ops.
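// Overview of the algorithm: walk the block from bottom to top, collecting
// runs of single loads/stores that share the same opcode, base register and
// predicate into MemOps; hand every completed run to FormCandidates; finally
// sort the candidates by insertion position and merge them, folding base
// increments/decrements into the merged (or remaining single) instructions.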
1562 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1564 unsigned CurrBase = 0;
1565 unsigned CurrOpc = ~0u;
1566 unsigned CurrSize = 0;
1567 ARMCC::CondCodes CurrPred = ARMCC::AL;
1568 unsigned CurrPredReg = 0;
1569 unsigned Position = 0;
1570 assert(Candidates.size() == 0);
1571 LiveRegsValid = false;
1573 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1575 // The instruction in front of the iterator is the one we look at.
1576 MBBI = std::prev(I);
1577 if (FixInvalidRegPairOp(MBB, MBBI))
1581 if (isMemoryOp(MBBI)) {
1582 unsigned Opcode = MBBI->getOpcode();
1583 unsigned Size = getLSMultipleTransferSize(MBBI);
1584 const MachineOperand &MO = MBBI->getOperand(0);
1585 unsigned Reg = MO.getReg();
1586 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1587 unsigned PredReg = 0;
1588 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1589 int Offset = getMemoryOpOffset(MBBI);
1590 if (CurrBase == 0) {
1591 // Start of a new chain.
1596 CurrPredReg = PredReg;
1597 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1600 // Note: No need to match PredReg in the next if.
1601 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1602 // Watch out for:
1603 //   r4 := ldr [r0, #8]
1604 //   r4 := ldr [r0, #4]
1605 // or
1606 //   r0 := ldr [r0]
1607 // If a load overrides the base register or a register loaded by
1608 // another load in our chain, we cannot take this instruction.
1609 bool Overlap = false;
1610 if (isLoadSingle(Opcode)) {
1611 Overlap = (Base == Reg);
1613 for (const MemOpQueueEntry &E : MemOps) {
1614 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1623 // Check offset and sort memory operation into the current chain.
1624 if (Offset > MemOps.back().Offset) {
1625 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1628 MemOpQueue::iterator MI, ME;
1629 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1630 if (Offset < MI->Offset) {
1631 // Found a place to insert.
1634 if (Offset == MI->Offset) {
1635 // Collision, abort.
1640 if (MI != MemOps.end()) {
1641 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1648 // Don't advance the iterator; The op will start a new chain next.
1651 // Fallthrough to look into existing chain.
1652 } else if (MBBI->isDebugValue())
1655 // If we are here then the chain is broken; Extract candidates for a merge.
1656 if (MemOps.size() > 0) {
1657 FormCandidates(MemOps);
1658 // Reset for the next chain.
1662 CurrPred = ARMCC::AL;
1667 if (MemOps.size() > 0)
1668 FormCandidates(MemOps);
1670 // Sort candidates so they get processed from the end to the beginning of the
1671 // basic block later; this is necessary for the liveness calculation.
1672 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1673 return M0->InsertPos < M1->InsertPos;
1675 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1677 // Go through list of candidates and merge.
1678 bool Changed = false;
1679 for (const MergeCandidate *Candidate : Candidates) {
1680 if (Candidate->Instrs.size() > 1) {
1681 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1682 // Merge preceding/trailing base inc/dec into the merged op.
1684 MergeBaseUpdateLSMultiple(Merged);
1687 for (MachineInstr *MI : Candidate->Instrs) {
1688 if (MergeBaseUpdateLoadStore(MI))
1693 assert(Candidate->Instrs.size() == 1);
1694 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1703 /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1704 /// into the preceding stack restore so it directly restores the value of LR
1705 /// into pc.
1706 ///   ldmfd sp!, {..., lr}
1707 ///   bx lr
1708 /// or
1709 ///   ldmfd sp!, {..., lr}
1710 ///   mov pc, lr
1711 /// =>
1712 ///   ldmfd sp!, {..., pc}
1713 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1714 // Thumb1 LDM doesn't allow high registers.
1715 if (isThumb1) return false;
1716 if (MBB.empty()) return false;
1718 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1719 if (MBBI != MBB.begin() &&
1720 (MBBI->getOpcode() == ARM::BX_RET ||
1721 MBBI->getOpcode() == ARM::tBX_RET ||
1722 MBBI->getOpcode() == ARM::MOVPCLR)) {
1723 MachineInstr *PrevMI = std::prev(MBBI);
1724 unsigned Opcode = PrevMI->getOpcode();
1725 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1726 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1727 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1728 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1729 if (MO.getReg() != ARM::LR)
1731 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1732 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1733 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1734 PrevMI->setDesc(TII->get(NewOpc));
1736 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1744 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1746 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1747 TL = STI->getTargetLowering();
1748 AFI = Fn.getInfo<ARMFunctionInfo>();
1749 TII = STI->getInstrInfo();
1750 TRI = STI->getRegisterInfo();
1751 MRI = &Fn.getRegInfo();
1752 RegClassInfoValid = false;
1753 isThumb2 = AFI->isThumb2Function();
1754 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1756 bool Modified = false;
1757 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1759 MachineBasicBlock &MBB = *MFI;
1760 Modified |= LoadStoreMultipleOpti(MBB);
1761 if (STI->hasV5TOps())
1762 Modified |= MergeReturnIntoLDM(MBB);
1770 /// Pre-register-allocation pass that moves loads / stores of consecutive
1771 /// locations closer together to make it more likely they will be combined later.
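// Unlike the post-RA pass above, this pass does not form LDM/STM itself: it
// reschedules same-base loads/stores next to each other (RescheduleOps), forms
// LDRD/STRD pairs where legal, and adds register-allocation hints so that the
// later pass is more likely to be able to merge the remaining instructions.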
1772 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1773 static char ID;
1774 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1776 const DataLayout *TD;
1777 const TargetInstrInfo *TII;
1778 const TargetRegisterInfo *TRI;
1779 const ARMSubtarget *STI;
1780 MachineRegisterInfo *MRI;
1781 MachineFunction *MF;
1783 bool runOnMachineFunction(MachineFunction &Fn) override;
1785 const char *getPassName() const override {
1786 return "ARM pre- register allocation load / store optimization pass";
1790 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1791 unsigned &NewOpc, unsigned &EvenReg,
1792 unsigned &OddReg, unsigned &BaseReg,
1794 unsigned &PredReg, ARMCC::CondCodes &Pred,
1796 bool RescheduleOps(MachineBasicBlock *MBB,
1797 SmallVectorImpl<MachineInstr *> &Ops,
1798 unsigned Base, bool isLd,
1799 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1800 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1802 char ARMPreAllocLoadStoreOpt::ID = 0;
1805 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1806 TD = Fn.getTarget().getDataLayout();
1807 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1808 TII = STI->getInstrInfo();
1809 TRI = STI->getRegisterInfo();
1810 MRI = &Fn.getRegInfo();
1813 bool Modified = false;
1814 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1816 Modified |= RescheduleLoadStoreInstrs(MFI);
1821 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1822 MachineBasicBlock::iterator I,
1823 MachineBasicBlock::iterator E,
1824 SmallPtrSetImpl<MachineInstr*> &MemOps,
1825 SmallSet<unsigned, 4> &MemRegs,
1826 const TargetRegisterInfo *TRI) {
1827 // Are there stores / loads / calls between them?
1828 // FIXME: This is overly conservative. We should make use of alias information
1829 // if possible.
1830 SmallSet<unsigned, 4> AddedRegPressure;
1832 if (I->isDebugValue() || MemOps.count(&*I))
1834 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1836 if (isLd && I->mayStore())
1841 // It's not safe to move the first 'str' down.
1844 // str r4, [r0, #+4]
1848 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1849 MachineOperand &MO = I->getOperand(j);
1852 unsigned Reg = MO.getReg();
1853 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1855 if (Reg != Base && !MemRegs.count(Reg))
1856 AddedRegPressure.insert(Reg);
1860 // Estimate register pressure increase due to the transformation.
1861 if (MemRegs.size() <= 4)
1862 // Ok if we are moving a small number of instructions.
1863 return true;
1864 return AddedRegPressure.size() <= MemRegs.size() * 2;
1868 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
1869 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1870 MachineInstr *Op1) {
1871 assert(MI->memoperands_empty() && "expected a new machineinstr");
1872 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1873 + (Op1->memoperands_end() - Op1->memoperands_begin());
1875 MachineFunction *MF = MI->getParent()->getParent();
1876 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1877 MachineSDNode::mmo_iterator MemEnd =
1878 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1880 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1881 MI->setMemRefs(MemBegin, MemEnd);
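/// Check whether the two single loads/stores \p Op0 and \p Op1 (which the
/// caller has placed at consecutive word offsets from the same base) can be
/// combined into one LDRD/STRD or t2LDRDi8/t2STRDi8: requires V5TE, sufficient
/// alignment, distinct transfer registers and an in-range offset. On success
/// the new opcode, registers, base and encoded offset are filled in.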
1885 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1886 DebugLoc &dl, unsigned &NewOpc,
1888 unsigned &SecondReg,
1889 unsigned &BaseReg, int &Offset,
1891 ARMCC::CondCodes &Pred,
1893 // Make sure we're allowed to generate LDRD/STRD.
1894 if (!STI->hasV5TEOps())
1897 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1899 unsigned Opcode = Op0->getOpcode();
1900 if (Opcode == ARM::LDRi12) {
1902 } else if (Opcode == ARM::STRi12) {
1904 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1905 NewOpc = ARM::t2LDRDi8;
1908 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1909 NewOpc = ARM::t2STRDi8;
1916 // Make sure the base address satisfies i64 ld / st alignment requirement.
1917 // At the moment, we ignore the memoryoperand's value.
1918 // If we want to use AliasAnalysis, we should check it accordingly.
1919 if (!Op0->hasOneMemOperand() ||
1920 (*Op0->memoperands_begin())->isVolatile())
1923 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1924 const Function *Func = MF->getFunction();
1925 unsigned ReqAlign = STI->hasV6Ops()
1926 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1927 : 8; // Pre-v6 needs 8-byte alignment
1928 if (Align < ReqAlign)
1931 // Then make sure the immediate offset fits.
1932 int OffImm = getMemoryOpOffset(Op0);
1934 int Limit = (1 << 8) * Scale;
1935 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1939 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1941 AddSub = ARM_AM::sub;
1944 int Limit = (1 << 8) * Scale;
1945 if (OffImm >= Limit || (OffImm & (Scale-1)))
1947 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1949 FirstReg = Op0->getOperand(0).getReg();
1950 SecondReg = Op1->getOperand(0).getReg();
1951 if (FirstReg == SecondReg)
1953 BaseReg = Op0->getOperand(1).getReg();
1954 Pred = getInstrPredicate(Op0, PredReg);
1955 dl = Op0->getDebugLoc();
1959 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1960 SmallVectorImpl<MachineInstr *> &Ops,
1961 unsigned Base, bool isLd,
1962 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1963 bool RetVal = false;
1965 // Sort by offset (in reverse order).
1966 std::sort(Ops.begin(), Ops.end(),
1967 [](const MachineInstr *LHS, const MachineInstr *RHS) {
1968 int LOffset = getMemoryOpOffset(LHS);
1969 int ROffset = getMemoryOpOffset(RHS);
1970 assert(LHS == RHS || LOffset != ROffset);
1971 return LOffset > ROffset;
1974 // The loads / stores of the same base are in order. Scan them from first to
1975 // last and check for the following:
1976 // 1. Any def of base.
1977 // 2. Any gaps.
1978 while (Ops.size() > 1) {
1979 unsigned FirstLoc = ~0U;
1980 unsigned LastLoc = 0;
1981 MachineInstr *FirstOp = nullptr;
1982 MachineInstr *LastOp = nullptr;
1984 unsigned LastOpcode = 0;
1985 unsigned LastBytes = 0;
1986 unsigned NumMove = 0;
1987 for (int i = Ops.size() - 1; i >= 0; --i) {
1988 MachineInstr *Op = Ops[i];
1989 unsigned Loc = MI2LocMap[Op];
1990 if (Loc <= FirstLoc) {
1994 if (Loc >= LastLoc) {
2000 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2001 if (LastOpcode && LSMOpcode != LastOpcode)
2004 int Offset = getMemoryOpOffset(Op);
2005 unsigned Bytes = getLSMultipleTransferSize(Op);
2007 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2010 LastOffset = Offset;
2012 LastOpcode = LSMOpcode;
2013 if (++NumMove == 8) // FIXME: Tune this limit.
2020 SmallPtrSet<MachineInstr*, 4> MemOps;
2021 SmallSet<unsigned, 4> MemRegs;
2022 for (int i = NumMove-1; i >= 0; --i) {
2023 MemOps.insert(Ops[i]);
2024 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2027 // Be conservative: if the instructions are too far apart, don't
2028 // move them. We want to limit the increase in register pressure.
2029 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2031 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2032 MemOps, MemRegs, TRI);
2034 for (unsigned i = 0; i != NumMove; ++i)
2037 // This is the new location for the loads / stores.
2038 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2039 while (InsertPos != MBB->end()
2040 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2043 // If we are moving a pair of loads / stores, see if it makes sense
2044 // to try to allocate a pair of registers that can form register pairs.
2045 MachineInstr *Op0 = Ops.back();
2046 MachineInstr *Op1 = Ops[Ops.size()-2];
2047 unsigned FirstReg = 0, SecondReg = 0;
2048 unsigned BaseReg = 0, PredReg = 0;
2049 ARMCC::CondCodes Pred = ARMCC::AL;
2051 unsigned NewOpc = 0;
2054 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2055 FirstReg, SecondReg, BaseReg,
2056 Offset, PredReg, Pred, isT2)) {
2060 const MCInstrDesc &MCID = TII->get(NewOpc);
2061 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2062 MRI->constrainRegClass(FirstReg, TRC);
2063 MRI->constrainRegClass(SecondReg, TRC);
2065 // Form the pair instruction.
2067 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2068 .addReg(FirstReg, RegState::Define)
2069 .addReg(SecondReg, RegState::Define)
2071 // FIXME: We're converting from LDRi12 to an insn that still
2072 // uses addrmode2, so we need an explicit offset reg. It should
2073 // always be reg0 since we're transforming LDRi12s.
2076 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2077 concatenateMemOperands(MIB, Op0, Op1);
2078 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2081 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2085 // FIXME: We're converting from STRi12 to an insn that still
2086 // uses addrmode2, so we need an explicit offset reg. It should
2087 // always be reg0 since we're transforming STRi12s.
2090 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2091 concatenateMemOperands(MIB, Op0, Op1);
2092 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2099 // Add register allocation hints to form register pairs.
2100 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2101 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
2104 for (unsigned i = 0; i != NumMove; ++i) {
2105 MachineInstr *Op = Ops.back();
2107 MBB->splice(InsertPos, MBB, Op);
2111 NumLdStMoved += NumMove;
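/// Scan the basic block, bucket loads and stores by base register (stopping at
/// calls, terminators and other barriers), and invoke RescheduleOps on each
/// bucket so that accesses off the same base end up adjacent to each other.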
2121 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2122 bool RetVal = false;
2124 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2125 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2126 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2127 SmallVector<unsigned, 4> LdBases;
2128 SmallVector<unsigned, 4> StBases;
2131 MachineBasicBlock::iterator MBBI = MBB->begin();
2132 MachineBasicBlock::iterator E = MBB->end();
2134 for (; MBBI != E; ++MBBI) {
2135 MachineInstr *MI = MBBI;
2136 if (MI->isCall() || MI->isTerminator()) {
2137 // Stop at barriers.
2142 if (!MI->isDebugValue())
2143 MI2LocMap[MI] = ++Loc;
2145 if (!isMemoryOp(MI))
2147 unsigned PredReg = 0;
2148 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2151 int Opc = MI->getOpcode();
2152 bool isLd = isLoadSingle(Opc);
2153 unsigned Base = MI->getOperand(1).getReg();
2154 int Offset = getMemoryOpOffset(MI);
2156 bool StopHere = false;
2158 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2159 Base2LdsMap.find(Base);
2160 if (BI != Base2LdsMap.end()) {
2161 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2162 if (Offset == getMemoryOpOffset(BI->second[i])) {
2168 BI->second.push_back(MI);
2170 Base2LdsMap[Base].push_back(MI);
2171 LdBases.push_back(Base);
2174 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2175 Base2StsMap.find(Base);
2176 if (BI != Base2StsMap.end()) {
2177 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2178 if (Offset == getMemoryOpOffset(BI->second[i])) {
2184 BI->second.push_back(MI);
2186 Base2StsMap[Base].push_back(MI);
2187 StBases.push_back(Base);
2192 // Found a duplicate (a base+offset combination that's seen earlier).
2199 // Re-schedule loads.
2200 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2201 unsigned Base = LdBases[i];
2202 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2204 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2207 // Re-schedule stores.
2208 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2209 unsigned Base = StBases[i];
2210 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2212 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
2216 Base2LdsMap.clear();
2217 Base2StsMap.clear();
2227 /// Returns an instance of the load / store optimization pass.
2228 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2230 return new ARMPreAllocLoadStoreOpt();
2231 return new ARMLoadStoreOpt();