//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>
using namespace llvm;
#define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened,  "Number of ldm instructions generated");
STATISTIC(NumSTMGened,  "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
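/// For example (an illustrative sketch; the registers and offsets are
/// hypothetical, not taken from any particular test case):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// may become
///   ldmia r4, {r0, r1, r2}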
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        unsigned Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      unsigned Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }
  return false;
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    Offset = -Offset;

  return Offset;
}
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
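/// For example (a sketch with illustrative registers): after merging two
/// word loads into "ldm r3!, {r0, r1}", r3 has been advanced by 8 bytes, so
/// a later "ldr r2, [r3, #8]" must be rewritten as "ldr r2, [r3]". If a
/// rewritten offset would become negative, a SUBS resetting the base is
/// inserted instead.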
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
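/// For example (a sketch; register choices are illustrative, not from any
/// test case), with a nonzero starting offset the merge may first
/// materialize a new base:
///   ldr r0, [r4, #8]
///   ldr r1, [r4, #12]
///   ldr r2, [r4, #16]
/// may become
///   add r5, r4, #8        ; r5 is the scratch (or a destination) register
///   ldmia r5, {r0, r1, r2}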
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          unsigned Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (const std::pair<unsigned, bool> &R : Regs)
      if (Base == R.first) {
        assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    // Thumb1: depending on immediate size, use either
    //   ADDS NewBase, Base, #imm3
    // or
    //   MOV  NewBase, Base
    //   ADDS NewBase, #imm8.
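    // For instance (an illustrative sketch, not from any test case): with
    // Base == r4, NewBase == r3 and Offset == 16, the tADDi8 path below
    // emits a register MOV of r4 into r3 followed by "adds r3, #16".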
    if (Base != NewBase &&
        (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
      // Need to insert a MOV to the new base first.
      if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
          !STI->hasV6Ops()) {
        // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
        if (Pred != ARMCC::AL)
          return false;
        BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
          .addReg(Base, getKillRegState(BaseKill));
      } else
        BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
          .addReg(Base, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg);

      // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
      Base = NewBase;
      BaseKill = false;
    }

    if (BaseOpc == ARM::tADDrSPi) {
      assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
        .addImm(Pred).addReg(PredReg);
    } else if (isThumb1)
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    else
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);

    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB = MIB.addReg(R.first, getDefRegState(isDef)
                     | getKillRegState(R.second));

  // Add implicit defs for super-registers.
  for (unsigned ImpDef : ImpDefs)
    MIB.addReg(ImpDef, RegState::ImplicitDefine);

  return true;
}
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
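///
/// For example (illustrative): if the load at RangeBegin also implicitly
/// defines a super-register (say, d0 via its lane s0), and an instruction
/// between RangeBegin and RangeEnd reads d0, that use refers to our imp-def
/// and will need its undef flag set once the merged LDM/STM is placed at
/// RangeEnd.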
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually use
  // our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     unsigned Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
      if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
        continue;
      unsigned DefReg = MO.getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // undef flags on those uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
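///
/// For example (a sketch; registers illustrative): ascending register numbers
/// at consecutive offsets,
///   ldr r1, [r0]  /  ldr r2, [r0, #4]  /  ldr r3, [r0, #8]
/// form one run and become "ldmia r0, {r1, r2, r3}"; an entry that breaks the
/// pattern ends the current run, which is merged, and starts a new one.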
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, unsigned Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limit are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VLDRD:
  case ARM::VSTRD:
    Limit = 16;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
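///
/// For example (illustrative registers):
///   add r1, r1, #4          ldr r0, [r1]
///   ldr r0, [r1]       and  add r1, r1, #4
/// become, respectively,
///   ldr r0, [r1, #4]!  and  ldr r0, [r1], #4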
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
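  // For instance (illustrative): up to a 4095-byte add/sub can be folded
  // into an AM2 LDR/STR, but only up to 255 bytes into a Thumb2 one. A
  // Limit of 0 simply disables the range check in isMatchingIncrement /
  // isMatchingDecrement above.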
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
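///
/// For example (a sketch; the code is illustrative), a chain such as
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   mul r3, r3, r4      ; first non-mergeable instruction ends the chain
/// is collected into MemOps and handed to MergeLDR_STR above.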
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      unsigned Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write(WAW) dependence. The same as
      // str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      MBBI = std::next(MBBI);
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      MBBI = std::next(MBBI);
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = ~0u;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
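///
/// For example (an illustrative sketch): given
///   ldr r0, [r4]
///   mul r2, r2, r3
///   ldr r1, [r4, #4]
/// moving the second load next to the first (when safe and profitable) makes
/// the pair a candidate for LDRD formation or for the post-RA pass above.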
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
1956 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1958 unsigned &NewOpc, unsigned &EvenReg,
1959 unsigned &OddReg, unsigned &BaseReg,
1960 int &Offset, unsigned &PredReg,
1961 ARMCC::CondCodes &Pred,
1963 // Make sure we're allowed to generate LDRD/STRD.
1964 if (!STI->hasV5TEOps())
1967 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1969 unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }
  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;
  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
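    // Note: getAM3Opc packs the add/sub flag with an 8-bit offset magnitude,
    // so ARM-mode LDRD/STRD reaches offsets in (-256, 256); the Thumb-2
    // branch above instead keeps a signed imm8 scaled by 4 (range +/-1020).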
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                      SmallVectorImpl<MachineInstr *> &Ops,
                                      unsigned Base, bool isLd,
                                      DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });
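  // Descending order means the scan below, which walks Ops from the back,
  // visits offsets in ascending order and can check that each access starts
  // exactly where the previous one ended.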
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }
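    // The scan above accepts a run only while each access begins exactly
    // where the previous one ended: e.g. 4-byte loads at #0, #4, #8 chain,
    // while #0 followed by #12 (a gap) ends the run.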
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }
      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;
        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();
          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);
          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);
          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
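          // In ARM mode, LDRD/STRD need a consecutive even/odd pair
          // (Rt even, Rt2 == Rt + 1); the hints nudge the register
          // allocator toward such an assignment without requiring it.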
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;
      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }
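    // A call, terminator or duplicate base+offset ends the current region;
    // reschedule what has been collected so far, then resume scanning from
    // the instruction after the break.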
    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }
    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass: the pre-register-allocation variant when
/// \p PreAlloc is set, the post-allocation variant otherwise.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
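// Usage sketch (illustrative; the real hooks live in the ARM target's pass
// configuration, and this assumes the declaration defaults PreAlloc to
// false):
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true)); // pre-RA
//   // ... register allocation ...
//   addPass(createARMLoadStoreOptimizationPass());                  // post-RA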