1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Target/TargetInstrInfo.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetRegisterInfo.h"
// Debug identifier used by LLVM's DEBUG()/--debug-only machinery for this pass.
47 #define DEBUG_TYPE "arm-ldst-opt"
// Pass statistics (reported with -stats): counts of multiple-transfer
// instructions formed (ldm/stm/vldm/vstm), of load/store instructions moved,
// and of ldrd/strd pairs formed or split back into their component ops.
49 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
50 STATISTIC(NumSTMGened , "Number of stm instructions generated");
51 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
52 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
53 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
54 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
55 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
// NOTE(review): this listing is a sampled excerpt — the embedded source line
// numbers skip, so several member declarations (e.g. the MemOpQueueEntry
// fields Offset/Reg/isKill/Position/Merged initialized by its constructor,
// and the closing braces of nested scopes) are not visible here.
62 /// Post- register allocation pass the combine load / store instructions to
63 /// form ldm / stm instructions.
64 struct ARMLoadStoreOpt : public MachineFunctionPass {
66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks cached in runOnMachineFunction (presumably — the assignment
// site is outside this excerpt; TODO confirm).
68 const TargetInstrInfo *TII;
69 const TargetRegisterInfo *TRI;
70 const ARMSubtarget *STI;
71 const TargetLowering *TL;
// Subtarget mode flags used throughout to select Thumb1/Thumb2/ARM opcodes.
74 bool isThumb1, isThumb2;
76 bool runOnMachineFunction(MachineFunction &Fn) override;
78 const char *getPassName() const override {
79 return "ARM load / store optimization pass";
// One candidate load/store being considered for merging: its immediate
// offset, transferred register, kill flag, position within the block, and
// an iterator to the MachineInstr itself.
83 struct MemOpQueueEntry {
88 MachineBasicBlock::iterator MBBI;
90 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
91 MachineBasicBlock::iterator i)
92 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
95 typedef MemOpQueue::iterator MemOpQueueIter;
// Helper declarations; each is defined (and documented) below.
97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
98 const MemOpQueue &MemOps, unsigned DefReg,
99 unsigned RangeBegin, unsigned RangeEnd);
100 void UpdateBaseRegUses(MachineBasicBlock &MBB,
101 MachineBasicBlock::iterator MBBI,
102 DebugLoc dl, unsigned Base, unsigned WordOffset,
103 ARMCC::CondCodes Pred, unsigned PredReg);
104 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
105 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
106 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
108 ArrayRef<std::pair<unsigned, bool> > Regs,
109 ArrayRef<unsigned> ImpDefs);
110 void MergeOpsUpdate(MachineBasicBlock &MBB,
112 unsigned memOpsBegin,
114 unsigned insertAfter,
119 ARMCC::CondCodes Pred,
123 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
124 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
125 unsigned Opcode, unsigned Size,
126 ARMCC::CondCodes Pred, unsigned PredReg,
127 unsigned Scratch, MemOpQueue &MemOps,
128 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
129 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
130 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
131 MachineBasicBlock::iterator &MBBI);
132 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MBBI,
134 const TargetInstrInfo *TII,
136 MachineBasicBlock::iterator &I);
137 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
138 MachineBasicBlock::iterator MBBI,
140 MachineBasicBlock::iterator &I);
141 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
142 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification: the address of ID is the unique pass identity.
144 char ARMLoadStoreOpt::ID = 0;
// Returns true if MI has a live (non-dead) definition of the CPSR condition
// flags. Callers use this to avoid folding/moving instructions whose flag
// results are still needed. (The return statements and closing braces fall
// outside this excerpt.)
147 static bool definesCPSR(const MachineInstr *MI) {
148 for (const auto &MO : MI->operands()) {
151 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
152 // If the instruction has live CPSR def, then it's not safe to fold it
153 // into load / store.
// Decodes the signed byte offset of a load/store instruction. The immediate
// operand is always the third-from-last operand (just before the predicate
// operands). Offsets are stored in different encodings per opcode family:
// plain immediates (t2/i12 forms), word-scaled immediates (Thumb1), and the
// packed AM3/AM5 encodings (LDRD/STRD vs. VFP), whose sign bit is a separate
// add/sub flag. (Several return statements are outside this excerpt.)
160 static int getMemoryOpOffset(const MachineInstr *MI) {
161 unsigned Opcode = MI->getOpcode();
162 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
163 unsigned NumOperands = MI->getDesc().getNumOperands();
164 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// Opcodes whose offset field is already a plain byte offset.
166 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
167 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
168 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
169 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
172 // Thumb1 immediate offsets are scaled by 4
173 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
174 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
// Remaining cases use packed addressing-mode encodings: AM3 for LDRD/STRD,
// AM5 (word-scaled) for VFP loads/stores.
177 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
178 : ARM_AM::getAM5Offset(OffField) * 4;
179 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
180 : ARM_AM::getAM5Op(OffField);
// The AM3/AM5 encodings carry magnitude + direction; negate for 'sub'.
182 if (Op == ARM_AM::sub)
// Maps a single load/store opcode plus an addressing submode (ia/ib/da/db)
// to the corresponding load/store-multiple opcode. Returns 0 where only a
// writeback (_UPD) variant exists (VLDM/VSTM in db mode). Thumb1 has no
// non-writeback STM at all, and its LDM is writeback-only unless the base
// is also loaded. (The outer switch-on-Opcode case labels and increments of
// the NumLDMGened-style statistics are outside this excerpt.)
188 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
190 default: llvm_unreachable("Unhandled opcode!");
194 default: llvm_unreachable("Unhandled submode!");
195 case ARM_AM::ia: return ARM::LDMIA;
196 case ARM_AM::da: return ARM::LDMDA;
197 case ARM_AM::db: return ARM::LDMDB;
198 case ARM_AM::ib: return ARM::LDMIB;
203 default: llvm_unreachable("Unhandled submode!");
204 case ARM_AM::ia: return ARM::STMIA;
205 case ARM_AM::da: return ARM::STMDA;
206 case ARM_AM::db: return ARM::STMDB;
207 case ARM_AM::ib: return ARM::STMIB;
211 // tLDMIA is writeback-only - unless the base register is in the input
215 default: llvm_unreachable("Unhandled submode!");
216 case ARM_AM::ia: return ARM::tLDMIA;
220 // There is no non-writeback tSTMIA either.
223 default: llvm_unreachable("Unhandled submode!");
224 case ARM_AM::ia: return ARM::tSTMIA_UPD;
// Thumb2 supports only IA and DB submodes.
230 default: llvm_unreachable("Unhandled submode!");
231 case ARM_AM::ia: return ARM::t2LDMIA;
232 case ARM_AM::db: return ARM::t2LDMDB;
238 default: llvm_unreachable("Unhandled submode!");
239 case ARM_AM::ia: return ARM::t2STMIA;
240 case ARM_AM::db: return ARM::t2STMDB;
// VFP single/double loads/stores: db exists only as a writeback variant,
// which callers signal by returning 0 here.
245 default: llvm_unreachable("Unhandled submode!");
246 case ARM_AM::ia: return ARM::VLDMSIA;
247 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
252 default: llvm_unreachable("Unhandled submode!");
253 case ARM_AM::ia: return ARM::VSTMSIA;
254 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
259 default: llvm_unreachable("Unhandled submode!");
260 case ARM_AM::ia: return ARM::VLDMDIA;
261 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
266 default: llvm_unreachable("Unhandled submode!");
267 case ARM_AM::ia: return ARM::VSTMDIA;
268 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// Inverse of getLoadStoreMultipleOpcode: given a load/store-multiple opcode
// (including writeback and return-popping variants), recovers its addressing
// submode. The visible case labels are grouped — the first run presumably
// returns ARM_AM::ia and the second ARM_AM::db; the return statements
// themselves are outside this excerpt (TODO confirm against upstream).
273 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
275 default: llvm_unreachable("Unhandled opcode!");
282 case ARM::tLDMIA_UPD:
283 case ARM::tSTMIA_UPD:
284 case ARM::t2LDMIA_RET:
286 case ARM::t2LDMIA_UPD:
288 case ARM::t2STMIA_UPD:
290 case ARM::VLDMSIA_UPD:
292 case ARM::VSTMSIA_UPD:
294 case ARM::VLDMDIA_UPD:
296 case ARM::VSTMDIA_UPD:
310 case ARM::t2LDMDB_UPD:
312 case ARM::t2STMDB_UPD:
313 case ARM::VLDMSDB_UPD:
314 case ARM::VSTMSDB_UPD:
315 case ARM::VLDMDDB_UPD:
316 case ARM::VSTMDDB_UPD:
// Small opcode classifiers: whether an opcode is a 32-bit integer load/store
// in Thumb1, Thumb2, or any encoding. Used to pick merge strategies and
// opcode tables. (Closing braces fall between the sampled lines.)
327 static bool isT1i32Load(unsigned Opc) {
328 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
331 static bool isT2i32Load(unsigned Opc) {
332 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
335 static bool isi32Load(unsigned Opc) {
336 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
339 static bool isT1i32Store(unsigned Opc) {
340 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
343 static bool isT2i32Store(unsigned Opc) {
344 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
347 static bool isi32Store(unsigned Opc) {
348 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
// Returns the byte scale of an opcode's immediate offset field (1, 2 or 4);
// the case bodies are outside this excerpt.
351 static unsigned getImmScale(unsigned Opc) {
353 default: llvm_unreachable("Unhandled opcode!");
// After a Thumb1 LDM/STM with forced base writeback has advanced Base by
// WordOffset words, walk forward from MBBI rewriting later readers of Base:
// fold the delta into immediate-offset loads/stores and into ADDS/SUBS on
// Base where legal; otherwise insert a compensating SUBS. Stops when Base is
// killed or fully reset; always resets at block end because successor
// live-in info is unreliable (see FIXME below).
368 /// Update future uses of the base register with the offset introduced
369 /// due to writeback. This function only works on Thumb1.
371 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
372 MachineBasicBlock::iterator MBBI,
373 DebugLoc dl, unsigned Base,
375 ARMCC::CondCodes Pred, unsigned PredReg) {
376 assert(isThumb1 && "Can only update base register uses for Thumb1!");
377 // Start updating any instructions with immediate offsets. Insert a SUB before
378 // the first non-updateable instruction (if any).
379 for (; MBBI != MBB.end(); ++MBBI) {
380 bool InsertSub = false;
381 unsigned Opc = MBBI->getOpcode();
383 if (MBBI->readsRegister(Base)) {
386 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
388 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
390 if (IsLoad || IsStore) {
391 // Loads and stores with immediate offsets can be updated, but only if
392 // the new offset isn't negative.
393 // The MachineOperand containing the offset immediate is the last one
394 // before predicates.
396 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
397 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
398 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
400 // If storing the base register, it needs to be reset first.
401 unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
403 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
408 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
409 !definesCPSR(MBBI)) {
410 // SUBS/ADDS using this register, with a dead def of the CPSR.
411 // Merge it with the update; if the merged offset is too large,
412 // insert a new sub instead.
414 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
415 Offset = (Opc == ARM::tSUBi8) ?
416 MO.getImm() + WordOffset * 4 :
417 MO.getImm() - WordOffset * 4 ;
418 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
419 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
422 // The base register has now been reset, so exit early.
429 // Can't update the instruction.
433 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
434 // Since SUBS sets the condition flags, we can't place the base reset
435 // after an instruction that has a live CPSR def.
436 // The base register might also contain an argument for a function call.
441 // An instruction above couldn't be updated, so insert a sub.
442 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
443 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
447 if (MBBI->killsRegister(Base))
448 // Register got killed. Stop updating.
451 // End of block was reached.
452 // If this block has successors, reset Base unconditionally before falling
453 // through, since successor live-in information cannot be trusted.
453 if (MBB.succ_size() > 0) {
454 // FIXME: Because of a bug, live registers are sometimes missing from
455 // the successor blocks' live-in sets. This means we can't trust that
456 // information and *always* have to reset at the end of a block.
458 if (MBBI != MBB.end()) --MBBI;
460 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
461 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
// Builds the single merged LDM/STM (or VLDM/VSTM) replacing a run of
// individual loads/stores. Picks the addressing submode from the starting
// offset, may materialize a new base register when the run doesn't start at
// offset 0, and handles the Thumb1 special cases (forced writeback, CPSR
// clobbering by ADDS/SUBS/MOVS). Returns false when no profitable/legal
// merge exists. (This excerpt omits several lines, e.g. the early-exit for
// NumRegs <= 1 and some assignments guarded by the visible conditions.)
465 /// Create and insert a LDM or STM with Base as base register and registers in
466 /// Regs as the register operands that would be loaded / stored. It returns
467 /// true if the transformation is done.
469 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
470 MachineBasicBlock::iterator MBBI,
471 int Offset, unsigned Base, bool BaseKill,
472 unsigned Opcode, ARMCC::CondCodes Pred,
473 unsigned PredReg, unsigned Scratch, DebugLoc dl,
474 ArrayRef<std::pair<unsigned, bool> > Regs,
475 ArrayRef<unsigned> ImpDefs) {
476 // Only a single register to load / store. Don't bother.
477 unsigned NumRegs = Regs.size();
481 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
482 // Compute liveness information for that register to make the decision.
483 bool SafeToClobberCPSR = !isThumb1 ||
484 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
485 MachineBasicBlock::LQR_Dead);
487 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
489 // Exception: If the base register is in the input reglist, Thumb1 LDM is
491 // It's also not possible to merge an STR of the base register in Thumb1.
493 for (const std::pair<unsigned, bool> &R : Regs)
494 if (Base == R.first) {
495 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
496 if (Opcode == ARM::tLDRi) {
499 } else if (Opcode == ARM::tSTRi) {
504 ARM_AM::AMSubMode Mode = ARM_AM::ia;
505 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
506 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
507 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
// Choose the submode so the merged instruction covers [Offset,
// Offset + 4*NumRegs) without adjusting the base register.
509 if (Offset == 4 && haveIBAndDA) {
511 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
513 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
514 // VLDM/VSTM do not support DB mode without also updating the base reg.
516 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
517 // Check if this is a supported opcode before inserting instructions to
518 // calculate a new base register.
519 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
521 // If starting offset isn't zero, insert a MI to materialize a new base.
522 // But only do so if it is cost effective, i.e. merging more than two
527 // On Thumb1, it's not worth materializing a new base register without
528 // clobbering the CPSR (i.e. not using ADDS/SUBS).
529 if (!SafeToClobberCPSR)
533 if (isi32Load(Opcode)) {
534 // If it is a load, then just use one of the destination register to
535 // use as the new base.
536 NewBase = Regs[NumRegs-1].first;
538 // Use the scratch register to use as a new base.
// Select the add/sub opcode for materializing NewBase = Base +/- Offset,
// per target mode and immediate range.
545 isThumb2 ? ARM::t2ADDri :
546 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
547 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
548 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
553 isThumb2 ? ARM::t2SUBri :
554 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
555 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
558 if (!TL->isLegalAddImmediate(Offset))
559 // FIXME: Try add with register operand?
560 return false; // Probably not worth it then.
563 // Thumb1: depending on immediate size, use either
564 // ADDS NewBase, Base, #imm3
567 // ADDS NewBase, #imm8.
568 if (Base != NewBase &&
569 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
570 // Need to insert a MOV to the new base first.
571 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
573 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
574 if (Pred != ARMCC::AL)
576 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
577 .addReg(Base, getKillRegState(BaseKill));
579 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
580 .addReg(Base, getKillRegState(BaseKill))
581 .addImm(Pred).addReg(PredReg);
583 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
587 if (BaseOpc == ARM::tADDrSPi) {
588 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
589 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
590 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
591 .addImm(Pred).addReg(PredReg);
593 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
594 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
595 .addImm(Pred).addReg(PredReg);
597 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
598 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
599 .addImm(Pred).addReg(PredReg).addReg(0);
602 BaseKill = true; // New base is always killed straight away.
605 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
606 Opcode == ARM::VLDRD);
608 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
609 // base register writeback.
610 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
611 if (!Opcode) return false;
613 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
614 // - There is no writeback (LDM of base register),
615 // - the base register is killed by the merged instruction,
616 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
617 // to reset the base register.
618 // Otherwise, don't merge.
619 // It's safe to return here since the code to materialize a new base register
620 // above is also conditional on SafeToClobberCPSR.
621 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
624 MachineInstrBuilder MIB;
627 if (Opcode == ARM::tLDMIA)
628 // Update tLDMIA with writeback if necessary.
629 Opcode = ARM::tLDMIA_UPD;
631 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
633 // Thumb1: we might need to set base writeback when building the MI.
634 MIB.addReg(Base, getDefRegState(true))
635 .addReg(Base, getKillRegState(BaseKill));
637 // The base isn't dead after a merged instruction with writeback.
638 // Insert a sub instruction after the newly formed instruction to reset.
640 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
643 // No writeback, simply build the MachineInstr.
644 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
645 MIB.addReg(Base, getKillRegState(BaseKill));
648 MIB.addImm(Pred).addReg(PredReg);
650 for (const std::pair<unsigned, bool> &R : Regs)
651 MIB = MIB.addReg(R.first, getDefRegState(isDef)
652 | getKillRegState(R.second));
654 // Add implicit defs for super-registers.
655 for (unsigned ImpDef : ImpDefs)
656 MIB.addReg(ImpDef, RegState::ImplicitDefine);
// Collects operands that use DefReg between positions RangeBegin and
// RangeEnd, restricted to the part of the live range actually fed by the
// imp-def at RangeBegin: any later redefinition or kill truncates it.
// A position-sorted std::map makes the second pass a simple in-order cutoff.
661 /// Find all instructions using a given imp-def within a range.
663 /// We are trying to combine a range of instructions, one of which (located at
664 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
665 /// be placed at RangeEnd, and so any uses of this definition between RangeStart
666 /// and RangeEnd must be modified to use an undefined value.
668 /// The live range continues until we find a second definition or one of the
669 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
670 /// we must consider all uses and decide which are relevant in a second pass.
671 void ARMLoadStoreOpt::findUsesOfImpDef(
672 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
673 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
674 std::map<unsigned, MachineOperand *> Uses;
675 unsigned LastLivePos = RangeEnd;
677 // First we find all uses of this register with Position between RangeBegin
678 // and RangeEnd, any or all of these could be uses of a definition at
679 // RangeBegin. We also record the latest position a definition at RangeBegin
680 // would be considered live.
681 for (unsigned i = 0; i < MemOps.size(); ++i) {
682 MachineInstr &MI = *MemOps[i].MBBI;
683 unsigned MIPosition = MemOps[i].Position;
684 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
687 // If this instruction defines the register, then any later use will be of
688 // that definition rather than ours.
689 if (MI.definesRegister(DefReg))
690 LastLivePos = std::min(LastLivePos, MIPosition);
692 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
696 // If this instruction kills the register then (assuming liveness is
697 // correct when we start) we don't need to think about anything after here.
699 LastLivePos = std::min(LastLivePos, MIPosition);
701 Uses[MIPosition] = UseOp;
704 // Now we traverse the list of all uses, and append the ones that actually use
705 // our definition to the requested list.
706 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
709 // List is sorted by position so once we've found one out of range there
710 // will be no more to consider.
711 if (I->first > LastLivePos)
713 UsesOfImpDefs.push_back(I->second);
// Driver around MergeOps for the sub-range [memOpsBegin, memOpsEnd) of
// memOps: computes kill-flag transfers for registers killed before the
// insertion point, gathers implicit super-register defs (and their in-range
// uses, which must become undef), performs the merge, then erases the
// originals and redirects their queue entries at the new instruction.
717 /// Call MergeOps and update MemOps and merges accordingly on success.
718 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
720 unsigned memOpsBegin, unsigned memOpsEnd,
721 unsigned insertAfter, int Offset,
722 unsigned Base, bool BaseKill,
724 ARMCC::CondCodes Pred, unsigned PredReg,
727 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
728 // First calculate which of the registers should be killed by the merged
730 const unsigned insertPos = memOps[insertAfter].Position;
731 SmallSet<unsigned, 4> KilledRegs;
732 DenseMap<unsigned, unsigned> Killer;
733 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
734 if (i == memOpsBegin) {
// Record registers killed before the insertion point, and remember (in
// Killer) which queue entry holds the kill so the flag can be cleared later.
739 if (memOps[i].Position < insertPos && memOps[i].isKill) {
740 unsigned Reg = memOps[i].Reg;
741 KilledRegs.insert(Reg);
746 SmallVector<std::pair<unsigned, bool>, 8> Regs;
747 SmallVector<unsigned, 8> ImpDefs;
748 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
749 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
750 unsigned Reg = memOps[i].Reg;
751 // If we are inserting the merged operation after an operation that
752 // uses the same register, make sure to transfer any kill flag.
753 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
754 Regs.push_back(std::make_pair(Reg, isKill));
756 // Collect any implicit defs of super-registers. They must be preserved.
757 for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
758 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
760 unsigned DefReg = MO.getReg();
761 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
762 ImpDefs.push_back(DefReg);
764 // There may be other uses of the definition between this instruction and
765 // the eventual LDM/STM position. These should be marked undef if the
766 // merge takes place.
767 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
772 // Try to do the merge.
773 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
775 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
776 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
779 // Merge succeeded, update records.
780 Merges.push_back(std::prev(Loc));
782 // In gathering loads together, we may have moved the imp-def of a register
783 // past one of its uses. This is OK, since we know better than the rest of
784 // LLVM what's OK with ARM loads and stores; but we still have to adjust the
786 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
787 E = UsesOfImpDefs.end();
791 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
792 // Remove kill flags from any memops that come before insertPos.
793 if (Regs[i-memOpsBegin].second) {
794 unsigned Reg = Regs[i-memOpsBegin].first;
795 if (KilledRegs.count(Reg)) {
796 unsigned j = Killer[Reg];
797 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
798 assert(Idx >= 0 && "Cannot find killing operand");
799 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
800 memOps[j].isKill = false;
802 memOps[i].isKill = true;
// Erase the original single load/store now that it is covered by the merge.
804 MBB.erase(memOps[i].MBBI);
805 // Update this memop to refer to the merged instruction.
806 // We may need to move kill flags again.
807 memOps[i].Merged = true;
808 memOps[i].MBBI = Merges.back();
809 memOps[i].Position = insertPos;
812 // Update memOps offsets, since they may have been modified by MergeOps.
813 for (auto &MemOp : memOps) {
814 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
// Scans MemOps starting at SIndex for the longest run of ops with
// consecutive offsets (stride Size) and ascending register encodings
// (strictly consecutive for VFP, bounded by the per-instruction register
// limit), emits a merge for the run via MergeOpsUpdate, and recurses on the
// remainder. Swift-specific tweak: avoid starting a VFP multiple at an odd
// D-register number (see comment below).
818 /// Merge a number of load / store instructions into one or more load / store
819 /// multiple instructions.
821 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
822 unsigned Base, unsigned Opcode, unsigned Size,
823 ARMCC::CondCodes Pred, unsigned PredReg,
824 unsigned Scratch, MemOpQueue &MemOps,
825 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
826 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
827 int Offset = MemOps[SIndex].Offset;
828 int SOffset = Offset;
829 unsigned insertAfter = SIndex;
830 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
831 DebugLoc dl = Loc->getDebugLoc();
832 const MachineOperand &PMO = Loc->getOperand(0);
833 unsigned PReg = PMO.getReg();
834 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
836 unsigned Limit = ~0U;
837 bool BaseKill = false;
838 // vldm / vstm limit are 32 for S variants, 16 for D variants.
// (The switch that lowers Limit for VFP opcodes falls between the sampled
// lines of this excerpt.)
856 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
857 int NewOffset = MemOps[i].Offset;
858 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
859 unsigned Reg = MO.getReg();
860 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
861 // Register numbers must be in ascending order. For VFP / NEON load and
862 // store multiples, the registers must also be consecutive and within the
863 // limit on the number of registers per instruction.
864 if (Reg != ARM::SP &&
865 NewOffset == Offset + (int)Size &&
866 ((isNotVFP && RegNum > PRegNum) ||
867 ((Count < Limit) && RegNum == PRegNum+1)) &&
868 // On Swift we don't want vldm/vstm to start with a odd register num
869 // because Q register unaligned vldm/vstm need more uops.
870 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
875 // Can't merge this in. Try merge the earlier ones first.
876 // We need to compute BaseKill here because the MemOps may have been
878 BaseKill = Loc->killsRegister(Base);
880 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
881 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Tail-recurse to merge the remainder of the queue starting at the
// non-mergeable entry.
882 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
887 if (MemOps[i].Position > MemOps[insertAfter].Position) {
889 Loc = MemOps[i].MBBI;
// Final merge for the trailing run once the scan completes.
893 BaseKill = Loc->killsRegister(Base);
894 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
895 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Returns true if MI is a SUB of exactly Bytes from Base into Base, under
// the same predicate, suitable for folding into a decrementing-writeback
// LDM/STM. Limit (when nonzero) caps the byte offset. Thumb1 sub-immediates
// are word-scaled (see Scale below). CheckCPSRDef guards against merging a
// flag-setting SUBS whose flags are live.
898 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
899 unsigned Bytes, unsigned Limit,
900 ARMCC::CondCodes Pred, unsigned PredReg) {
901 unsigned MyPredReg = 0;
905 bool CheckCPSRDef = false;
906 switch (MI->getOpcode()) {
907 default: return false;
917 // Make sure the offset fits in 8 bits.
918 if (Bytes == 0 || (Limit && Bytes >= Limit))
921 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
922 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
923 if (!(MI->getOperand(0).getReg() == Base &&
924 MI->getOperand(1).getReg() == Base &&
925 (MI->getOperand(2).getImm() * Scale) == Bytes &&
926 getInstrPredicate(MI, MyPredReg) == Pred &&
927 MyPredReg == PredReg))
930 return CheckCPSRDef ? !definesCPSR(MI) : true;
// Mirror of isMatchingDecrement for ADD: true if MI adds exactly Bytes to
// Base in place under the same predicate, so it can be folded into an
// incrementing-writeback LDM/STM. Same Limit / word-scaling / live-CPSR
// considerations apply.
933 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
934 unsigned Bytes, unsigned Limit,
935 ARMCC::CondCodes Pred, unsigned PredReg) {
936 unsigned MyPredReg = 0;
940 bool CheckCPSRDef = false;
941 switch (MI->getOpcode()) {
942 default: return false;
952 if (Bytes == 0 || (Limit && Bytes >= Limit))
953 // Make sure the offset fits in 8 bits.
956 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
957 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
958 if (!(MI->getOperand(0).getReg() == Base &&
959 MI->getOperand(1).getReg() == Base &&
960 (MI->getOperand(2).getImm() * Scale) == Bytes &&
961 getInstrPredicate(MI, MyPredReg) == Pred &&
962 MyPredReg == PredReg))
965 return CheckCPSRDef ? !definesCPSR(MI) : true;
// Returns the total number of bytes transferred by a load/store-multiple:
// register-list length times 4 (integer/S registers) or 8 (D registers).
// The list length is the count of variadic operands plus one — computed as
// total operands minus the fixed operands from the MCInstrDesc, plus 1.
// (The per-opcode case labels fall between the sampled lines.)
968 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
969 switch (MI->getOpcode()) {
996 case ARM::tLDMIA_UPD:
997 case ARM::tSTMIA_UPD:
1004 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
1007 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// Maps a non-writeback load/store-multiple opcode plus submode to its
// base-updating (_UPD) counterpart. ARM supports all four submodes;
// Thumb2 and VFP support only IA and DB. (Outer switch case labels are
// outside this excerpt.)
1011 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1012 ARM_AM::AMSubMode Mode) {
1014 default: llvm_unreachable("Unhandled opcode!");
1020 default: llvm_unreachable("Unhandled submode!");
1021 case ARM_AM::ia: return ARM::LDMIA_UPD;
1022 case ARM_AM::ib: return ARM::LDMIB_UPD;
1023 case ARM_AM::da: return ARM::LDMDA_UPD;
1024 case ARM_AM::db: return ARM::LDMDB_UPD;
1031 default: llvm_unreachable("Unhandled submode!");
1032 case ARM_AM::ia: return ARM::STMIA_UPD;
1033 case ARM_AM::ib: return ARM::STMIB_UPD;
1034 case ARM_AM::da: return ARM::STMDA_UPD;
1035 case ARM_AM::db: return ARM::STMDB_UPD;
1040 default: llvm_unreachable("Unhandled submode!");
1041 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1042 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1047 default: llvm_unreachable("Unhandled submode!");
1048 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1049 case ARM_AM::db: return ARM::t2STMDB_UPD;
1053 default: llvm_unreachable("Unhandled submode!");
1054 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1055 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1059 default: llvm_unreachable("Unhandled submode!");
1060 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1061 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1065 default: llvm_unreachable("Unhandled submode!");
1066 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1067 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1071 default: llvm_unreachable("Unhandled submode!");
1072 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1073 case ARM_AM::db: return ARM::VSTMDDB_UPD;
// Looks at the instruction immediately before and immediately after an
// LDM/STM (skipping DBG_VALUEs) for an ADD/SUB of the base register by the
// transfer size; on a match, erases the add/sub, and rebuilds the LDM/STM
// as the corresponding writeback (_UPD) form. Not applied on Thumb1, whose
// multiples already write back. Bails out when the base register also
// appears in the transfer list (writeback would be UB).
1078 /// Fold proceeding/trailing inc/dec of base register into the
1079 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1081 /// stmia rn, <ra, rb, rc>
1082 /// rn := rn + 4 * 3;
1084 /// stmia rn!, <ra, rb, rc>
1086 /// rn := rn - 4 * 3;
1087 /// ldmia rn, <ra, rb, rc>
1089 /// ldmdb rn!, <ra, rb, rc>
1090 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1091 MachineBasicBlock::iterator MBBI,
1093 MachineBasicBlock::iterator &I) {
1094 // Thumb1 is already using updating loads/stores.
1095 if (isThumb1) return false;
1097 MachineInstr *MI = MBBI;
1098 unsigned Base = MI->getOperand(0).getReg();
1099 bool BaseKill = MI->getOperand(0).isKill();
1100 unsigned Bytes = getLSMultipleTransferSize(MI);
1101 unsigned PredReg = 0;
1102 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1103 unsigned Opcode = MI->getOpcode();
1104 DebugLoc dl = MI->getDebugLoc();
1106 // Can't use an updating ld/st if the base register is also a dest
1107 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1108 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1109 if (MI->getOperand(i).getReg() == Base)
1112 bool DoMerge = false;
1113 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1115 // Try merging with the previous instruction.
1116 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1117 if (MBBI != BeginMBBI) {
1118 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1119 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
// A decrement before an ia-mode multiple converts it to db; before an
// ib-mode multiple it converts to ia (the mode assignments fall between
// the sampled lines).
1121 if (Mode == ARM_AM::ia &&
1122 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1125 } else if (Mode == ARM_AM::ib &&
1126 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1131 MBB.erase(PrevMBBI);
1134 // Try merging with the next instruction.
1135 MachineBasicBlock::iterator EndMBBI = MBB.end();
1136 if (!DoMerge && MBBI != EndMBBI) {
1137 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1138 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1140 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1141 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1143 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1144 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// If the erased instruction is the caller's scan cursor, it must be
// advanced before erasing (handling falls between the sampled lines).
1148 if (NextMBBI == I) {
1152 MBB.erase(NextMBBI);
// Rebuild as the updating form: operand 0 becomes the written-back base
// def, followed by the base use, predicate, then the original reg list.
1159 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1160 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1161 .addReg(Base, getDefRegState(true)) // WB base register
1162 .addReg(Base, getKillRegState(BaseKill))
1163 .addImm(Pred).addReg(PredReg);
1165 // Transfer the rest of operands.
1166 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1167 MIB.addOperand(MI->getOperand(OpNum));
1169 // Transfer memoperands.
1170 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
/// Map a plain load/store opcode to its pre-indexed (writeback) counterpart.
/// For the VFP load/store-multiple forms the returned _UPD opcode also
/// depends on whether the base is incremented (IA) or decremented (DB).
/// Unhandled opcodes are a programming error (llvm_unreachable).
1176 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1177 ARM_AM::AddrOpc Mode) {
1180 return ARM::LDR_PRE_IMM;
1182 return ARM::STR_PRE_IMM;
// VLDR/VSTR have no pre-indexed form; a single-register VLDM/VSTM with
// writeback is used instead, selected by add vs. sub direction.
1184 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1186 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1188 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1190 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1193 return ARM::t2LDR_PRE;
1196 return ARM::t2STR_PRE;
1197 default: llvm_unreachable("Unhandled opcode!");
/// Map a plain load/store opcode to its post-indexed (writeback) counterpart.
/// Mirrors getPreIndexedLoadStoreOpcode; note the VFP cases return the same
/// _UPD load/store-multiple opcodes, since VLDM/VSTM writeback is the only
/// updating form available for VFP registers.
1201 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1202 ARM_AM::AddrOpc Mode) {
1205 return ARM::LDR_POST_IMM;
1207 return ARM::STR_POST_IMM;
1209 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1211 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1213 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1215 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1218 return ARM::t2LDR_POST;
1221 return ARM::t2STR_POST;
1222 default: llvm_unreachable("Unhandled opcode!");
1226 /// Fold proceeding/trailing inc/dec of base register into the
1227 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
///
/// Looks for an add/sub of the base register adjacent to a single load or
/// store and replaces the pair with a pre-indexed (merge with previous) or
/// post-indexed (merge with next) form. \returns true on success.
1228 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1229 MachineBasicBlock::iterator MBBI,
1230 const TargetInstrInfo *TII,
1232 MachineBasicBlock::iterator &I) {
1233 // Thumb1 doesn't have updating LDR/STR.
1234 // FIXME: Use LDM/STM with single register instead.
1235 if (isThumb1) return false;
// For these single loads/stores operand 0 is the data register and
// operand 1 is the base register.
1237 MachineInstr *MI = MBBI;
1238 unsigned Base = MI->getOperand(1).getReg();
1239 bool BaseKill = MI->getOperand(1).isKill();
1240 unsigned Bytes = getLSMultipleTransferSize(MI);
1241 unsigned Opcode = MI->getOpcode();
1242 DebugLoc dl = MI->getDebugLoc();
// isAM5: VFP addressing (VLDR/VSTR); isAM2: classic ARM LDR/STR immediate.
1243 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1244 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1245 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset forms can absorb a separate base update.
1246 if (isi32Load(Opcode) || isi32Store(Opcode))
1247 if (MI->getOperand(2).getImm() != 0)
1249 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1252 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1253 // Can't do the merge if the destination register is the same as the would-be
1254 // writeback register.
1255 if (MI->getOperand(0).getReg() == Base)
1258 unsigned PredReg = 0;
1259 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1260 bool DoMerge = false;
1261 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1262 unsigned NewOpc = 0;
1263 // AM2 - 12 bits, thumb2 - 8 bits.
// Limit bounds the immediate the writeback form can encode; 0 for AM5
// since VLDM/VSTM writeback encodes no extra immediate here.
1264 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1266 // Try merging with the previous instruction.
1267 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1268 if (MBBI != BeginMBBI) {
1269 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1270 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1272 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1274 AddSub = ARM_AM::sub;
1275 } else if (!isAM5 &&
1276 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1280 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1281 MBB.erase(PrevMBBI);
1285 // Try merging with the next instruction.
1286 MachineBasicBlock::iterator EndMBBI = MBB.end();
1287 if (!DoMerge && MBBI != EndMBBI) {
1288 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1289 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1292 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1294 AddSub = ARM_AM::sub;
1295 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1299 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// Keep the caller's iterator valid when erasing the merged neighbor.
1300 if (NextMBBI == I) {
1304 MBB.erase(NextMBBI);
1312 // VLDM[SD]_UPD, VSTM[SD]_UPD
1313 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1314 // updating load/store-multiple instructions can be used with only one
1316 MachineOperand &MO = MI->getOperand(0);
1317 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1318 .addReg(Base, getDefRegState(true)) // WB base register
1319 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1320 .addImm(Pred).addReg(PredReg)
1321 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1322 getKillRegState(MO.isKill())));
1325 // LDR_PRE, LDR_POST
// Pre-indexed immediate forms take a signed plain offset ...
1326 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1327 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1328 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1329 .addReg(Base, RegState::Define)
1330 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// ... while AM2 post-indexed forms encode direction+offset via getAM2Opc
// and still carry a (vestigial) zero offset register, addReg(0).
1332 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1333 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1334 .addReg(Base, RegState::Define)
1335 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1338 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1339 // t2LDR_PRE, t2LDR_POST
1340 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1341 .addReg(Base, RegState::Define)
1342 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1345 MachineOperand &MO = MI->getOperand(0);
1346 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1347 // the vestigal zero-reg offset register. When that's fixed, this clause
1348 // can be removed entirely.
1349 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1350 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1351 // STR_PRE, STR_POST
1352 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1353 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1354 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1356 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1357 // t2STR_PRE, t2STR_POST
1358 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1359 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1360 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1368 /// Returns true if instruction is a memory operation that this pass is capable
1369 /// of operating on.
///
/// Filters out anything the merger must not reorder or fold: instructions
/// without exactly one memory operand, volatile accesses, under-aligned
/// accesses, and operands marked <undef>.
1370 static bool isMemoryOp(const MachineInstr *MI) {
1371 // When no memory operands are present, conservatively assume unaligned,
1372 // volatile, unfoldable.
1373 if (!MI->hasOneMemOperand())
1376 const MachineMemOperand *MMO = *MI->memoperands_begin();
1378 // Don't touch volatile memory accesses - we may be changing their order.
1379 if (MMO->isVolatile())
1382 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not — merging an emulated-unaligned single access into a multiple would
// change observable behavior, so require at least word alignment.
1384 if (MMO->getAlignment() < 4)
1387 // str <undef> could probably be eliminated entirely, but for now we just want
1388 // to avoid making a mess of it.
1389 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1390 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1391 MI->getOperand(0).isUndef())
1394 // Likewise don't mess with references to undefined addresses.
1395 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1396 MI->getOperand(1).isUndef())
// Only a fixed set of load/store opcodes is handled; each accepted case
// additionally requires a plain register base in operand 1.
1399 unsigned Opcode = MI->getOpcode();
1404 return MI->getOperand(1).isReg();
1407 return MI->getOperand(1).isReg();
1418 return MI->getOperand(1).isReg();
1423 /// Advance register scavenger to just before the earliest memory op that is
/// queued in \p MemOps, so a subsequent FindUnusedReg query reflects
/// register liveness at the start of the chain being merged.
1425 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Scan the queue for the entry with the smallest Position, i.e. the op
// that appears earliest in the block (the queue is ordered by offset,
// not by program position).
1426 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1427 unsigned Position = MemOps[0].Position;
1428 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1429 if (MemOps[i].Position < Position) {
1430 Position = MemOps[i].Position;
1431 Loc = MemOps[i].MBBI;
// If the earliest op is the first instruction of the block there is
// nothing to advance past.
1435 if (Loc != MBB.begin())
1436 RS->forward(std::prev(Loc));
/// Emit a single LDR/STR (or t2 equivalent) before \p MBBI.
/// \p isDef selects load (Reg is defined) vs. store (Reg is read);
/// RegDeadKill/RegUndef/BaseKill/BaseUndef propagate the liveness flags of
/// the instruction being replaced. Used by FixInvalidRegPairOp when a
/// LDRD/STRD must be split into two single-word accesses.
1439 static void InsertLDR_STR(MachineBasicBlock &MBB,
1440 MachineBasicBlock::iterator &MBBI,
1441 int Offset, bool isDef,
1442 DebugLoc dl, unsigned NewOpc,
1443 unsigned Reg, bool RegDeadKill, bool RegUndef,
1444 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1445 bool OffKill, bool OffUndef,
1446 ARMCC::CondCodes Pred, unsigned PredReg,
1447 const TargetInstrInfo *TII, bool isT2) {
// Load flavor: Reg is a def; a "kill" flag from the original maps to
// "dead" on the new def.
1449 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1451 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1452 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1453 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store flavor: Reg is a use, so RegDeadKill maps to a kill flag.
1455 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1457 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1458 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1459 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
/// Rewrite LDRD/STRD (and their t2 forms) whose register pair cannot be
/// encoded, or which trip the Cortex-M3 errata, into either an LDM/STM
/// (ascending regs, zero offset) or two single-word loads/stores.
/// Updates \p MBBI to point past the replacement and erases the original.
1463 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1464 MachineBasicBlock::iterator &MBBI) {
1465 MachineInstr *MI = &*MBBI;
1466 unsigned Opcode = MI->getOpcode();
1467 if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
// LDRD/STRD operands: 0 = first (even) data reg, 1 = second (odd) data
// reg, 2 = base register.
1468 const MachineOperand &BaseOp = MI->getOperand(2);
1469 unsigned BaseReg = BaseOp.getReg();
1470 unsigned EvenReg = MI->getOperand(0).getReg();
1471 unsigned OddReg = MI->getOperand(1).getReg();
1472 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1473 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1474 // ARM errata 602117: LDRD with base in list may result in incorrect base
1475 // register when interrupted or faulted.
1476 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
// An even/odd consecutive pair is a legal LDRD/STRD encoding; leave it
// alone unless the errata applies.
1477 if (!Errata602117 &&
1478 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1481 MachineBasicBlock::iterator NewBBI = MBBI;
1482 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1483 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the data regs are defs, so record dead-ness; for stores they
// are uses, so record kill-ness.
1484 bool EvenDeadKill = isLd ?
1485 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1486 bool EvenUndef = MI->getOperand(0).isUndef();
1487 bool OddDeadKill = isLd ?
1488 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1489 bool OddUndef = MI->getOperand(1).isUndef();
1490 bool BaseKill = BaseOp.isKill();
1491 bool BaseUndef = BaseOp.isUndef();
// The ARM (non-t2) encodings carry an extra offset operand at index 3.
1492 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1493 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1494 int OffImm = getMemoryOpOffset(MI);
1495 unsigned PredReg = 0;
1496 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1498 if (OddRegNum > EvenRegNum && OffImm == 0) {
1499 // Ascending register numbers and no offset. It's safe to change it to a
1501 unsigned NewOpc = (isLd)
1502 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1503 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1505 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1506 .addReg(BaseReg, getKillRegState(BaseKill))
1507 .addImm(Pred).addReg(PredReg)
1508 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1509 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1512 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1513 .addReg(BaseReg, getKillRegState(BaseKill))
1514 .addImm(Pred).addReg(PredReg)
1516 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1518 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1521 NewBBI = std::prev(MBBI);
1523 // Split into two instructions.
1524 unsigned NewOpc = (isLd)
1525 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1526 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1527 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1528 // so adjust and use t2LDRi12 here for that.
1529 unsigned NewOpc2 = (isLd)
1530 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1531 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1532 DebugLoc dl = MBBI->getDebugLoc();
1533 // If this is a load and base register is killed, it may have been
1534 // re-defed by the load, make sure the first load does not clobber it.
// In that case emit the odd (base+4) half first, then the even half.
1536 (BaseKill || OffKill) &&
1537 (TRI->regsOverlap(EvenReg, BaseReg))) {
1538 assert(!TRI->regsOverlap(OddReg, BaseReg));
1539 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1540 OddReg, OddDeadKill, false,
1541 BaseReg, false, BaseUndef, false, OffUndef,
1542 Pred, PredReg, TII, isT2);
1543 NewBBI = std::prev(MBBI);
1544 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1545 EvenReg, EvenDeadKill, false,
1546 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1547 Pred, PredReg, TII, isT2);
1549 if (OddReg == EvenReg && EvenDeadKill) {
1550 // If the two source operands are the same, the kill marker is
1551 // probably on the first one. e.g.
1552 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1553 EvenDeadKill = false;
1556 // Never kill the base register in the first instruction.
// (The base is still needed to address the second half.)
1557 if (EvenReg == BaseReg)
1558 EvenDeadKill = false;
1559 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1560 EvenReg, EvenDeadKill, EvenUndef,
1561 BaseReg, false, BaseUndef, false, OffUndef,
1562 Pred, PredReg, TII, isT2);
1563 NewBBI = std::prev(MBBI);
1564 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1565 OddReg, OddDeadKill, OddUndef,
1566 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1567 Pred, PredReg, TII, isT2);
1582 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1583 /// incrementing offset into LDM / STM ops.
///
/// Walks the block accumulating a queue (MemOps) of same-base, same-opcode,
/// same-predicate memory ops sorted by offset; when the chain is broken it
/// merges the queue into LDM/STM via MergeLDR_STR and then attempts to fold
/// adjacent base-register updates. \returns true if anything was merged.
1584 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1585 unsigned NumMerges = 0;
1586 unsigned NumMemOps = 0;
// State of the chain currently being accumulated.
1588 unsigned CurrBase = 0;
1589 unsigned CurrOpc = ~0u;
1590 unsigned CurrSize = 0;
1591 ARMCC::CondCodes CurrPred = ARMCC::AL;
1592 unsigned CurrPredReg = 0;
1593 unsigned Position = 0;
1594 SmallVector<MachineBasicBlock::iterator,4> Merges;
1596 RS->enterBasicBlock(&MBB);
1597 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// First give FixInvalidRegPairOp a chance to legalize LDRD/STRD.
1599 if (FixInvalidRegPairOp(MBB, MBBI))
1602 bool Advance = false;
1603 bool TryMerge = false;
1605 bool isMemOp = isMemoryOp(MBBI);
1607 unsigned Opcode = MBBI->getOpcode();
1608 unsigned Size = getLSMultipleTransferSize(MBBI);
1609 const MachineOperand &MO = MBBI->getOperand(0);
1610 unsigned Reg = MO.getReg();
1611 bool isKill = MO.isDef() ? false : MO.isKill();
1612 unsigned Base = MBBI->getOperand(1).getReg();
1613 unsigned PredReg = 0;
1614 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1615 int Offset = getMemoryOpOffset(MBBI);
1618 // r5 := ldr [r5, #4]
1619 // r6 := ldr [r5, #8]
1621 // The second ldr has effectively broken the chain even though it
1622 // looks like the later ldr(s) use the same base register. Try to
1623 // merge the ldr's so far, including this one. But don't try to
1624 // combine the following ldr(s).
1625 bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
1628 // r4 := ldr [r0, #8]
1629 // r4 := ldr [r0, #4]
1631 // The optimization may reorder the second ldr in front of the first
1632 // ldr, which violates write after write(WAW) dependence. The same as
1633 // str. Try to merge inst(s) already in MemOps.
1634 bool Overlap = false;
1635 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1636 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1642 if (CurrBase == 0 && !Clobber) {
1643 // Start of a new chain.
1648 CurrPredReg = PredReg;
1649 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1652 } else if (!Overlap) {
// Same chain: keep the queue sorted by offset while watching for
// duplicate offsets, which make the set unmergeable.
1658 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1659 // No need to match PredReg.
1660 // Continue adding to the queue.
1661 if (Offset > MemOps.back().Offset) {
1662 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1667 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1669 if (Offset < I->Offset) {
1670 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1675 } else if (Offset == I->Offset) {
1676 // Collision! This can't be merged!
// Debug values never break a chain.
1685 if (MBBI->isDebugValue()) {
1688 // Reach the end of the block, try merging the memory instructions.
1690 } else if (Advance) {
1694 // Reach the end of the block, try merging the memory instructions.
1701 if (NumMemOps > 1) {
1702 // Try to find a free register to use as a new base in case it's needed.
1703 // First advance to the instruction just before the start of the chain.
1704 AdvanceRS(MBB, MemOps);
1706 // Find a scratch register.
1708 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1710 // Process the load / store instructions.
1711 RS->forward(std::prev(MBBI));
1715 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1716 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1718 // Try folding preceding/trailing base inc/dec into the generated
1720 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1721 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1723 NumMerges += Merges.size();
1725 // Try folding preceding/trailing base inc/dec into those load/store
1726 // that were not merged to form LDM/STM ops.
1727 for (unsigned i = 0; i != NumMemOps; ++i)
1728 if (!MemOps[i].Merged)
1729 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1732 // RS may be pointing to an instruction that's deleted.
1733 RS->skipTo(std::prev(MBBI));
1734 } else if (NumMemOps == 1) {
1735 // Try folding preceding/trailing base inc/dec into the single
1737 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1739 RS->forward(std::prev(MBBI));
// Reset chain state before scanning for the next chain.
1746 CurrPred = ARMCC::AL;
1753 // If iterator hasn't been advanced and this is not a memory op, skip it.
1754 // It can't start a new chain anyway.
1755 if (!Advance && !isMemOp && MBBI != E) {
1761 return NumMerges > 0;
1764 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1765 /// into the preceding stack restore so it directly restore the value of LR
1767 /// ldmfd sp!, {..., lr}
1770 /// ldmfd sp!, {..., lr}
1773 /// ldmfd sp!, {..., pc}
/// \returns true if the return was folded into the restore.
1774 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1775 // Thumb1 LDM doesn't allow high registers.
1776 if (isThumb1) return false;
1777 if (MBB.empty()) return false;
// Look at the block's terminator: it must be a plain return, and the
// instruction immediately before it an updating LDM whose last operand
// (the final register in the list) is LR.
1779 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1780 if (MBBI != MBB.begin() &&
1781 (MBBI->getOpcode() == ARM::BX_RET ||
1782 MBBI->getOpcode() == ARM::tBX_RET ||
1783 MBBI->getOpcode() == ARM::MOVPCLR)) {
1784 MachineInstr *PrevMI = std::prev(MBBI);
1785 unsigned Opcode = PrevMI->getOpcode();
1786 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1787 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1788 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1789 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1790 if (MO.getReg() != ARM::LR)
// Retarget the restore to load straight into PC (the _RET form) and
// take over the return's implicit operands.
1792 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1793 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1794 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1795 PrevMI->setDesc(TII->get(NewOpc));
1797 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
/// Pass entry point: caches subtarget/target-info pointers, determines the
/// Thumb mode, then runs the multiple-merge (and, on v5T+, the
/// return-into-LDM fold) over every basic block.
1805 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1806 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1807 TL = STI->getTargetLowering();
1808 AFI = Fn.getInfo<ARMFunctionInfo>();
1809 TII = STI->getInstrInfo();
1810 TRI = STI->getRegisterInfo();
// A fresh scavenger is used to find scratch registers during merging.
1811 RS = new RegScavenger();
1812 isThumb2 = AFI->isThumb2Function();
// isThumb1 means Thumb but not Thumb2.
1813 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1815 bool Modified = false;
1816 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1818 MachineBasicBlock &MBB = *MFI;
1819 Modified |= LoadStoreMultipleOpti(MBB);
// MergeReturnIntoLDM produces "pop {..., pc}" style returns, which need
// the v5T load-to-PC interworking behavior.
1820 if (STI->hasV5TOps())
1821 Modified |= MergeReturnIntoLDM(MBB);
1829 /// Pre- register allocation pass that move load / stores from consecutive
1830 /// locations close to make it more likely they will be combined later.
1831 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1833 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function pointers, initialized in runOnMachineFunction.
1835 const DataLayout *TD;
1836 const TargetInstrInfo *TII;
1837 const TargetRegisterInfo *TRI;
1838 const ARMSubtarget *STI;
1839 MachineRegisterInfo *MRI;
1840 MachineFunction *MF;
1842 bool runOnMachineFunction(MachineFunction &Fn) override;
1844 const char *getPassName() const override {
1845 return "ARM pre- register allocation load / store optimization pass";
// Checks whether two word loads/stores can be combined into a single
// LDRD/STRD and computes the opcode/operands for the pair.
1849 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1850 unsigned &NewOpc, unsigned &EvenReg,
1851 unsigned &OddReg, unsigned &BaseReg,
1853 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves a group of same-base loads (or stores) next to each other.
1855 bool RescheduleOps(MachineBasicBlock *MBB,
1856 SmallVectorImpl<MachineInstr *> &Ops,
1857 unsigned Base, bool isLd,
1858 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1859 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1861 char ARMPreAllocLoadStoreOpt::ID = 0;
/// Pre-RA pass entry point: caches data-layout/subtarget pointers and
/// reschedules loads/stores in every basic block.
1864 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1865 TD = Fn.getTarget().getDataLayout();
1866 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1867 TII = STI->getInstrInfo();
1868 TRI = STI->getRegisterInfo();
1869 MRI = &Fn.getRegInfo();
1872 bool Modified = false;
1873 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1875 Modified |= RescheduleLoadStoreInstrs(MFI);
/// Decide whether the memory ops in \p MemOps can be moved together across
/// the instructions in [I, E) without changing behavior, and whether doing
/// so is unlikely to blow up register pressure.
/// \p MemRegs holds the data registers of the ops being moved;
/// \p Base is their common base register.
1880 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1881 MachineBasicBlock::iterator I,
1882 MachineBasicBlock::iterator E,
1883 SmallPtrSetImpl<MachineInstr*> &MemOps,
1884 SmallSet<unsigned, 4> &MemRegs,
1885 const TargetRegisterInfo *TRI) {
1886 // Are there stores / loads / calls between them?
1887 // FIXME: This is overly conservative. We should make use of alias information
// Registers live across the moved region whose lifetimes the move would
// stretch — used below as a pressure estimate.
1889 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions belonging to the group itself (and debug values) are free
// to cross; barriers (calls, terminators, side effects) are not.
1891 if (I->isDebugValue() || MemOps.count(&*I))
1893 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1895 if (isLd && I->mayStore())
1900 // It's not safe to move the first 'str' down.
1903 // str r4, [r0, #+4]
// Scan operands of the intervening instruction: a def that overlaps the
// base register kills the move; other regs add to the pressure estimate.
1907 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1908 MachineOperand &MO = I->getOperand(j);
1911 unsigned Reg = MO.getReg();
1912 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1914 if (Reg != Base && !MemRegs.count(Reg))
1915 AddedRegPressure.insert(Reg);
1919 // Estimate register pressure increase due to the transformation.
1920 if (MemRegs.size() <= 4)
1921 // Ok if we are moving small number of instructions.
1923 return AddedRegPressure.size() <= MemRegs.size() * 2;
1927 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
///
/// Used after pairing two loads/stores into a single LDRD/STRD-style
/// instruction so that \p MI carries the memory operands of both originals.
1928 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1929 MachineInstr *Op1) {
1930 assert(MI->memoperands_empty() && "expected a new machineinstr");
1931 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1932 + (Op1->memoperands_end() - Op1->memoperands_begin());
// Allocate one array sized for both operand lists and copy them in
// back-to-back, then hand the combined range to MI.
1934 MachineFunction *MF = MI->getParent()->getParent();
1935 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1936 MachineSDNode::mmo_iterator MemEnd =
1937 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1939 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1940 MI->setMemRefs(MemBegin, MemEnd);
/// Check whether \p Op0 and \p Op1 (two word loads or two word stores to
/// adjacent addresses) may be fused into LDRD/STRD (or t2LDRDi8/t2STRDi8).
/// On success fills in the new opcode, both data registers, the base
/// register, the encoded offset, the predicate, and the debug location.
1944 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1945 DebugLoc &dl, unsigned &NewOpc,
1947 unsigned &SecondReg,
1948 unsigned &BaseReg, int &Offset,
1950 ARMCC::CondCodes &Pred,
1952 // Make sure we're allowed to generate LDRD/STRD.
1953 if (!STI->hasV5TEOps())
1956 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Select the paired opcode from Op0's opcode (ARM vs. Thumb2 variants).
1958 unsigned Opcode = Op0->getOpcode();
1959 if (Opcode == ARM::LDRi12) {
1961 } else if (Opcode == ARM::STRi12) {
1963 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1964 NewOpc = ARM::t2LDRDi8;
1967 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1968 NewOpc = ARM::t2STRDi8;
1975 // Make sure the base address satisfies i64 ld / st alignment requirement.
1976 // At the moment, we ignore the memoryoperand's value.
1977 // If we want to use AliasAnalysis, we should check it accordingly.
1978 if (!Op0->hasOneMemOperand() ||
1979 (*Op0->memoperands_begin())->isVolatile())
1982 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1983 const Function *Func = MF->getFunction();
1984 unsigned ReqAlign = STI->hasV6Ops()
1985 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1986 : 8; // Pre-v6 need 8-byte align
1987 if (Align < ReqAlign)
1990 // Then make sure the immediate offset fits.
1991 int OffImm = getMemoryOpOffset(Op0);
1993 int Limit = (1 << 8) * Scale;
1994 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// Negative offsets are expressed via the sub direction, then re-checked
// against the (positive) encodable range.
1998 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2000 AddSub = ARM_AM::sub;
2003 int Limit = (1 << 8) * Scale;
2004 if (OffImm >= Limit || (OffImm & (Scale-1)))
2006 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD into the same register twice is not a valid pairing.
2008 FirstReg = Op0->getOperand(0).getReg();
2009 SecondReg = Op1->getOperand(0).getReg();
2010 if (FirstReg == SecondReg)
2012 BaseReg = Op0->getOperand(1).getReg();
2013 Pred = getInstrPredicate(Op0, PredReg);
2014 dl = Op0->getDebugLoc();
/// Move a group of same-base loads (or stores) in \p Ops next to each other
/// so the post-RA pass can merge them, and opportunistically fuse adjacent
/// pairs into LDRD/STRD. \p MI2LocMap gives each instruction's original
/// position in the block. \returns true if any instruction was moved.
2018 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2019 SmallVectorImpl<MachineInstr *> &Ops,
2020 unsigned Base, bool isLd,
2021 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2022 bool RetVal = false;
2024 // Sort by offset (in reverse order).
2025 std::sort(Ops.begin(), Ops.end(),
2026 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2027 int LOffset = getMemoryOpOffset(LHS);
2028 int ROffset = getMemoryOpOffset(RHS);
// Duplicate offsets were filtered out by the caller.
2029 assert(LHS == RHS || LOffset != ROffset);
2030 return LOffset > ROffset;
2033 // The loads / stores of the same base are in order. Scan them from first to
2034 // last and check for the following:
2035 // 1. Any def of base.
2037 while (Ops.size() > 1) {
2038 unsigned FirstLoc = ~0U;
2039 unsigned LastLoc = 0;
2040 MachineInstr *FirstOp = nullptr;
2041 MachineInstr *LastOp = nullptr;
2043 unsigned LastOpcode = 0;
2044 unsigned LastBytes = 0;
2045 unsigned NumMove = 0;
// Walk Ops from the back (lowest offset first after the reverse sort),
// extending the candidate run while offsets stay contiguous and the ops
// map to the same load/store-multiple opcode.
2046 for (int i = Ops.size() - 1; i >= 0; --i) {
2047 MachineInstr *Op = Ops[i];
2048 unsigned Loc = MI2LocMap[Op];
2049 if (Loc <= FirstLoc) {
2053 if (Loc >= LastLoc) {
2059 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2060 if (LastOpcode && LSMOpcode != LastOpcode)
// Require strictly consecutive offsets (each Bytes apart).
2063 int Offset = getMemoryOpOffset(Op);
2064 unsigned Bytes = getLSMultipleTransferSize(Op);
2066 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2069 LastOffset = Offset;
2071 LastOpcode = LSMOpcode;
2072 if (++NumMove == 8) // FIXME: Tune this limit.
2079 SmallPtrSet<MachineInstr*, 4> MemOps;
2080 SmallSet<unsigned, 4> MemRegs;
2081 for (int i = NumMove-1; i >= 0; --i) {
2082 MemOps.insert(Ops[i]);
2083 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2086 // Be conservative, if the instructions are too far apart, don't
2087 // move them. We want to limit the increase of register pressure.
2088 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2090 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2091 MemOps, MemRegs, TRI);
2093 for (unsigned i = 0; i != NumMove; ++i)
2096 // This is the new location for the loads / stores.
// Loads sink to the first op's position; stores rise to the last's.
2097 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2098 while (InsertPos != MBB->end()
2099 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2102 // If we are moving a pair of loads / stores, see if it makes sense
2103 // to try to allocate a pair of registers that can form register pairs.
2104 MachineInstr *Op0 = Ops.back();
2105 MachineInstr *Op1 = Ops[Ops.size()-2];
2106 unsigned FirstReg = 0, SecondReg = 0;
2107 unsigned BaseReg = 0, PredReg = 0;
2108 ARMCC::CondCodes Pred = ARMCC::AL;
2110 unsigned NewOpc = 0;
2113 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2114 FirstReg, SecondReg, BaseReg,
2115 Offset, PredReg, Pred, isT2)) {
// Constrain both virtual registers to the class LDRD/STRD demands
// before building the fused instruction.
2119 const MCInstrDesc &MCID = TII->get(NewOpc);
2120 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2121 MRI->constrainRegClass(FirstReg, TRC);
2122 MRI->constrainRegClass(SecondReg, TRC);
2124 // Form the pair instruction.
2126 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2127 .addReg(FirstReg, RegState::Define)
2128 .addReg(SecondReg, RegState::Define)
2130 // FIXME: We're converting from LDRi12 to an insn that still
2131 // uses addrmode2, so we need an explicit offset reg. It should
2132 // always by reg0 since we're transforming LDRi12s.
2135 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2136 concatenateMemOperands(MIB, Op0, Op1);
2137 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2140 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2144 // FIXME: We're converting from LDRi12 to an insn that still
2145 // uses addrmode2, so we need an explicit offset reg. It should
2146 // always by reg0 since we're transforming STRi12s.
2149 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2150 concatenateMemOperands(MIB, Op0, Op1);
2151 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2158 // Add register allocation hints to form register pairs.
2159 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2160 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// Not pairable: just splice the candidates to the insertion point.
2163 for (unsigned i = 0; i != NumMove; ++i) {
2164 MachineInstr *Op = Ops.back();
2166 MBB->splice(InsertPos, MBB, Op);
2170 NumLdStMoved += NumMove;
/// Scan a basic block, bucketing unpredicated loads and stores by base
/// register, and invoke RescheduleOps on each bucket with more than one
/// entry. Processing restarts at barriers (calls/terminators) and whenever
/// a duplicate base+offset is seen. \returns true if anything moved.
2180 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2181 bool RetVal = false;
// MI2LocMap records each instruction's position; Base2{Lds,Sts}Map group
// the candidates per base register, with the bases kept in first-seen
// order in LdBases/StBases.
2183 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2184 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2185 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2186 SmallVector<unsigned, 4> LdBases;
2187 SmallVector<unsigned, 4> StBases;
2190 MachineBasicBlock::iterator MBBI = MBB->begin();
2191 MachineBasicBlock::iterator E = MBB->end();
2193 for (; MBBI != E; ++MBBI) {
2194 MachineInstr *MI = MBBI;
2195 if (MI->isCall() || MI->isTerminator()) {
2196 // Stop at barriers.
2201 if (!MI->isDebugValue())
2202 MI2LocMap[MI] = ++Loc;
2204 if (!isMemoryOp(MI))
// Only unconditional memory ops are rescheduled.
2206 unsigned PredReg = 0;
2207 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2210 int Opc = MI->getOpcode();
2211 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
2212 unsigned Base = MI->getOperand(1).getReg();
2213 int Offset = getMemoryOpOffset(MI);
2215 bool StopHere = false;
2217 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2218 Base2LdsMap.find(Base);
2219 if (BI != Base2LdsMap.end()) {
// A second access at the same base+offset would make the group
// unmergeable, so it terminates the current region instead.
2220 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2221 if (Offset == getMemoryOpOffset(BI->second[i])) {
2227 BI->second.push_back(MI);
2229 Base2LdsMap[Base].push_back(MI);
2230 LdBases.push_back(Base);
2233 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2234 Base2StsMap.find(Base);
2235 if (BI != Base2StsMap.end()) {
2236 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2237 if (Offset == getMemoryOpOffset(BI->second[i])) {
2243 BI->second.push_back(MI);
2245 Base2StsMap[Base].push_back(MI);
2246 StBases.push_back(Base);
2251 // Found a duplicate (a base+offset combination that's seen earlier).
2258 // Re-schedule loads.
2259 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2260 unsigned Base = LdBases[i];
2261 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2263 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2266 // Re-schedule stores.
2267 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2268 unsigned Base = StBases[i];
2269 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2271 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Clear the per-region maps before scanning the next region.
2275 Base2LdsMap.clear();
2276 Base2StsMap.clear();
2286 /// Returns an instance of the load / store optimization pass.
/// The pre-RA variant (rescheduling) or the post-RA variant (merging) is
/// selected by the \p PreAlloc flag.
2287 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2289 return new ARMPreAllocLoadStoreOpt();
2290 return new ARMLoadStoreOpt();