1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Target/TargetInstrInfo.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetRegisterInfo.h"
47 #define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened,   "Number of ldm instructions generated");
STATISTIC(NumSTMGened,   "Number of stm instructions generated");
STATISTIC(NumVLDMGened,  "Number of vldm instructions generated");
STATISTIC(NumVSTMGened,  "Number of vstm instructions generated");
STATISTIC(NumLdStMoved,  "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// Post-register-allocation pass that combines load / store instructions to
/// form ldm / stm instructions.
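/// As an illustrative sketch only (registers chosen for the example, not taken
/// from this file), the pass rewrites a run of word loads such as
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// into a single load-multiple
///   ldmia r4, {r0, r1, r2}
/// and does the analogous thing for stores (stm) and for VFP loads / stores
/// (vldm / vstm).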
64 struct ARMLoadStoreOpt : public MachineFunctionPass {
66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
68 const TargetInstrInfo *TII;
69 const TargetRegisterInfo *TRI;
70 const ARMSubtarget *STI;
71 const TargetLowering *TL;
74 bool isThumb1, isThumb2;
76 bool runOnMachineFunction(MachineFunction &Fn) override;
78 const char *getPassName() const override {
79 return "ARM load / store optimization pass";
  struct MemOpQueueEntry {
    int Offset;
    unsigned Reg;
    bool isKill;
    unsigned Position;
    MachineBasicBlock::iterator MBBI;
    bool Merged;
    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
  };
94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
95 typedef MemOpQueue::iterator MemOpQueueIter;
97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
98 const MemOpQueue &MemOps, unsigned DefReg,
99 unsigned RangeBegin, unsigned RangeEnd);
100 void UpdateBaseRegUses(MachineBasicBlock &MBB,
101 MachineBasicBlock::iterator MBBI,
102 DebugLoc dl, unsigned Base, unsigned WordOffset,
103 ARMCC::CondCodes Pred, unsigned PredReg);
104 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
105 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
106 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
108 ArrayRef<std::pair<unsigned, bool> > Regs,
109 ArrayRef<unsigned> ImpDefs);
110 void MergeOpsUpdate(MachineBasicBlock &MBB,
112 unsigned memOpsBegin,
114 unsigned insertAfter,
119 ARMCC::CondCodes Pred,
123 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
124 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
125 unsigned Opcode, unsigned Size,
126 ARMCC::CondCodes Pred, unsigned PredReg,
127 unsigned Scratch, MemOpQueue &MemOps,
128 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
129 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
130 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
131 MachineBasicBlock::iterator &MBBI);
132 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MBBI,
134 const TargetInstrInfo *TII,
136 MachineBasicBlock::iterator &I);
137 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
138 MachineBasicBlock::iterator MBBI,
140 MachineBasicBlock::iterator &I);
141 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
142 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
144 char ARMLoadStoreOpt::ID = 0;
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    // If the instruction has a live CPSR def, then it's not safe to fold it
    // into a load / store.
    if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      return true;
  }
  return false;
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands - 3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
188 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
190 default: llvm_unreachable("Unhandled opcode!");
194 default: llvm_unreachable("Unhandled submode!");
195 case ARM_AM::ia: return ARM::LDMIA;
196 case ARM_AM::da: return ARM::LDMDA;
197 case ARM_AM::db: return ARM::LDMDB;
198 case ARM_AM::ib: return ARM::LDMIB;
203 default: llvm_unreachable("Unhandled submode!");
204 case ARM_AM::ia: return ARM::STMIA;
205 case ARM_AM::da: return ARM::STMDA;
206 case ARM_AM::db: return ARM::STMDB;
207 case ARM_AM::ib: return ARM::STMIB;
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
215 default: llvm_unreachable("Unhandled submode!");
216 case ARM_AM::ia: return ARM::tLDMIA;
220 // There is no non-writeback tSTMIA either.
223 default: llvm_unreachable("Unhandled submode!");
224 case ARM_AM::ia: return ARM::tSTMIA_UPD;
230 default: llvm_unreachable("Unhandled submode!");
231 case ARM_AM::ia: return ARM::t2LDMIA;
232 case ARM_AM::db: return ARM::t2LDMDB;
238 default: llvm_unreachable("Unhandled submode!");
239 case ARM_AM::ia: return ARM::t2STMIA;
240 case ARM_AM::db: return ARM::t2STMDB;
245 default: llvm_unreachable("Unhandled submode!");
246 case ARM_AM::ia: return ARM::VLDMSIA;
247 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
252 default: llvm_unreachable("Unhandled submode!");
253 case ARM_AM::ia: return ARM::VSTMSIA;
254 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
259 default: llvm_unreachable("Unhandled submode!");
260 case ARM_AM::ia: return ARM::VLDMDIA;
261 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
266 default: llvm_unreachable("Unhandled submode!");
267 case ARM_AM::ia: return ARM::VSTMDIA;
268 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
276 AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
278 default: llvm_unreachable("Unhandled opcode!");
285 case ARM::tLDMIA_UPD:
286 case ARM::tSTMIA_UPD:
287 case ARM::t2LDMIA_RET:
289 case ARM::t2LDMIA_UPD:
291 case ARM::t2STMIA_UPD:
293 case ARM::VLDMSIA_UPD:
295 case ARM::VSTMSIA_UPD:
297 case ARM::VLDMDIA_UPD:
299 case ARM::VSTMDIA_UPD:
313 case ARM::t2LDMDB_UPD:
315 case ARM::t2STMDB_UPD:
316 case ARM::VLDMSDB_UPD:
317 case ARM::VSTMSDB_UPD:
318 case ARM::VLDMDDB_UPD:
319 case ARM::VSTMDDB_UPD:
330 } // end namespace ARM_AM
331 } // end namespace llvm
333 static bool isT1i32Load(unsigned Opc) {
334 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
337 static bool isT2i32Load(unsigned Opc) {
338 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
341 static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
345 static bool isT1i32Store(unsigned Opc) {
346 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
349 static bool isT2i32Store(unsigned Opc) {
350 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
353 static bool isi32Store(unsigned Opc) {
354 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
357 static unsigned getImmScale(unsigned Opc) {
359 default: llvm_unreachable("Unhandled opcode!");
374 /// Update future uses of the base register with the offset introduced
375 /// due to writeback. This function only works on Thumb1.
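/// As a hedged example (register names and counts are illustrative): after a
/// merged Thumb1 "ldmia r0!, {r1, r2, r3}" the base has already advanced by
/// 12 bytes, so a later "ldr r4, [r0, #12]" must become "ldr r4, [r0]"; when a
/// rewritten offset would go negative, a "subs r0, #12" is inserted to reset
/// the base instead.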
377 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
378 MachineBasicBlock::iterator MBBI,
379 DebugLoc dl, unsigned Base,
381 ARMCC::CondCodes Pred, unsigned PredReg) {
382 assert(isThumb1 && "Can only update base register uses for Thumb1!");
383 // Start updating any instructions with immediate offsets. Insert a SUB before
384 // the first non-updateable instruction (if any).
385 for (; MBBI != MBB.end(); ++MBBI) {
386 bool InsertSub = false;
387 unsigned Opc = MBBI->getOpcode();
    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
396 if (IsLoad || IsStore) {
397 // Loads and stores with immediate offsets can be updated, but only if
398 // the new offset isn't negative.
399 // The MachineOperand containing the offset immediate is the last one
400 // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
403 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
404 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
406 // If storing the base register, it needs to be reset first.
407 unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;
414 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
415 !definesCPSR(MBBI)) {
416 // SUBS/ADDS using this register, with a dead def of the CPSR.
417 // Merge it with the update; if the merged offset is too large,
418 // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
421 Offset = (Opc == ARM::tSUBi8) ?
422 MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
424 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }
      } else {
        // Can't update the instruction.
        InsertSub = true;
      }
439 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
440 // Since SUBS sets the condition flags, we can't place the base reset
441 // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }
458 // End of block was reached.
459 if (MBB.succ_size() > 0) {
460 // FIXME: Because of a bug, live registers are sometimes missing from
461 // the successor blocks' live-in sets. This means we can't trust that
462 // information and *always* have to reset at the end of a block.
464 if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
471 /// Create and insert a LDM or STM with Base as base register and registers in
472 /// Regs as the register operands that would be loaded / stored. It returns
473 /// true if the transformation is done.
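/// For instance (purely illustrative operands, not taken from a caller): with
/// Opcode == ARM::LDRi12, Base == r4, Offset == 0 and Regs == {r0, r1, r2},
/// this emits roughly "ldmia r4, {r0, r1, r2}" under the given predicate; on
/// Thumb1 the writeback form "ldmia r4!, ..." is used instead and the base is
/// reset or later users of it are rewritten via UpdateBaseRegUses.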
475 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
476 MachineBasicBlock::iterator MBBI,
477 int Offset, unsigned Base, bool BaseKill,
478 unsigned Opcode, ARMCC::CondCodes Pred,
479 unsigned PredReg, unsigned Scratch, DebugLoc dl,
480 ArrayRef<std::pair<unsigned, bool> > Regs,
481 ArrayRef<unsigned> ImpDefs) {
482 // Only a single register to load / store. Don't bother.
483 unsigned NumRegs = Regs.size();
487 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
488 // Compute liveness information for that register to make the decision.
489 bool SafeToClobberCPSR = !isThumb1 ||
490 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
491 MachineBasicBlock::LQR_Dead);
493 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
497 // It's also not possible to merge an STR of the base register in Thumb1.
499 for (const std::pair<unsigned, bool> &R : Regs)
500 if (Base == R.first) {
501 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
502 if (Opcode == ARM::tLDRi) {
505 } else if (Opcode == ARM::tSTRi) {
510 ARM_AM::AMSubMode Mode = ARM_AM::ia;
511 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
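  // Worked example (added for illustration): when merging three word accesses,
  // a starting offset of 0 keeps the default IA mode, +4 selects IB,
  // -8 (= -4*3 + 4) selects DA and -12 (= -4*3) selects DB; any other offset
  // forces a new base register to be materialized below.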
512 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
513 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
523 // Check if this is a supported opcode before inserting instructions to
524 // calculate a new base register.
525 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
527 // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;
533 // On Thumb1, it's not worth materializing a new base register without
534 // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;
539 if (isi32Load(Opcode)) {
540 // If it is a load, then just use one of the destination register to
541 // use as the new base.
542 NewBase = Regs[NumRegs-1].first;
544 // Use the scratch register to use as a new base.
    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }
564 if (!TL->isLegalAddImmediate(Offset))
565 // FIXME: Try add with register operand?
566 return false; // Probably not worth it then.
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
574 if (Base != NewBase &&
575 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
576 // Need to insert a MOV to the new base first.
577 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
579 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
580 if (Pred != ARMCC::AL)
582 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
583 .addReg(Base, getKillRegState(BaseKill));
585 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
586 .addReg(Base, getKillRegState(BaseKill))
587 .addImm(Pred).addReg(PredReg);
589 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
593 if (BaseOpc == ARM::tADDrSPi) {
594 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
595 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
596 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
597 .addImm(Pred).addReg(PredReg);
599 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
600 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
601 .addImm(Pred).addReg(PredReg);
603 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
604 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
605 .addImm(Pred).addReg(PredReg).addReg(0);
608 BaseKill = true; // New base is always killed straight away.
611 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
612 Opcode == ARM::VLDRD);
614 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
615 // base register writeback.
616 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
617 if (!Opcode) return false;
619 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
620 // - There is no writeback (LDM of base register),
621 // - the base register is killed by the merged instruction,
622 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
623 // to reset the base register.
624 // Otherwise, don't merge.
625 // It's safe to return here since the code to materialize a new base register
626 // above is also conditional on SafeToClobberCPSR.
627 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
630 MachineInstrBuilder MIB;
633 if (Opcode == ARM::tLDMIA)
634 // Update tLDMIA with writeback if necessary.
635 Opcode = ARM::tLDMIA_UPD;
637 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
639 // Thumb1: we might need to set base writeback when building the MI.
640 MIB.addReg(Base, getDefRegState(true))
641 .addReg(Base, getKillRegState(BaseKill));
643 // The base isn't dead after a merged instruction with writeback.
644 // Insert a sub instruction after the newly formed instruction to reset.
646 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
649 // No writeback, simply build the MachineInstr.
650 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
651 MIB.addReg(Base, getKillRegState(BaseKill));
654 MIB.addImm(Pred).addReg(PredReg);
656 for (const std::pair<unsigned, bool> &R : Regs)
657 MIB = MIB.addReg(R.first, getDefRegState(isDef)
658 | getKillRegState(R.second));
660 // Add implicit defs for super-registers.
661 for (unsigned ImpDef : ImpDefs)
662 MIB.addReg(ImpDef, RegState::ImplicitDefine);
667 /// Find all instructions using a given imp-def within a range.
669 /// We are trying to combine a range of instructions, one of which (located at
670 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
674 /// The live range continues until we find a second definition or one of the
675 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
676 /// we must consider all uses and decide which are relevant in a second pass.
677 void ARMLoadStoreOpt::findUsesOfImpDef(
678 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
679 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
680 std::map<unsigned, MachineOperand *> Uses;
681 unsigned LastLivePos = RangeEnd;
683 // First we find all uses of this register with Position between RangeBegin
684 // and RangeEnd, any or all of these could be uses of a definition at
685 // RangeBegin. We also record the latest position a definition at RangeBegin
686 // would be considered live.
687 for (unsigned i = 0; i < MemOps.size(); ++i) {
688 MachineInstr &MI = *MemOps[i].MBBI;
689 unsigned MIPosition = MemOps[i].Position;
690 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
693 // If this instruction defines the register, then any later use will be of
694 // that definition rather than ours.
695 if (MI.definesRegister(DefReg))
696 LastLivePos = std::min(LastLivePos, MIPosition);
698 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
702 // If this instruction kills the register then (assuming liveness is
703 // correct when we start) we don't need to think about anything after here.
705 LastLivePos = std::min(LastLivePos, MIPosition);
707 Uses[MIPosition] = UseOp;
710 // Now we traverse the list of all uses, and append the ones that actually use
711 // our definition to the requested list.
712 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
715 // List is sorted by position so once we've found one out of range there
716 // will be no more to consider.
717 if (I->first > LastLivePos)
719 UsesOfImpDefs.push_back(I->second);
723 /// Call MergeOps and update MemOps and merges accordingly on success.
724 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
726 unsigned memOpsBegin, unsigned memOpsEnd,
727 unsigned insertAfter, int Offset,
728 unsigned Base, bool BaseKill,
730 ARMCC::CondCodes Pred, unsigned PredReg,
733 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
734 // First calculate which of the registers should be killed by the merged
736 const unsigned insertPos = memOps[insertAfter].Position;
737 SmallSet<unsigned, 4> KilledRegs;
738 DenseMap<unsigned, unsigned> Killer;
739 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
740 if (i == memOpsBegin) {
745 if (memOps[i].Position < insertPos && memOps[i].isKill) {
746 unsigned Reg = memOps[i].Reg;
747 KilledRegs.insert(Reg);
752 SmallVector<std::pair<unsigned, bool>, 8> Regs;
753 SmallVector<unsigned, 8> ImpDefs;
754 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
755 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
756 unsigned Reg = memOps[i].Reg;
757 // If we are inserting the merged operation after an operation that
758 // uses the same register, make sure to transfer any kill flag.
759 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
760 Regs.push_back(std::make_pair(Reg, isKill));
762 // Collect any implicit defs of super-registers. They must be preserved.
763 for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
764 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
766 unsigned DefReg = MO.getReg();
767 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
768 ImpDefs.push_back(DefReg);
770 // There may be other uses of the definition between this instruction and
771 // the eventual LDM/STM position. These should be marked undef if the
772 // merge takes place.
773 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
778 // Try to do the merge.
779 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
781 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
782 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
785 // Merge succeeded, update records.
786 Merges.push_back(std::prev(Loc));
788 // In gathering loads together, we may have moved the imp-def of a register
789 // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // dependencies: any such use is marked <undef> here.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();
797 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
798 // Remove kill flags from any memops that come before insertPos.
799 if (Regs[i-memOpsBegin].second) {
800 unsigned Reg = Regs[i-memOpsBegin].first;
801 if (KilledRegs.count(Reg)) {
802 unsigned j = Killer[Reg];
803 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
804 assert(Idx >= 0 && "Cannot find killing operand");
805 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
806 memOps[j].isKill = false;
808 memOps[i].isKill = true;
810 MBB.erase(memOps[i].MBBI);
811 // Update this memop to refer to the merged instruction.
812 // We may need to move kill flags again.
813 memOps[i].Merged = true;
814 memOps[i].MBBI = Merges.back();
815 memOps[i].Position = insertPos;
818 // Update memOps offsets, since they may have been modified by MergeOps.
819 for (auto &MemOp : memOps) {
820 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
824 /// Merge a number of load / store instructions into one or more load / store
825 /// multiple instructions.
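/// Illustrative sketch (registers are examples only): a queue of word loads
/// from [r1], [r1, #4] and [r1, #8] becomes a single "ldmia r1, {...}", while
/// an entry that breaks the ascending-register-number or consecutive-offset
/// requirement ends the current run and the routine recurses on the rest of
/// the queue.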
827 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
828 unsigned Base, unsigned Opcode, unsigned Size,
829 ARMCC::CondCodes Pred, unsigned PredReg,
830 unsigned Scratch, MemOpQueue &MemOps,
831 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
832 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
833 int Offset = MemOps[SIndex].Offset;
834 int SOffset = Offset;
835 unsigned insertAfter = SIndex;
836 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
837 DebugLoc dl = Loc->getDebugLoc();
838 const MachineOperand &PMO = Loc->getOperand(0);
839 unsigned PReg = PMO.getReg();
840 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
842 unsigned Limit = ~0U;
843 bool BaseKill = false;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.
862 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
863 int NewOffset = MemOps[i].Offset;
864 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
865 unsigned Reg = MO.getReg();
866 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
867 // Register numbers must be in ascending order. For VFP / NEON load and
868 // store multiples, the registers must also be consecutive and within the
869 // limit on the number of registers per instruction.
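    // E.g. (illustrative registers): "ldr r2, [r4]; ldr r3, [r4, #4]" can
    // become "ldmia r4, {r2, r3}", but "ldr r3, [r4]; ldr r2, [r4, #4]"
    // cannot, because LDM/STM always pair the lowest-numbered register with
    // the lowest address.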
870 if (Reg != ARM::SP &&
871 NewOffset == Offset + (int)Size &&
872 ((isNotVFP && RegNum > PRegNum) ||
873 ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register number
        // because Q register unaligned vldm/vstm need more uops.
876 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      // Can't merge this in. Try merging the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);
886 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
887 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
888 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
893 if (MemOps[i].Position > MemOps[insertAfter].Position) {
895 Loc = MemOps[i].MBBI;
899 BaseKill = Loc->killsRegister(Base);
900 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
901 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
904 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
905 unsigned Bytes, unsigned Limit,
906 ARMCC::CondCodes Pred, unsigned PredReg) {
907 unsigned MyPredReg = 0;
911 bool CheckCPSRDef = false;
912 switch (MI->getOpcode()) {
913 default: return false;
923 // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;
927 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
928 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
929 if (!(MI->getOperand(0).getReg() == Base &&
930 MI->getOperand(1).getReg() == Base &&
931 (MI->getOperand(2).getImm() * Scale) == Bytes &&
932 getInstrPredicate(MI, MyPredReg) == Pred &&
933 MyPredReg == PredReg))
936 return CheckCPSRDef ? !definesCPSR(MI) : true;
939 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
940 unsigned Bytes, unsigned Limit,
941 ARMCC::CondCodes Pred, unsigned PredReg) {
942 unsigned MyPredReg = 0;
946 bool CheckCPSRDef = false;
947 switch (MI->getOpcode()) {
948 default: return false;
958 if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;
962 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
963 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
964 if (!(MI->getOperand(0).getReg() == Base &&
965 MI->getOperand(1).getReg() == Base &&
966 (MI->getOperand(2).getImm() * Scale) == Bytes &&
967 getInstrPredicate(MI, MyPredReg) == Pred &&
968 MyPredReg == PredReg))
971 return CheckCPSRDef ? !definesCPSR(MI) : true;
974 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
975 switch (MI->getOpcode()) {
1002 case ARM::tLDMIA_UPD:
1003 case ARM::tSTMIA_UPD:
1010 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
1013 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
1017 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1018 ARM_AM::AMSubMode Mode) {
1020 default: llvm_unreachable("Unhandled opcode!");
1026 default: llvm_unreachable("Unhandled submode!");
1027 case ARM_AM::ia: return ARM::LDMIA_UPD;
1028 case ARM_AM::ib: return ARM::LDMIB_UPD;
1029 case ARM_AM::da: return ARM::LDMDA_UPD;
1030 case ARM_AM::db: return ARM::LDMDB_UPD;
1037 default: llvm_unreachable("Unhandled submode!");
1038 case ARM_AM::ia: return ARM::STMIA_UPD;
1039 case ARM_AM::ib: return ARM::STMIB_UPD;
1040 case ARM_AM::da: return ARM::STMDA_UPD;
1041 case ARM_AM::db: return ARM::STMDB_UPD;
1046 default: llvm_unreachable("Unhandled submode!");
1047 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1048 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1053 default: llvm_unreachable("Unhandled submode!");
1054 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1055 case ARM_AM::db: return ARM::t2STMDB_UPD;
1059 default: llvm_unreachable("Unhandled submode!");
1060 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1061 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1065 default: llvm_unreachable("Unhandled submode!");
1066 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1067 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1071 default: llvm_unreachable("Unhandled submode!");
1072 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1073 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1077 default: llvm_unreachable("Unhandled submode!");
1078 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1079 case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
///   stmia rn, <ra, rb, rc>
///   rn := rn + 4 * 3;
/// =>
///   stmia rn!, <ra, rb, rc>
///
///   rn := rn - 4 * 3;
///   ldmia rn, <ra, rb, rc>
/// =>
///   ldmdb rn!, <ra, rb, rc>
1096 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1097 MachineBasicBlock::iterator MBBI,
1099 MachineBasicBlock::iterator &I) {
1100 // Thumb1 is already using updating loads/stores.
1101 if (isThumb1) return false;
1103 MachineInstr *MI = MBBI;
1104 unsigned Base = MI->getOperand(0).getReg();
1105 bool BaseKill = MI->getOperand(0).isKill();
1106 unsigned Bytes = getLSMultipleTransferSize(MI);
1107 unsigned PredReg = 0;
1108 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1109 unsigned Opcode = MI->getOpcode();
1110 DebugLoc dl = MI->getDebugLoc();
1112 // Can't use an updating ld/st if the base register is also a dest
1113 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1114 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;
1118 bool DoMerge = false;
1119 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
1121 // Try merging with the previous instruction.
1122 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1123 if (MBBI != BeginMBBI) {
1124 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1125 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }
1140 // Try merging with the next instruction.
1141 MachineBasicBlock::iterator EndMBBI = MBB.end();
1142 if (!DoMerge && MBBI != EndMBBI) {
1143 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1144 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1146 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1147 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1149 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1150 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1154 if (NextMBBI == I) {
1158 MBB.erase(NextMBBI);
1165 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1166 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1167 .addReg(Base, getDefRegState(true)) // WB base register
1168 .addReg(Base, getKillRegState(BaseKill))
1169 .addImm(Pred).addReg(PredReg);
1171 // Transfer the rest of operands.
1172 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1173 MIB.addOperand(MI->getOperand(OpNum));
1175 // Transfer memoperands.
1176 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1182 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1183 ARM_AM::AddrOpc Mode) {
1186 return ARM::LDR_PRE_IMM;
1188 return ARM::STR_PRE_IMM;
1190 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1192 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1194 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1196 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1199 return ARM::t2LDR_PRE;
1202 return ARM::t2STR_PRE;
1203 default: llvm_unreachable("Unhandled opcode!");
1207 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1208 ARM_AM::AddrOpc Mode) {
1211 return ARM::LDR_POST_IMM;
1213 return ARM::STR_POST_IMM;
1215 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1217 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1219 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1221 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1224 return ARM::t2LDR_POST;
1227 return ARM::t2STR_POST;
1228 default: llvm_unreachable("Unhandled opcode!");
/// Fold preceding/trailing inc/dec of base register into the
1233 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
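/// For example (illustrative registers only):
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4        (post-indexed)
/// while an "add r0, r0, #4" *before* the load can instead fold into the
/// pre-indexed form "ldr r1, [r0, #4]!".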
1234 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1235 MachineBasicBlock::iterator MBBI,
1236 const TargetInstrInfo *TII,
1238 MachineBasicBlock::iterator &I) {
1239 // Thumb1 doesn't have updating LDR/STR.
1240 // FIXME: Use LDM/STM with single register instead.
1241 if (isThumb1) return false;
1243 MachineInstr *MI = MBBI;
1244 unsigned Base = MI->getOperand(1).getReg();
1245 bool BaseKill = MI->getOperand(1).isKill();
1246 unsigned Bytes = getLSMultipleTransferSize(MI);
1247 unsigned Opcode = MI->getOpcode();
1248 DebugLoc dl = MI->getDebugLoc();
1249 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1250 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1251 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1252 if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
1258 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1259 // Can't do the merge if the destination register is the same as the would-be
1260 // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;
1264 unsigned PredReg = 0;
1265 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1266 bool DoMerge = false;
1267 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1268 unsigned NewOpc = 0;
1269 // AM2 - 12 bits, thumb2 - 8 bits.
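  // Note (editorial, inferred from isMatchingIncrement/Decrement): a Limit of
  // 0 simply disables the range check; the AM5 (VLDR/VSTR) case uses 0 because
  // the merged VLDM/VSTM_UPD writes the base back by the transfer size rather
  // than by an encoded immediate.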
1270 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1272 // Try merging with the previous instruction.
1273 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1274 if (MBBI != BeginMBBI) {
1275 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1276 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1278 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1280 AddSub = ARM_AM::sub;
1281 } else if (!isAM5 &&
1282 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1286 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1287 MBB.erase(PrevMBBI);
1291 // Try merging with the next instruction.
1292 MachineBasicBlock::iterator EndMBBI = MBB.end();
1293 if (!DoMerge && MBBI != EndMBBI) {
1294 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1295 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1298 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1300 AddSub = ARM_AM::sub;
1301 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1305 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1306 if (NextMBBI == I) {
1310 MBB.erase(NextMBBI);
1318 // VLDM[SD]_UPD, VSTM[SD]_UPD
1319 // (There are no base-updating versions of VLDR/VSTR instructions, but the
  // updating load/store-multiple instructions can be used with only one
  // register.)
1322 MachineOperand &MO = MI->getOperand(0);
1323 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1324 .addReg(Base, getDefRegState(true)) // WB base register
1325 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1326 .addImm(Pred).addReg(PredReg)
1327 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1328 getKillRegState(MO.isKill())));
1331 // LDR_PRE, LDR_POST
1332 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1333 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1334 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1335 .addReg(Base, RegState::Define)
1336 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1338 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1339 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1340 .addReg(Base, RegState::Define)
1341 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1344 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1345 // t2LDR_PRE, t2LDR_POST
1346 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1347 .addReg(Base, RegState::Define)
1348 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1351 MachineOperand &MO = MI->getOperand(0);
1352 // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
1354 // can be removed entirely.
1355 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1356 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1357 // STR_PRE, STR_POST
1358 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1359 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1360 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1362 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1363 // t2STR_PRE, t2STR_POST
1364 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1365 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1366 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1374 /// Returns true if instruction is a memory operation that this pass is capable
1375 /// of operating on.
1376 static bool isMemoryOp(const MachineInstr *MI) {
1377 // When no memory operands are present, conservatively assume unaligned,
1378 // volatile, unfoldable.
1379 if (!MI->hasOneMemOperand())
1382 const MachineMemOperand *MMO = *MI->memoperands_begin();
1384 // Don't touch volatile memory accesses - we may be changing their order.
1385 if (MMO->isVolatile())
  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;
1393 // str <undef> could probably be eliminated entirely, but for now we just want
1394 // to avoid making a mess of it.
1395 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1396 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;
1400 // Likewise don't mess with references to undefined addresses.
1401 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;
1405 unsigned Opcode = MI->getOpcode();
1410 return MI->getOperand(1).isReg();
1413 return MI->getOperand(1).isReg();
1424 return MI->getOperand(1).isReg();
/// Advance register scavenger to just before the earliest memory op that is
/// going to be merged.
1431 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1432 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1433 unsigned Position = MemOps[0].Position;
1434 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1435 if (MemOps[i].Position < Position) {
1436 Position = MemOps[i].Position;
1437 Loc = MemOps[i].MBBI;
1441 if (Loc != MBB.begin())
1442 RS->forward(std::prev(Loc));
1445 static void InsertLDR_STR(MachineBasicBlock &MBB,
1446 MachineBasicBlock::iterator &MBBI,
1447 int Offset, bool isDef,
1448 DebugLoc dl, unsigned NewOpc,
1449 unsigned Reg, bool RegDeadKill, bool RegUndef,
1450 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1451 bool OffKill, bool OffUndef,
1452 ARMCC::CondCodes Pred, unsigned PredReg,
1453 const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
1469 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1470 MachineBasicBlock::iterator &MBBI) {
1471 MachineInstr *MI = &*MBBI;
1472 unsigned Opcode = MI->getOpcode();
1473 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1474 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1475 const MachineOperand &BaseOp = MI->getOperand(2);
1476 unsigned BaseReg = BaseOp.getReg();
1477 unsigned EvenReg = MI->getOperand(0).getReg();
1478 unsigned OddReg = MI->getOperand(1).getReg();
1479 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1480 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1481 // ARM errata 602117: LDRD with base in list may result in incorrect base
1482 // register when interrupted or faulted.
1483 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1484 if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;
1488 MachineBasicBlock::iterator NewBBI = MBBI;
1489 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1490 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1491 bool EvenDeadKill = isLd ?
1492 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1493 bool EvenUndef = MI->getOperand(0).isUndef();
1494 bool OddDeadKill = isLd ?
1495 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1496 bool OddUndef = MI->getOperand(1).isUndef();
1497 bool BaseKill = BaseOp.isKill();
1498 bool BaseUndef = BaseOp.isUndef();
1499 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1500 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1501 int OffImm = getMemoryOpOffset(MI);
1502 unsigned PredReg = 0;
1503 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1505 if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
1508 unsigned NewOpc = (isLd)
1509 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1510 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1512 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1513 .addReg(BaseReg, getKillRegState(BaseKill))
1514 .addImm(Pred).addReg(PredReg)
1515 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1516 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1519 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1520 .addReg(BaseReg, getKillRegState(BaseKill))
1521 .addImm(Pred).addReg(PredReg)
1523 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1525 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1528 NewBBI = std::prev(MBBI);
1530 // Split into two instructions.
1531 unsigned NewOpc = (isLd)
1532 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1533 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1534 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1535 // so adjust and use t2LDRi12 here for that.
1536 unsigned NewOpc2 = (isLd)
1537 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1538 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
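      // Editorial note (not from the original source): t2LDRi8/t2STRi8 is the
      // Thumb2 form that accepts a negative 8-bit offset, while
      // t2LDRi12/t2STRi12 takes a non-negative 12-bit offset, which is why the
      // opcode choice above keys off OffImm < 0 (and OffImm+4 < 0 for the
      // second half of the pair).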
1539 DebugLoc dl = MBBI->getDebugLoc();
1540 // If this is a load and base register is killed, it may have been
1541 // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
1545 assert(!TRI->regsOverlap(OddReg, BaseReg));
1546 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1547 OddReg, OddDeadKill, false,
1548 BaseReg, false, BaseUndef, false, OffUndef,
1549 Pred, PredReg, TII, isT2);
1550 NewBBI = std::prev(MBBI);
1551 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1552 EvenReg, EvenDeadKill, false,
1553 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1554 Pred, PredReg, TII, isT2);
1556 if (OddReg == EvenReg && EvenDeadKill) {
1557 // If the two source operands are the same, the kill marker is
1558 // probably on the first one. e.g.
1559 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
1563 // Never kill the base register in the first instruction.
1564 if (EvenReg == BaseReg)
1565 EvenDeadKill = false;
1566 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1567 EvenReg, EvenDeadKill, EvenUndef,
1568 BaseReg, false, BaseUndef, false, OffUndef,
1569 Pred, PredReg, TII, isT2);
1570 NewBBI = std::prev(MBBI);
1571 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1572 OddReg, OddDeadKill, OddUndef,
1573 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1574 Pred, PredReg, TII, isT2);
1589 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1590 /// incrementing offset into LDM / STM ops.
1591 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1592 unsigned NumMerges = 0;
1593 unsigned NumMemOps = 0;
1595 unsigned CurrBase = 0;
1596 unsigned CurrOpc = ~0u;
1597 unsigned CurrSize = 0;
1598 ARMCC::CondCodes CurrPred = ARMCC::AL;
1599 unsigned CurrPredReg = 0;
1600 unsigned Position = 0;
1601 SmallVector<MachineBasicBlock::iterator,4> Merges;
1603 RS->enterBasicBlock(&MBB);
1604 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1606 if (FixInvalidRegPairOp(MBB, MBBI))
1609 bool Advance = false;
1610 bool TryMerge = false;
1612 bool isMemOp = isMemoryOp(MBBI);
1614 unsigned Opcode = MBBI->getOpcode();
1615 unsigned Size = getLSMultipleTransferSize(MBBI);
1616 const MachineOperand &MO = MBBI->getOperand(0);
1617 unsigned Reg = MO.getReg();
1618 bool isKill = MO.isDef() ? false : MO.isKill();
1619 unsigned Base = MBBI->getOperand(1).getReg();
1620 unsigned PredReg = 0;
1621 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1622 int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
1632 bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
      // Watch out for:
      //   r4 := ldr [r0, #8]
      //   r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same is
      // true for str. Try to merge inst(s) already in MemOps.
1641 bool Overlap = false;
1642 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1643 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1649 if (CurrBase == 0 && !Clobber) {
1650 // Start of a new chain.
1655 CurrPredReg = PredReg;
1656 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1659 } else if (!Overlap) {
1665 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1666 // No need to match PredReg.
1667 // Continue adding to the queue.
1668 if (Offset > MemOps.back().Offset) {
1669 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1674 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1676 if (Offset < I->Offset) {
1677 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1682 } else if (Offset == I->Offset) {
1683 // Collision! This can't be merged!
1692 if (MBBI->isDebugValue()) {
1695 // Reach the end of the block, try merging the memory instructions.
1697 } else if (Advance) {
1701 // Reach the end of the block, try merging the memory instructions.
1708 if (NumMemOps > 1) {
1709 // Try to find a free register to use as a new base in case it's needed.
1710 // First advance to the instruction just before the start of the chain.
1711 AdvanceRS(MBB, MemOps);
1713 // Find a scratch register.
1715 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1717 // Process the load / store instructions.
1718 RS->forward(std::prev(MBBI));
1722 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1723 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1725 // Try folding preceding/trailing base inc/dec into the generated
1727 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1728 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1730 NumMerges += Merges.size();
1732 // Try folding preceding/trailing base inc/dec into those load/store
1733 // that were not merged to form LDM/STM ops.
1734 for (unsigned i = 0; i != NumMemOps; ++i)
1735 if (!MemOps[i].Merged)
1736 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1739 // RS may be pointing to an instruction that's deleted.
1740 RS->skipTo(std::prev(MBBI));
1741 } else if (NumMemOps == 1) {
1742 // Try folding preceding/trailing base inc/dec into the single
1744 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1746 RS->forward(std::prev(MBBI));
1753 CurrPred = ARMCC::AL;
1760 // If iterator hasn't been advanced and this is not a memory op, skip it.
1761 // It can't start a new chain anyway.
1762 if (!Advance && !isMemOp && MBBI != E) {
1768 return NumMerges > 0;
/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
/// into the preceding stack restore so it directly restores the value of LR
/// into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
1781 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1782 // Thumb1 LDM doesn't allow high registers.
1783 if (isThumb1) return false;
1784 if (MBB.empty()) return false;
1786 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1787 if (MBBI != MBB.begin() &&
1788 (MBBI->getOpcode() == ARM::BX_RET ||
1789 MBBI->getOpcode() == ARM::tBX_RET ||
1790 MBBI->getOpcode() == ARM::MOVPCLR)) {
1791 MachineInstr *PrevMI = std::prev(MBBI);
1792 unsigned Opcode = PrevMI->getOpcode();
1793 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1794 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1795 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1796 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
1799 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1800 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1801 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1802 PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
1812 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1813 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1814 TL = STI->getTargetLowering();
1815 AFI = Fn.getInfo<ARMFunctionInfo>();
1816 TII = STI->getInstrInfo();
1817 TRI = STI->getRegisterInfo();
1818 RS = new RegScavenger();
1819 isThumb2 = AFI->isThumb2Function();
1820 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1822 bool Modified = false;
1823 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1825 MachineBasicBlock &MBB = *MFI;
1826 Modified |= LoadStoreMultipleOpti(MBB);
1827 if (STI->hasV5TOps())
1828 Modified |= MergeReturnIntoLDM(MBB);
/// Pre-register-allocation pass that moves loads / stores from consecutive
/// locations closer together so they are more likely to be combined later.
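/// For example (registers illustrative), rescheduling
///   ldr r0, [r2]
///   ...unrelated instructions...
///   ldr r1, [r2, #4]
/// so that the two loads become adjacent makes it more likely that they are
/// later combined into an ldrd (see CanFormLdStDWord below) or into an ldm by
/// the post-RA pass.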
1838 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1840 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1842 const DataLayout *TD;
1843 const TargetInstrInfo *TII;
1844 const TargetRegisterInfo *TRI;
1845 const ARMSubtarget *STI;
1846 MachineRegisterInfo *MRI;
1847 MachineFunction *MF;
1849 bool runOnMachineFunction(MachineFunction &Fn) override;
1851 const char *getPassName() const override {
1852 return "ARM pre- register allocation load / store optimization pass";
1856 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1857 unsigned &NewOpc, unsigned &EvenReg,
1858 unsigned &OddReg, unsigned &BaseReg,
1860 unsigned &PredReg, ARMCC::CondCodes &Pred,
1862 bool RescheduleOps(MachineBasicBlock *MBB,
1863 SmallVectorImpl<MachineInstr *> &Ops,
1864 unsigned Base, bool isLd,
1865 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1866 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1868 char ARMPreAllocLoadStoreOpt::ID = 0;
1871 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1872 TD = Fn.getTarget().getDataLayout();
1873 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1874 TII = STI->getInstrInfo();
1875 TRI = STI->getRegisterInfo();
1876 MRI = &Fn.getRegInfo();
1879 bool Modified = false;
1880 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1882 Modified |= RescheduleLoadStoreInstrs(MFI);
1887 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1888 MachineBasicBlock::iterator I,
1889 MachineBasicBlock::iterator E,
1890 SmallPtrSetImpl<MachineInstr*> &MemOps,
1891 SmallSet<unsigned, 4> &MemRegs,
1892 const TargetRegisterInfo *TRI) {
1893 // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // here.
1896 SmallSet<unsigned, 4> AddedRegPressure;
1898 if (I->isDebugValue() || MemOps.count(&*I))
1900 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1902 if (isLd && I->mayStore())
1907 // It's not safe to move the first 'str' down.
1910 // str r4, [r0, #+4]
1914 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1915 MachineOperand &MO = I->getOperand(j);
1918 unsigned Reg = MO.getReg();
1919 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1921 if (Reg != Base && !MemRegs.count(Reg))
1922 AddedRegPressure.insert(Reg);
1926 // Estimate register pressure increase due to the transformation.
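  // Editorial gloss on the heuristic below: moving up to four ops together is
  // always considered acceptable; for larger groups the move is only allowed
  // while the number of extra values kept live across the gap
  // (AddedRegPressure) stays within roughly twice the number of memory
  // registers being moved.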
1927 if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
1930 return AddedRegPressure.size() <= MemRegs.size() * 2;
1934 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
1935 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1936 MachineInstr *Op1) {
1937 assert(MI->memoperands_empty() && "expected a new machineinstr");
1938 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1939 + (Op1->memoperands_end() - Op1->memoperands_begin());
1941 MachineFunction *MF = MI->getParent()->getParent();
1942 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1943 MachineSDNode::mmo_iterator MemEnd =
1944 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1947 MI->setMemRefs(MemBegin, MemEnd);
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
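
/// Given the loads / stores in Ops, which all use the same base register,
/// find runs of accesses at consecutive offsets and move them next to each
/// other so that a later pass can merge them into ldm / stm. When exactly two
/// accesses qualify, try to form an LDRD / STRD directly and add register
/// allocation hints for an even / odd register pair. Returns true if any
/// change was made.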
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
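
/// Scan the basic block for loads and stores, group them by base register,
/// and hand each group to RescheduleOps. Calls and terminators act as
/// scheduling barriers: the current groups are flushed and scanning restarts
/// after them.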
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);
      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}