//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
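///
/// A minimal sketch of the rewrite (illustrative assembly, not taken from a
/// real compilation):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}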
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
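///
/// A minimal sketch (illustrative Thumb1 assembly, not from a real
/// compilation): once a merge forces writeback, e.g.
///   ldmia r3!, {r0, r1}
/// a later "ldr r2, [r3, #8]" is rewritten to "ldr r2, [r3]", and a
/// "subs r3, #8" is inserted before the first instruction that cannot
/// absorb the offset (or at the block end if the base stays live).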
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");

  // Start updating any instructions with immediate offsets. Insert a sub before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    if (MBBI->readsRegister(Base)) {
      unsigned Opc = MBBI->getOpcode();
      int Offset;
      bool InsertSub = false;

      if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
          Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
          Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);
        if (Offset >= 0)
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
        // SUB/ADD using this register. Merge it with the update.
        // If the merged offset is too large, insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (TL->isLegalAddImmediate(Offset)) {
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

      if (InsertSub) {
        // An instruction above couldn't be updated, so insert a sub.
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
          .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
          .addImm(Pred).addReg(PredReg);
        return;
      }
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // The end of the block was reached. This means register liveness escapes the
  // block, and it's necessary to insert a sub before the last instruction.
  if (MBB.succ_size() > 0)
    // But only insert the SUB if there is actually a successor block.
    // FIXME: Check more carefully if register is live at this point, e.g. by
    // also examining the successor block's register liveness information.
    AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
      .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
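///
/// For example (a sketch, assuming an ARM-mode store chain): with Base == r4,
/// Offset == 0 and Regs == {(r0, kill), (r1, kill)} this emits
/// "stmia r4, {r0, r1}"; a nonzero starting offset instead selects the
/// ib/da/db submodes or materializes a new base register first.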
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
      Offset = -Offset;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      if (Base != NewBase) {
        // Need to insert a MOV to the new base first.
        // FIXME: If the immediate fits in 3 bits, use ADD instead.
        BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
          .addReg(Base, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg);
      }
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
        .addReg(NewBase, getKillRegState(true)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }

    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback. Check for this.
  if (Opcode == ARM::tLDMIA && isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        Writeback = false;
        break;
      }

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback. Update
    // future uses of the base with the added offset (if possible), or reset
    // the base register as necessary.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
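///
/// For example (a sketch): a VLDRS writing s0 may carry an implicit def of
/// the containing d0; any instruction in the range that reads d0 off that
/// imp-def must be collected here so MergeOpsUpdate can mark the use undef
/// once the definition moves to the merged position.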
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // liveness markers.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
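///
/// For example (a sketch): a queue of "ldr r0, [r4]; ldr r1, [r4, #4];
/// ldr r2, [r4, #8]" merges into one "ldmia r4, {r0, r1, r2}"; an entry that
/// breaks the ascending register/offset pattern ends the current run, which
/// is merged before the routine recurses on the remainder.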
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default:
    break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VLDRD:
  case ARM::VSTRD:
    Limit = 16;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }
  return false;
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
  // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
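// For example (a sketch): with Base == r4, Bytes == 16 and a matching
// predicate, isMatchingDecrement accepts "sub r4, r4, #16" (or the
// tSUBspi/tSUBi8/t2SUBri forms, with the scaling above), letting callers
// fold the decrement into a base-writeback load / store.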
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
  // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:  case ARM::STRi12:
  case ARM::tLDRi:   case ARM::tSTRi:
  case ARM::t2LDRi8: case ARM::t2LDRi12:
  case ARM::t2STRi8: case ARM::t2STRi12:
  case ARM::VLDRS:   case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:   case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:   case ARM::LDMDA:   case ARM::LDMDB:   case ARM::LDMIB:
  case ARM::STMIA:   case ARM::STMDA:   case ARM::STMDB:   case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA: case ARM::t2LDMDB:
  case ARM::t2STMIA: case ARM::t2STMDB:
  case ARM::VLDMSIA: case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA: case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
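///
/// A minimal sketch (illustrative assembly):
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4   @ post-indexed form
/// and, symmetrically, an increment/decrement before the memop folds into a
/// pre-indexed form.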
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
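///
/// A chain of candidate memops is accumulated per (base, opcode, predicate)
/// and flushed through MergeLDR_STR when the scan hits a clobber of the base
/// (e.g. "r5 := ldr [r5, #4]"), an offset collision, a non-mergeable
/// instruction, or the end of the block.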
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write (WAW) dependence. The same as
      // str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  TL = TM.getSubtargetImpl()->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getSubtargetImpl()->getInstrInfo();
  TRI = TM.getSubtargetImpl()->getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  // FIXME: Temporarily disabling for Thumb-1 due to miscompiles
  if (isThumb1) {
    delete RS;
    return false;
  }

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
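///
/// For example (a sketch): in "ldr r0, [r4]; mul r2, r2, r3;
/// ldr r1, [r4, #4]" the loads can be moved next to each other, so a later
/// pass (or the LDRD/STRD formation below) can combine them.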
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getSubtarget().getDataLayout();
  TII = Fn.getSubtarget().getInstrInfo();
  TRI = Fn.getSubtarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if possible.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}