//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  const TargetLowering *TL;
  ARMFunctionInfo *AFI;
  RegScavenger *RS;
  bool isThumb1, isThumb2;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "ARM load / store optimization pass";
  }

private:
  struct MemOpQueueEntry {
    int Offset;
    unsigned Reg;
    bool isKill;
    unsigned Position;
    MachineBasicBlock::iterator MBBI;
    bool Merged;
    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
  };
  typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;
  void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                        const MemOpQueue &MemOps, unsigned DefReg,
                        unsigned RangeBegin, unsigned RangeEnd);
  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                DebugLoc dl,
                ArrayRef<std::pair<unsigned, bool> > Regs,
                ArrayRef<unsigned> ImpDefs);
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      MemOpQueue &memOps,
                      unsigned memOpsBegin,
                      unsigned memOpsEnd,
                      unsigned insertAfter,
                      int Offset,
                      unsigned Base,
                      bool BaseKill,
                      int Opcode,
                      ARMCC::CondCodes Pred,
                      unsigned PredReg,
                      unsigned Scratch,
                      DebugLoc dl,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                bool &Advance,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 bool &Advance,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};

char ARMLoadStoreOpt::ID = 0;
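
// Illustrative note (not from the original source): getLoadStoreMultipleOpcode
// below maps a plain load/store opcode plus an addressing submode onto the
// matching load/store-multiple opcode, e.g.
//   getLoadStoreMultipleOpcode(ARM::LDRi12, ARM_AM::ia) == ARM::LDMIA
//   getLoadStoreMultipleOpcode(ARM::STRi12, ARM_AM::db) == ARM::STMDB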
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
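///
/// Illustrative example (not from the original source), assuming ARM mode,
/// ascending register numbers and consecutive word offsets:
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}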
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    unsigned BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
      Offset = -Offset;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.
    if (isThumb1) {
      if (Base != NewBase) {
        // Need to insert a MOV to the new base first.
        // FIXME: If the immediate fits in 3 bits, use ADD instead.
        BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
          .addReg(Base, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg);
      }
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
        .addReg(NewBase, getKillRegState(true)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    } else
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);

    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback. Check for this.
  if (Opcode == ARM::tLDMIA && isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        Writeback = false;
        break;
      }

  // If the merged instruction has writeback and the base register is not
  // killed, it's not safe to do the merge on Thumb1. This is because resetting
  // the base register writeback by inserting a SUBS sets the condition flags.
  // FIXME: Try something clever here to see if resetting the base register can
  // be avoided, e.g. by updating a later ADD/SUB of the base register with the
  // offset.
  if (isThumb1 && Writeback && !BaseKill) return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually use
  // our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses, marking them undef.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
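///
/// Illustrative example (not from the original source), ARM mode: register
/// numbers must ascend along with the offsets, so a descending register
/// breaks the run and the pieces are merged separately:
///   ldr r2, [r9]
///   ldr r1, [r9, #4]
///   ldr r3, [r9, #8]
/// =>
///   ldr r2, [r9]
///   ldmib r9, {r1, r3}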
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limits are 32 for the S variants, 16 for the D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMIB:
  case ARM::LDMDA:
  case ARM::LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMIB:
  case ARM::STMDA:
  case ARM::STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
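///
/// Illustrative example (not from the original source), post-increment fold:
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4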
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;
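
  // Illustrative note (not from the original source): LDRi12 and friends
  // store the byte offset directly, so "ldr r0, [r1, #8]" yields 8 here,
  // while a tLDRi immediate operand of 2 also denotes a byte offset of 8.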
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op, skip
      // it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  TL = TM.getSubtargetImpl()->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getSubtargetImpl()->getInstrInfo();
  TRI = TM.getSubtargetImpl()->getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
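///
/// Illustrative example (not from the original source): the two loads off the
/// same base are moved next to each other, leaving the unrelated add after
/// them, so a later pass can pair or merge them:
///   ldr r2, [r0]
///   add r7, r7, #1
///   ldr r3, [r0, #4]
/// =>
///   ldr r2, [r0]
///   ldr r3, [r0, #4]
///   add r7, r7, #1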
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  const DataLayout *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "ARM pre- register allocation load / store optimization pass";
  }

private:
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
                        bool &isT2);
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVectorImpl<MachineInstr *> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getSubtarget().getDataLayout();
  TII = Fn.getSubtarget().getInstrInfo();
  TRI = Fn.getSubtarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;
  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
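
  // Illustrative note (not from the original source): with Scale == 4,
  // t2LDRDi8/t2STRDi8 accept a word-aligned byte offset in (-1024, 1024);
  // with Scale == 1, ARM LDRD/STRD (addrmode3) encode an 8-bit magnitude
  // plus an add/sub flag, so |OffImm| must be below 256.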
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVectorImpl<MachineInstr *> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd,  EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
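///
/// Illustrative usage (not from this file): the ARM target pipeline creates
/// the pre-allocation variant before register allocation and the
/// post-allocation variant afterwards, e.g.
///   addPass(createARMLoadStoreOptimizationPass(/* PreAlloc = */ true));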
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}