//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
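//
// For example (register choices here are purely illustrative), a sequence of
// word loads from consecutive offsets off one base register,
//
//   ldr r1, [r0]
//   ldr r2, [r0, #4]
//   ldr r3, [r0, #8]
//
// can be rewritten as a single load-multiple instruction:
//
//   ldmia r0, {r1, r2, r3}
//
// Stores are merged into stm instructions the same way.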
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  const TargetLowering *TL;
  ARMFunctionInfo *AFI;
  RegScavenger *RS;
  bool isThumb1, isThumb2;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "ARM load / store optimization pass";
  }
  struct MemOpQueueEntry {
    int Offset;
    unsigned Reg;
    bool isKill;
    unsigned Position;
    MachineBasicBlock::iterator MBBI;
    bool Merged;
    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
  };
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;
97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
98 const MemOpQueue &MemOps, unsigned DefReg,
99 unsigned RangeBegin, unsigned RangeEnd);
100 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
101 int Offset, unsigned Base, bool BaseKill, int Opcode,
102 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
104 ArrayRef<std::pair<unsigned, bool> > Regs,
105 ArrayRef<unsigned> ImpDefs);
106 void MergeOpsUpdate(MachineBasicBlock &MBB,
108 unsigned memOpsBegin,
110 unsigned insertAfter,
115 ARMCC::CondCodes Pred,
119 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
120 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
121 int Opcode, unsigned Size,
122 ARMCC::CondCodes Pred, unsigned PredReg,
123 unsigned Scratch, MemOpQueue &MemOps,
124 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
125 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
126 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
127 MachineBasicBlock::iterator &MBBI);
128 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
129 MachineBasicBlock::iterator MBBI,
130 const TargetInstrInfo *TII,
132 MachineBasicBlock::iterator &I);
133 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
134 MachineBasicBlock::iterator MBBI,
136 MachineBasicBlock::iterator &I);
137 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
138 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
140 char ARMLoadStoreOpt::ID = 0;
143 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
145 default: llvm_unreachable("Unhandled opcode!");
149 default: llvm_unreachable("Unhandled submode!");
150 case ARM_AM::ia: return ARM::LDMIA;
151 case ARM_AM::da: return ARM::LDMDA;
152 case ARM_AM::db: return ARM::LDMDB;
153 case ARM_AM::ib: return ARM::LDMIB;
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::STMIA;
160 case ARM_AM::da: return ARM::STMDA;
161 case ARM_AM::db: return ARM::STMDB;
162 case ARM_AM::ib: return ARM::STMIB;
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
169 default: llvm_unreachable("Unhandled submode!");
170 case ARM_AM::ia: return ARM::tLDMIA;
173 // There is no non-writeback tSTMIA either.
176 default: llvm_unreachable("Unhandled submode!");
177 case ARM_AM::ia: return ARM::tSTMIA_UPD;
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::t2LDMIA;
185 case ARM_AM::db: return ARM::t2LDMDB;
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::t2STMIA;
193 case ARM_AM::db: return ARM::t2STMDB;
198 default: llvm_unreachable("Unhandled submode!");
199 case ARM_AM::ia: return ARM::VLDMSIA;
200 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
205 default: llvm_unreachable("Unhandled submode!");
206 case ARM_AM::ia: return ARM::VSTMSIA;
207 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
212 default: llvm_unreachable("Unhandled submode!");
213 case ARM_AM::ia: return ARM::VLDMDIA;
214 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
219 default: llvm_unreachable("Unhandled submode!");
220 case ARM_AM::ia: return ARM::VSTMDIA;
221 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
229 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
231 default: llvm_unreachable("Unhandled opcode!");
238 case ARM::tLDMIA_UPD:
239 case ARM::tSTMIA_UPD:
240 case ARM::t2LDMIA_RET:
242 case ARM::t2LDMIA_UPD:
244 case ARM::t2STMIA_UPD:
246 case ARM::VLDMSIA_UPD:
248 case ARM::VSTMSIA_UPD:
250 case ARM::VLDMDIA_UPD:
252 case ARM::VSTMDIA_UPD:
266 case ARM::t2LDMDB_UPD:
268 case ARM::t2STMDB_UPD:
269 case ARM::VLDMSDB_UPD:
270 case ARM::VSTMSDB_UPD:
271 case ARM::VLDMDDB_UPD:
272 case ARM::VSTMDDB_UPD:
283 } // end namespace ARM_AM
284 } // end namespace llvm
286 static bool isT1i32Load(unsigned Opc) {
287 return Opc == ARM::tLDRi;
290 static bool isT2i32Load(unsigned Opc) {
291 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
294 static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
298 static bool isT1i32Store(unsigned Opc) {
299 return Opc == ARM::tSTRi;
302 static bool isT2i32Store(unsigned Opc) {
303 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
306 static bool isi32Store(unsigned Opc) {
307 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
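///
/// For example, merging three word loads at offsets 0, 4 and 8 off a base
/// register (say r4, with destinations r0-r2; registers here are purely
/// illustrative) produces a single
///
///   ldmia r4, {r0, r1, r2}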
314 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
315 MachineBasicBlock::iterator MBBI,
316 int Offset, unsigned Base, bool BaseKill,
317 int Opcode, ARMCC::CondCodes Pred,
318 unsigned PredReg, unsigned Scratch, DebugLoc dl,
319 ArrayRef<std::pair<unsigned, bool> > Regs,
320 ArrayRef<unsigned> ImpDefs) {
321 // Only a single register to load / store. Don't bother.
322 unsigned NumRegs = Regs.size();
326 ARM_AM::AMSubMode Mode = ARM_AM::ia;
327 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
328 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
329 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
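  // For example, with NumRegs == 3: a starting Offset of 4 selects IB
  // (increment-before), -8 selects DA (decrement-after) and -12 selects DB
  // (decrement-before); any other non-zero offset falls through to the code
  // below that materializes a new base register.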
331 if (Offset == 4 && haveIBAndDA) {
333 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
335 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
336 // VLDM/VSTM do not support DB mode without also updating the base reg.
338 } else if (Offset != 0) {
339 // Check if this is a supported opcode before inserting instructions to
340 // calculate a new base register.
341 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
350 if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
353 NewBase = Regs[NumRegs-1].first;
      // Use the scratch register as the new base.
362 isThumb2 ? ARM::t2ADDri :
363 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
364 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
369 isThumb2 ? ARM::t2SUBri :
370 (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
371 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
374 if (!TL->isLegalAddImmediate(Offset))
375 // FIXME: Try add with register operand?
376 return false; // Probably not worth it then.
    // Thumb1: depending on immediate size, use either
    //   ADD NewBase, Base, #imm3
    // or
    //   MOV NewBase, Base
    //   ADD NewBase, #imm8
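    // For instance (registers chosen purely for illustration), with Base = r1,
    // NewBase = r4 and Offset = 16 this emits "mov r4, r1" followed by
    // "adds r4, #16", since 16 does not fit the 3-bit immediate form.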
384 if (Base != NewBase && Offset >= 8) {
385 // Need to insert a MOV to the new base first.
386 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
387 .addReg(Base, getKillRegState(BaseKill))
388 .addImm(Pred).addReg(PredReg);
389 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
393 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
394 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
395 .addImm(Pred).addReg(PredReg);
397 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
398 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
399 .addImm(Pred).addReg(PredReg).addReg(0);
402 BaseKill = true; // New base is always killed straight away.
405 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
406 Opcode == ARM::VLDRD);
408 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
409 // base register writeback.
410 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
411 if (!Opcode) return false;
413 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
415 // Exception: If the base register is in the input reglist, Thumb1 LDM is
416 // non-writeback. Check for this.
417 if (Opcode == ARM::tLDMIA && isThumb1)
418 for (unsigned I = 0; I < NumRegs; ++I)
419 if (Base == Regs[I].first) {
  // If the merged instruction has writeback and the base register is not
  // killed, it's not safe to do the merge on Thumb1. This is because resetting
  // the base register writeback by inserting a SUBS sets the condition flags.
  // FIXME: Try something clever here to see if resetting the base register can
  // be avoided, e.g. by updating a later ADD/SUB of the base register with the
  // offset.
  if (isThumb1 && Writeback && !BaseKill) return false;
432 MachineInstrBuilder MIB;
435 if (Opcode == ARM::tLDMIA)
436 // Update tLDMIA with writeback if necessary.
437 Opcode = ARM::tLDMIA_UPD;
439 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
441 // Thumb1: we might need to set base writeback when building the MI.
442 MIB.addReg(Base, getDefRegState(true))
443 .addReg(Base, getKillRegState(BaseKill));
445 // No writeback, simply build the MachineInstr.
446 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
447 MIB.addReg(Base, getKillRegState(BaseKill));
450 MIB.addImm(Pred).addReg(PredReg);
452 for (unsigned i = 0; i != NumRegs; ++i)
453 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
454 | getKillRegState(Regs[i].second));
456 // Add implicit defs for super-registers.
457 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
458 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
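///
/// For example, if the memop at RangeBegin implicitly defines d8 (say, as the
/// super-register of its single-precision destination; the register is purely
/// illustrative) and another queued memop reads d8 before RangeEnd, that use
/// is collected here so it can be flagged undef once the defining memop has
/// been folded into the LDM/STM at RangeEnd.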
473 void ARMLoadStoreOpt::findUsesOfImpDef(
474 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
475 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
476 std::map<unsigned, MachineOperand *> Uses;
477 unsigned LastLivePos = RangeEnd;
479 // First we find all uses of this register with Position between RangeBegin
480 // and RangeEnd, any or all of these could be uses of a definition at
481 // RangeBegin. We also record the latest position a definition at RangeBegin
482 // would be considered live.
483 for (unsigned i = 0; i < MemOps.size(); ++i) {
484 MachineInstr &MI = *MemOps[i].MBBI;
485 unsigned MIPosition = MemOps[i].Position;
486 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
489 // If this instruction defines the register, then any later use will be of
490 // that definition rather than ours.
491 if (MI.definesRegister(DefReg))
492 LastLivePos = std::min(LastLivePos, MIPosition);
494 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
498 // If this instruction kills the register then (assuming liveness is
499 // correct when we start) we don't need to think about anything after here.
501 LastLivePos = std::min(LastLivePos, MIPosition);
503 Uses[MIPosition] = UseOp;
506 // Now we traverse the list of all uses, and append the ones that actually use
507 // our definition to the requested list.
508 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
511 // List is sorted by position so once we've found one out of range there
512 // will be no more to consider.
513 if (I->first > LastLivePos)
515 UsesOfImpDefs.push_back(I->second);
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
521 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
523 unsigned memOpsBegin, unsigned memOpsEnd,
524 unsigned insertAfter, int Offset,
525 unsigned Base, bool BaseKill,
527 ARMCC::CondCodes Pred, unsigned PredReg,
530 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
533 const unsigned insertPos = memOps[insertAfter].Position;
534 SmallSet<unsigned, 4> KilledRegs;
535 DenseMap<unsigned, unsigned> Killer;
536 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
537 if (i == memOpsBegin) {
542 if (memOps[i].Position < insertPos && memOps[i].isKill) {
543 unsigned Reg = memOps[i].Reg;
544 KilledRegs.insert(Reg);
549 SmallVector<std::pair<unsigned, bool>, 8> Regs;
550 SmallVector<unsigned, 8> ImpDefs;
551 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
552 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
553 unsigned Reg = memOps[i].Reg;
554 // If we are inserting the merged operation after an operation that
555 // uses the same register, make sure to transfer any kill flag.
556 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
557 Regs.push_back(std::make_pair(Reg, isKill));
559 // Collect any implicit defs of super-registers. They must be preserved.
560 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
561 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
563 unsigned DefReg = MO->getReg();
564 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
565 ImpDefs.push_back(DefReg);
567 // There may be other uses of the definition between this instruction and
568 // the eventual LDM/STM position. These should be marked undef if the
569 // merge takes place.
570 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
575 // Try to do the merge.
576 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
578 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
579 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
582 // Merge succeeded, update records.
583 Merges.push_back(std::prev(Loc));
  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // undef flags on those uses so later passes see a consistent picture.
589 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
590 E = UsesOfImpDefs.end();
594 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
595 // Remove kill flags from any memops that come before insertPos.
596 if (Regs[i-memOpsBegin].second) {
597 unsigned Reg = Regs[i-memOpsBegin].first;
598 if (KilledRegs.count(Reg)) {
599 unsigned j = Killer[Reg];
600 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
601 assert(Idx >= 0 && "Cannot find killing operand");
602 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
603 memOps[j].isKill = false;
605 memOps[i].isKill = true;
607 MBB.erase(memOps[i].MBBI);
608 // Update this memop to refer to the merged instruction.
609 // We may need to move kill flags again.
610 memOps[i].Merged = true;
611 memOps[i].MBBI = Merges.back();
612 memOps[i].Position = insertPos;
616 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
617 /// load / store multiple instructions.
619 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
620 unsigned Base, int Opcode, unsigned Size,
621 ARMCC::CondCodes Pred, unsigned PredReg,
622 unsigned Scratch, MemOpQueue &MemOps,
623 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
624 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
625 int Offset = MemOps[SIndex].Offset;
626 int SOffset = Offset;
627 unsigned insertAfter = SIndex;
628 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
629 DebugLoc dl = Loc->getDebugLoc();
630 const MachineOperand &PMO = Loc->getOperand(0);
631 unsigned PReg = PMO.getReg();
632 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
634 unsigned Limit = ~0U;
635 bool BaseKill = false;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.
654 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
655 int NewOffset = MemOps[i].Offset;
656 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
657 unsigned Reg = MO.getReg();
658 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
659 // Register numbers must be in ascending order. For VFP / NEON load and
660 // store multiples, the registers must also be consecutive and within the
661 // limit on the number of registers per instruction.
662 if (Reg != ARM::SP &&
663 NewOffset == Offset + (int)Size &&
664 ((isNotVFP && RegNum > PRegNum) ||
665 ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register num
        // because Q register unaligned vldm/vstm need more uops.
668 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      // Can't merge this in. Try merging the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);
678 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
679 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
680 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
685 if (MemOps[i].Position > MemOps[insertAfter].Position) {
687 Loc = MemOps[i].MBBI;
691 BaseKill = Loc->killsRegister(Base);
692 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
693 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
696 static bool definesCPSR(MachineInstr *MI) {
697 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
698 const MachineOperand &MO = MI->getOperand(i);
701 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
710 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
711 unsigned Bytes, unsigned Limit,
712 ARMCC::CondCodes Pred, unsigned PredReg) {
713 unsigned MyPredReg = 0;
717 bool CheckCPSRDef = false;
718 switch (MI->getOpcode()) {
719 default: return false;
729 // Make sure the offset fits in 8 bits.
730 if (Bytes == 0 || (Limit && Bytes >= Limit))
733 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
734 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
735 if (!(MI->getOperand(0).getReg() == Base &&
736 MI->getOperand(1).getReg() == Base &&
737 (MI->getOperand(2).getImm() * Scale) == Bytes &&
738 getInstrPredicate(MI, MyPredReg) == Pred &&
739 MyPredReg == PredReg))
742 return CheckCPSRDef ? !definesCPSR(MI) : true;
745 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
746 unsigned Bytes, unsigned Limit,
747 ARMCC::CondCodes Pred, unsigned PredReg) {
748 unsigned MyPredReg = 0;
752 bool CheckCPSRDef = false;
753 switch (MI->getOpcode()) {
754 default: return false;
764 if (Bytes == 0 || (Limit && Bytes >= Limit))
765 // Make sure the offset fits in 8 bits.
768 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
769 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
770 if (!(MI->getOperand(0).getReg() == Base &&
771 MI->getOperand(1).getReg() == Base &&
772 (MI->getOperand(2).getImm() * Scale) == Bytes &&
773 getInstrPredicate(MI, MyPredReg) == Pred &&
774 MyPredReg == PredReg))
777 return CheckCPSRDef ? !definesCPSR(MI) : true;
780 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
781 switch (MI->getOpcode()) {
806 case ARM::tLDMIA_UPD:
807 case ARM::tSTMIA_UPD:
814 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
817 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
821 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
822 ARM_AM::AMSubMode Mode) {
824 default: llvm_unreachable("Unhandled opcode!");
830 default: llvm_unreachable("Unhandled submode!");
831 case ARM_AM::ia: return ARM::LDMIA_UPD;
832 case ARM_AM::ib: return ARM::LDMIB_UPD;
833 case ARM_AM::da: return ARM::LDMDA_UPD;
834 case ARM_AM::db: return ARM::LDMDB_UPD;
841 default: llvm_unreachable("Unhandled submode!");
842 case ARM_AM::ia: return ARM::STMIA_UPD;
843 case ARM_AM::ib: return ARM::STMIB_UPD;
844 case ARM_AM::da: return ARM::STMDA_UPD;
845 case ARM_AM::db: return ARM::STMDB_UPD;
850 default: llvm_unreachable("Unhandled submode!");
851 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
852 case ARM_AM::db: return ARM::t2LDMDB_UPD;
857 default: llvm_unreachable("Unhandled submode!");
858 case ARM_AM::ia: return ARM::t2STMIA_UPD;
859 case ARM_AM::db: return ARM::t2STMDB_UPD;
863 default: llvm_unreachable("Unhandled submode!");
864 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
865 case ARM_AM::db: return ARM::VLDMSDB_UPD;
869 default: llvm_unreachable("Unhandled submode!");
870 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
871 case ARM_AM::db: return ARM::VLDMDDB_UPD;
875 default: llvm_unreachable("Unhandled submode!");
876 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
877 case ARM_AM::db: return ARM::VSTMSDB_UPD;
881 default: llvm_unreachable("Unhandled submode!");
882 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
883 case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
900 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
901 MachineBasicBlock::iterator MBBI,
903 MachineBasicBlock::iterator &I) {
904 // Thumb1 is already using updating loads/stores.
905 if (isThumb1) return false;
907 MachineInstr *MI = MBBI;
908 unsigned Base = MI->getOperand(0).getReg();
909 bool BaseKill = MI->getOperand(0).isKill();
910 unsigned Bytes = getLSMultipleTransferSize(MI);
911 unsigned PredReg = 0;
912 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
913 int Opcode = MI->getOpcode();
914 DebugLoc dl = MI->getDebugLoc();
916 // Can't use an updating ld/st if the base register is also a dest
917 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
918 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
919 if (MI->getOperand(i).getReg() == Base)
922 bool DoMerge = false;
923 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
925 // Try merging with the previous instruction.
926 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
927 if (MBBI != BeginMBBI) {
928 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
929 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
931 if (Mode == ARM_AM::ia &&
932 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
935 } else if (Mode == ARM_AM::ib &&
936 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
944 // Try merging with the next instruction.
945 MachineBasicBlock::iterator EndMBBI = MBB.end();
946 if (!DoMerge && MBBI != EndMBBI) {
947 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
948 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
950 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
951 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
953 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
954 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
969 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
970 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
971 .addReg(Base, getDefRegState(true)) // WB base register
972 .addReg(Base, getKillRegState(BaseKill))
973 .addImm(Pred).addReg(PredReg);
  // Transfer the rest of the operands.
976 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
977 MIB.addOperand(MI->getOperand(OpNum));
979 // Transfer memoperands.
980 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
986 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
987 ARM_AM::AddrOpc Mode) {
990 return ARM::LDR_PRE_IMM;
992 return ARM::STR_PRE_IMM;
994 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
996 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
998 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1000 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1003 return ARM::t2LDR_PRE;
1006 return ARM::t2STR_PRE;
1007 default: llvm_unreachable("Unhandled opcode!");
1011 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1012 ARM_AM::AddrOpc Mode) {
1015 return ARM::LDR_POST_IMM;
1017 return ARM::STR_POST_IMM;
1019 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1021 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1023 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1025 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1028 return ARM::t2LDR_POST;
1031 return ARM::t2STR_POST;
1032 default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
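///
/// For example (illustrative registers):
///
///   ldr r0, [r1]
///   add r1, r1, #4
/// =>
///   ldr r0, [r1], #4
///
/// and likewise a preceding "add r1, r1, #4" becomes a pre-indexed
/// "ldr r0, [r1, #4]!".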
1038 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1039 MachineBasicBlock::iterator MBBI,
1040 const TargetInstrInfo *TII,
1042 MachineBasicBlock::iterator &I) {
1043 // Thumb1 doesn't have updating LDR/STR.
1044 // FIXME: Use LDM/STM with single register instead.
1045 if (isThumb1) return false;
1047 MachineInstr *MI = MBBI;
1048 unsigned Base = MI->getOperand(1).getReg();
1049 bool BaseKill = MI->getOperand(1).isKill();
1050 unsigned Bytes = getLSMultipleTransferSize(MI);
1051 int Opcode = MI->getOpcode();
1052 DebugLoc dl = MI->getDebugLoc();
1053 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1054 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1055 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1056 if (isi32Load(Opcode) || isi32Store(Opcode))
1057 if (MI->getOperand(2).getImm() != 0)
1059 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1062 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1063 // Can't do the merge if the destination register is the same as the would-be
1064 // writeback register.
1065 if (MI->getOperand(0).getReg() == Base)
1068 unsigned PredReg = 0;
1069 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1070 bool DoMerge = false;
1071 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1072 unsigned NewOpc = 0;
1073 // AM2 - 12 bits, thumb2 - 8 bits.
1074 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
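  // 0x1000 and 0x100 are the first out-of-range values for the 12-bit ARM and
  // 8-bit Thumb2 immediates mentioned above. VLDR/VSTR (isAM5) have no
  // immediate-update form at all, so no limit is used there and the update is
  // instead folded by switching to a writeback VLDM/VSTM (see below).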
1076 // Try merging with the previous instruction.
1077 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1078 if (MBBI != BeginMBBI) {
1079 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1080 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1082 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1084 AddSub = ARM_AM::sub;
1085 } else if (!isAM5 &&
1086 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1090 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1091 MBB.erase(PrevMBBI);
1095 // Try merging with the next instruction.
1096 MachineBasicBlock::iterator EndMBBI = MBB.end();
1097 if (!DoMerge && MBBI != EndMBBI) {
1098 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1099 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1102 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1104 AddSub = ARM_AM::sub;
1105 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1109 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1110 if (NextMBBI == I) {
1114 MBB.erase(NextMBBI);
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
1126 MachineOperand &MO = MI->getOperand(0);
1127 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1128 .addReg(Base, getDefRegState(true)) // WB base register
1129 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1130 .addImm(Pred).addReg(PredReg)
1131 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1132 getKillRegState(MO.isKill())));
1135 // LDR_PRE, LDR_POST
1136 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1137 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1138 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1139 .addReg(Base, RegState::Define)
1140 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1142 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1143 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1144 .addReg(Base, RegState::Define)
1145 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1148 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1149 // t2LDR_PRE, t2LDR_POST
1150 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1151 .addReg(Base, RegState::Define)
1152 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1155 MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
1159 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1160 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1161 // STR_PRE, STR_POST
1162 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1163 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1164 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1166 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1167 // t2STR_PRE, t2STR_POST
1168 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1169 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1170 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1178 /// isMemoryOp - Returns true if instruction is a memory operation that this
1179 /// pass is capable of operating on.
1180 static bool isMemoryOp(const MachineInstr *MI) {
1181 // When no memory operands are present, conservatively assume unaligned,
1182 // volatile, unfoldable.
1183 if (!MI->hasOneMemOperand())
1186 const MachineMemOperand *MMO = *MI->memoperands_begin();
1188 // Don't touch volatile memory accesses - we may be changing their order.
1189 if (MMO->isVolatile())
  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
1197 // str <undef> could probably be eliminated entirely, but for now we just want
1198 // to avoid making a mess of it.
1199 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1200 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1201 MI->getOperand(0).isUndef())
1204 // Likewise don't mess with references to undefined addresses.
1205 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1206 MI->getOperand(1).isUndef())
1209 int Opcode = MI->getOpcode();
1214 return MI->getOperand(1).isReg();
1217 return MI->getOperand(1).isReg();
1226 return MI->getOperand(1).isReg();
1231 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1232 /// op that is being merged.
1233 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1234 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1235 unsigned Position = MemOps[0].Position;
1236 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1237 if (MemOps[i].Position < Position) {
1238 Position = MemOps[i].Position;
1239 Loc = MemOps[i].MBBI;
1243 if (Loc != MBB.begin())
1244 RS->forward(std::prev(Loc));
1247 static int getMemoryOpOffset(const MachineInstr *MI) {
1248 int Opcode = MI->getOpcode();
1249 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1250 unsigned NumOperands = MI->getDesc().getNumOperands();
1251 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1253 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1254 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1255 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1256 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
  // Thumb1 immediate offsets are scaled by 4.
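  // For example, a tLDRi whose immediate operand is 3 addresses byte offset
  // 12; returning OffField * 4 keeps all offsets in this pass in bytes.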
1260 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
1261 return OffField * 4;
1263 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1264 : ARM_AM::getAM5Offset(OffField) * 4;
1266 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1269 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1275 static void InsertLDR_STR(MachineBasicBlock &MBB,
1276 MachineBasicBlock::iterator &MBBI,
1277 int Offset, bool isDef,
1278 DebugLoc dl, unsigned NewOpc,
1279 unsigned Reg, bool RegDeadKill, bool RegUndef,
1280 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1281 bool OffKill, bool OffUndef,
1282 ARMCC::CondCodes Pred, unsigned PredReg,
1283 const TargetInstrInfo *TII, bool isT2) {
1285 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1287 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1288 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1289 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1291 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1293 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1294 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1295 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1299 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1300 MachineBasicBlock::iterator &MBBI) {
1301 MachineInstr *MI = &*MBBI;
1302 unsigned Opcode = MI->getOpcode();
1303 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1304 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1305 const MachineOperand &BaseOp = MI->getOperand(2);
1306 unsigned BaseReg = BaseOp.getReg();
1307 unsigned EvenReg = MI->getOperand(0).getReg();
1308 unsigned OddReg = MI->getOperand(1).getReg();
1309 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1310 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1311 // ARM errata 602117: LDRD with base in list may result in incorrect base
1312 // register when interrupted or faulted.
1313 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1314 if (!Errata602117 &&
1315 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1318 MachineBasicBlock::iterator NewBBI = MBBI;
1319 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1320 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1321 bool EvenDeadKill = isLd ?
1322 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1323 bool EvenUndef = MI->getOperand(0).isUndef();
1324 bool OddDeadKill = isLd ?
1325 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1326 bool OddUndef = MI->getOperand(1).isUndef();
1327 bool BaseKill = BaseOp.isKill();
1328 bool BaseUndef = BaseOp.isUndef();
1329 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1330 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1331 int OffImm = getMemoryOpOffset(MI);
1332 unsigned PredReg = 0;
1333 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1335 if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to an
      // LDM or STM.
1338 unsigned NewOpc = (isLd)
1339 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1340 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1342 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1343 .addReg(BaseReg, getKillRegState(BaseKill))
1344 .addImm(Pred).addReg(PredReg)
1345 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1346 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1349 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1350 .addReg(BaseReg, getKillRegState(BaseKill))
1351 .addImm(Pred).addReg(PredReg)
1353 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1355 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1358 NewBBI = std::prev(MBBI);
1360 // Split into two instructions.
1361 unsigned NewOpc = (isLd)
1362 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1363 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1364 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1365 // so adjust and use t2LDRi12 here for that.
1366 unsigned NewOpc2 = (isLd)
1367 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1368 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1369 DebugLoc dl = MBBI->getDebugLoc();
1370 // If this is a load and base register is killed, it may have been
1371 // re-defed by the load, make sure the first load does not clobber it.
1373 (BaseKill || OffKill) &&
1374 (TRI->regsOverlap(EvenReg, BaseReg))) {
1375 assert(!TRI->regsOverlap(OddReg, BaseReg));
1376 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1377 OddReg, OddDeadKill, false,
1378 BaseReg, false, BaseUndef, false, OffUndef,
1379 Pred, PredReg, TII, isT2);
1380 NewBBI = std::prev(MBBI);
1381 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1382 EvenReg, EvenDeadKill, false,
1383 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1384 Pred, PredReg, TII, isT2);
1386 if (OddReg == EvenReg && EvenDeadKill) {
1387 // If the two source operands are the same, the kill marker is
1388 // probably on the first one. e.g.
1389 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1390 EvenDeadKill = false;
1393 // Never kill the base register in the first instruction.
1394 if (EvenReg == BaseReg)
1395 EvenDeadKill = false;
1396 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1397 EvenReg, EvenDeadKill, EvenUndef,
1398 BaseReg, false, BaseUndef, false, OffUndef,
1399 Pred, PredReg, TII, isT2);
1400 NewBBI = std::prev(MBBI);
1401 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1402 OddReg, OddDeadKill, OddUndef,
1403 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1404 Pred, PredReg, TII, isT2);
1419 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1420 /// ops of the same base and incrementing offset into LDM / STM ops.
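///
/// The block is scanned once; loads / stores that share a base register,
/// opcode and predicate are collected into a MemOpQueue ordered by offset, and
/// MergeLDR_STR then merges each run of consecutive offsets. For example
/// (illustrative registers), "ldr r1, [r0]; ldr r2, [r0, #4]; ldr r3, [r0, #8]"
/// is queued together and merged into "ldmia r0, {r1, r2, r3}".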
1421 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1422 unsigned NumMerges = 0;
1423 unsigned NumMemOps = 0;
1425 unsigned CurrBase = 0;
1427 unsigned CurrSize = 0;
1428 ARMCC::CondCodes CurrPred = ARMCC::AL;
1429 unsigned CurrPredReg = 0;
1430 unsigned Position = 0;
1431 SmallVector<MachineBasicBlock::iterator,4> Merges;
1433 RS->enterBasicBlock(&MBB);
1434 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1436 if (FixInvalidRegPairOp(MBB, MBBI))
1439 bool Advance = false;
1440 bool TryMerge = false;
1441 bool Clobber = false;
1443 bool isMemOp = isMemoryOp(MBBI);
1445 int Opcode = MBBI->getOpcode();
1446 unsigned Size = getLSMultipleTransferSize(MBBI);
1447 const MachineOperand &MO = MBBI->getOperand(0);
1448 unsigned Reg = MO.getReg();
1449 bool isKill = MO.isDef() ? false : MO.isKill();
1450 unsigned Base = MBBI->getOperand(1).getReg();
1451 unsigned PredReg = 0;
1452 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1453 int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      // Watch out for:
      //   r4 := ldr [r0, #8]
      //   r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same goes
      // for str. Try to merge inst(s) already in MemOps.
1472 bool Overlap = false;
1473 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1474 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1480 if (CurrBase == 0 && !Clobber) {
1481 // Start of a new chain.
1486 CurrPredReg = PredReg;
1487 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1490 } else if (!Overlap) {
1496 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1497 // No need to match PredReg.
1498 // Continue adding to the queue.
1499 if (Offset > MemOps.back().Offset) {
1500 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1505 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1507 if (Offset < I->Offset) {
1508 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1513 } else if (Offset == I->Offset) {
1514 // Collision! This can't be merged!
1523 if (MBBI->isDebugValue()) {
        // Reached the end of the block; try merging the memory instructions.
    } else if (Advance) {
        // Reached the end of the block; try merging the memory instructions.
1539 if (NumMemOps > 1) {
1540 // Try to find a free register to use as a new base in case it's needed.
1541 // First advance to the instruction just before the start of the chain.
1542 AdvanceRS(MBB, MemOps);
1544 // Find a scratch register.
1546 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1548 // Process the load / store instructions.
1549 RS->forward(std::prev(MBBI));
1553 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1554 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
1558 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1559 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1561 NumMerges += Merges.size();
        // Try folding preceding/trailing base inc/dec into those load / store
        // ops that were not merged to form LDM/STM ops.
1565 for (unsigned i = 0; i != NumMemOps; ++i)
1566 if (!MemOps[i].Merged)
1567 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1570 // RS may be pointing to an instruction that's deleted.
1571 RS->skipTo(std::prev(MBBI));
1572 } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1577 RS->forward(std::prev(MBBI));
1584 CurrPred = ARMCC::AL;
1591 // If iterator hasn't been advanced and this is not a memory op, skip it.
1592 // It can't start a new chain anyway.
1593 if (!Advance && !isMemOp && MBBI != E) {
1599 return NumMerges > 0;
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
1612 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1613 // Thumb1 LDM doesn't allow high registers.
1614 if (isThumb1) return false;
1615 if (MBB.empty()) return false;
1617 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1618 if (MBBI != MBB.begin() &&
1619 (MBBI->getOpcode() == ARM::BX_RET ||
1620 MBBI->getOpcode() == ARM::tBX_RET ||
1621 MBBI->getOpcode() == ARM::MOVPCLR)) {
1622 MachineInstr *PrevMI = std::prev(MBBI);
1623 unsigned Opcode = PrevMI->getOpcode();
1624 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1625 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1626 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1627 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1628 if (MO.getReg() != ARM::LR)
1630 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1631 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1632 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1633 PrevMI->setDesc(TII->get(NewOpc));
1635 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1643 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1644 const TargetMachine &TM = Fn.getTarget();
1645 TL = TM.getSubtargetImpl()->getTargetLowering();
1646 AFI = Fn.getInfo<ARMFunctionInfo>();
1647 TII = TM.getSubtargetImpl()->getInstrInfo();
1648 TRI = TM.getSubtargetImpl()->getRegisterInfo();
1649 STI = &TM.getSubtarget<ARMSubtarget>();
1650 RS = new RegScavenger();
1651 isThumb2 = AFI->isThumb2Function();
1652 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1654 bool Modified = false;
1655 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1657 MachineBasicBlock &MBB = *MFI;
1658 Modified |= LoadStoreMultipleOpti(MBB);
1659 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1660 Modified |= MergeReturnIntoLDM(MBB);
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
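///
/// For example (illustrative registers), given
///
///   ldr r1, [r0]
///   ...unrelated instructions...
///   ldr r2, [r0, #4]
///
/// the second load is moved next to the first so the pair can later be turned
/// into an ldrd (see CanFormLdStDWord) or merged into an ldm after register
/// allocation.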
1673 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1675 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1677 const DataLayout *TD;
1678 const TargetInstrInfo *TII;
1679 const TargetRegisterInfo *TRI;
1680 const ARMSubtarget *STI;
1681 MachineRegisterInfo *MRI;
1682 MachineFunction *MF;
1684 bool runOnMachineFunction(MachineFunction &Fn) override;
1686 const char *getPassName() const override {
1687 return "ARM pre- register allocation load / store optimization pass";
1691 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1692 unsigned &NewOpc, unsigned &EvenReg,
1693 unsigned &OddReg, unsigned &BaseReg,
1695 unsigned &PredReg, ARMCC::CondCodes &Pred,
1697 bool RescheduleOps(MachineBasicBlock *MBB,
1698 SmallVectorImpl<MachineInstr *> &Ops,
1699 unsigned Base, bool isLd,
1700 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1701 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1703 char ARMPreAllocLoadStoreOpt::ID = 0;
1706 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1707 TD = Fn.getSubtarget().getDataLayout();
1708 TII = Fn.getSubtarget().getInstrInfo();
1709 TRI = Fn.getSubtarget().getRegisterInfo();
1710 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1711 MRI = &Fn.getRegInfo();
1714 bool Modified = false;
1715 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1717 Modified |= RescheduleLoadStoreInstrs(MFI);
1722 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1723 MachineBasicBlock::iterator I,
1724 MachineBasicBlock::iterator E,
1725 SmallPtrSetImpl<MachineInstr*> &MemOps,
1726 SmallSet<unsigned, 4> &MemRegs,
1727 const TargetRegisterInfo *TRI) {
1728 // Are there stores / loads / calls between them?
1729 // FIXME: This is overly conservative. We should make use of alias information
1731 SmallSet<unsigned, 4> AddedRegPressure;
1733 if (I->isDebugValue() || MemOps.count(&*I))
1735 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1737 if (isLd && I->mayStore())
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
1749 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1750 MachineOperand &MO = I->getOperand(j);
1753 unsigned Reg = MO.getReg();
1754 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1756 if (Reg != Base && !MemRegs.count(Reg))
1757 AddedRegPressure.insert(Reg);
  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
1765 return AddedRegPressure.size() <= MemRegs.size() * 2;
/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
1770 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1771 MachineInstr *Op1) {
1772 assert(MI->memoperands_empty() && "expected a new machineinstr");
1773 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1774 + (Op1->memoperands_end() - Op1->memoperands_begin());
1776 MachineFunction *MF = MI->getParent()->getParent();
1777 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1778 MachineSDNode::mmo_iterator MemEnd =
1779 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1781 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1782 MI->setMemRefs(MemBegin, MemEnd);
1786 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1788 unsigned &NewOpc, unsigned &EvenReg,
1789 unsigned &OddReg, unsigned &BaseReg,
1790 int &Offset, unsigned &PredReg,
1791 ARMCC::CondCodes &Pred,
1793 // Make sure we're allowed to generate LDRD/STRD.
1794 if (!STI->hasV5TEOps())
1797 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1799 unsigned Opcode = Op0->getOpcode();
1800 if (Opcode == ARM::LDRi12) {
1802 } else if (Opcode == ARM::STRi12) {
1804 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1805 NewOpc = ARM::t2LDRDi8;
1808 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1809 NewOpc = ARM::t2STRDi8;
  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
1819 if (!Op0->hasOneMemOperand() ||
1820 (*Op0->memoperands_begin())->isVolatile())
1823 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1824 const Function *Func = MF->getFunction();
1825 unsigned ReqAlign = STI->hasV6Ops()
1826 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
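  // E.g. under AAPCS the ABI alignment of i64 is 8, so a pair of word accesses
  // whose memory operand only guarantees 4-byte alignment fails this check and
  // is not combined into an LDRD/STRD.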
1828 if (Align < ReqAlign)
1831 // Then make sure the immediate offset fits.
1832 int OffImm = getMemoryOpOffset(Op0);
1834 int Limit = (1 << 8) * Scale;
1835 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1839 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1841 AddSub = ARM_AM::sub;
1844 int Limit = (1 << 8) * Scale;
1845 if (OffImm >= Limit || (OffImm & (Scale-1)))
1847 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1849 EvenReg = Op0->getOperand(0).getReg();
1850 OddReg = Op1->getOperand(0).getReg();
1851 if (EvenReg == OddReg)
1853 BaseReg = Op0->getOperand(1).getReg();
1854 Pred = getInstrPredicate(Op0, PredReg);
1855 dl = Op0->getDebugLoc();
1859 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1860 SmallVectorImpl<MachineInstr *> &Ops,
1861 unsigned Base, bool isLd,
1862 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1863 bool RetVal = false;
1865 // Sort by offset (in reverse order).
1866 std::sort(Ops.begin(), Ops.end(),
1867 [](const MachineInstr *LHS, const MachineInstr *RHS) {
1868 int LOffset = getMemoryOpOffset(LHS);
1869 int ROffset = getMemoryOpOffset(RHS);
1870 assert(LHS == RHS || LOffset != ROffset);
1871 return LOffset > ROffset;
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
1878 while (Ops.size() > 1) {
1879 unsigned FirstLoc = ~0U;
1880 unsigned LastLoc = 0;
1881 MachineInstr *FirstOp = nullptr;
1882 MachineInstr *LastOp = nullptr;
1884 unsigned LastOpcode = 0;
1885 unsigned LastBytes = 0;
1886 unsigned NumMove = 0;
1887 for (int i = Ops.size() - 1; i >= 0; --i) {
1888 MachineInstr *Op = Ops[i];
1889 unsigned Loc = MI2LocMap[Op];
1890 if (Loc <= FirstLoc) {
1894 if (Loc >= LastLoc) {
1900 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1901 if (LastOpcode && LSMOpcode != LastOpcode)
1904 int Offset = getMemoryOpOffset(Op);
1905 unsigned Bytes = getLSMultipleTransferSize(Op);
1907 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1910 LastOffset = Offset;
1912 LastOpcode = LSMOpcode;
1913 if (++NumMove == 8) // FIXME: Tune this limit.
1920 SmallPtrSet<MachineInstr*, 4> MemOps;
1921 SmallSet<unsigned, 4> MemRegs;
1922 for (int i = NumMove-1; i >= 0; --i) {
1923 MemOps.insert(Ops[i]);
1924 MemRegs.insert(Ops[i]->getOperand(0).getReg());
      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
1929 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
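      // E.g. for a pair of memops (NumMove == 2) the window is 8 locations:
      // ops recorded at positions 10 and 30 are considered too far apart and
      // are left where they are.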
1931 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1932 MemOps, MemRegs, TRI);
1934 for (unsigned i = 0; i != NumMove; ++i)
1937 // This is the new location for the loads / stores.
1938 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1939 while (InsertPos != MBB->end()
1940 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1943 // If we are moving a pair of loads / stores, see if it makes sense
1944 // to try to allocate a pair of registers that can form register pairs.
1945 MachineInstr *Op0 = Ops.back();
1946 MachineInstr *Op1 = Ops[Ops.size()-2];
1947 unsigned EvenReg = 0, OddReg = 0;
1948 unsigned BaseReg = 0, PredReg = 0;
1949 ARMCC::CondCodes Pred = ARMCC::AL;
1951 unsigned NewOpc = 0;
1954 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1955 EvenReg, OddReg, BaseReg,
1956 Offset, PredReg, Pred, isT2)) {
1960 const MCInstrDesc &MCID = TII->get(NewOpc);
1961 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1962 MRI->constrainRegClass(EvenReg, TRC);
1963 MRI->constrainRegClass(OddReg, TRC);
1965 // Form the pair instruction.
1967 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1968 .addReg(EvenReg, RegState::Define)
1969 .addReg(OddReg, RegState::Define)
          // FIXME: We're converting from LDRi12 to an insn that still
          // uses addrmode2, so we need an explicit offset reg. It should
          // always be reg0 since we're transforming LDRi12s.
1976 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1977 concatenateMemOperands(MIB, Op0, Op1);
1978 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1981 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
          // FIXME: We're converting from STRi12 to an insn that still
          // uses addrmode2, so we need an explicit offset reg. It should
          // always be reg0 since we're transforming STRi12s.
1990 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1991 concatenateMemOperands(MIB, Op0, Op1);
1992 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1998 // Add register allocation hints to form register pairs.
1999 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
2000 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
2002 for (unsigned i = 0; i != NumMove; ++i) {
2003 MachineInstr *Op = Ops.back();
2005 MBB->splice(InsertPos, MBB, Op);
2009 NumLdStMoved += NumMove;
2019 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2020 bool RetVal = false;
2022 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2023 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2024 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2025 SmallVector<unsigned, 4> LdBases;
2026 SmallVector<unsigned, 4> StBases;
2029 MachineBasicBlock::iterator MBBI = MBB->begin();
2030 MachineBasicBlock::iterator E = MBB->end();
2032 for (; MBBI != E; ++MBBI) {
2033 MachineInstr *MI = MBBI;
2034 if (MI->isCall() || MI->isTerminator()) {
2035 // Stop at barriers.
2040 if (!MI->isDebugValue())
2041 MI2LocMap[MI] = ++Loc;
2043 if (!isMemoryOp(MI))
2045 unsigned PredReg = 0;
2046 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2049 int Opc = MI->getOpcode();
2050 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
2051 unsigned Base = MI->getOperand(1).getReg();
2052 int Offset = getMemoryOpOffset(MI);
2054 bool StopHere = false;
2056 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2057 Base2LdsMap.find(Base);
2058 if (BI != Base2LdsMap.end()) {
2059 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2060 if (Offset == getMemoryOpOffset(BI->second[i])) {
2066 BI->second.push_back(MI);
2068 Base2LdsMap[Base].push_back(MI);
2069 LdBases.push_back(Base);
2072 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2073 Base2StsMap.find(Base);
2074 if (BI != Base2StsMap.end()) {
2075 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2076 if (Offset == getMemoryOpOffset(BI->second[i])) {
2082 BI->second.push_back(MI);
2084 Base2StsMap[Base].push_back(MI);
2085 StBases.push_back(Base);
2090 // Found a duplicate (a base+offset combination that's seen earlier).
2097 // Re-schedule loads.
2098 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2099 unsigned Base = LdBases[i];
2100 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2102 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2105 // Re-schedule stores.
2106 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2107 unsigned Base = StBases[i];
2108 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2110 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
2114 Base2LdsMap.clear();
2115 Base2StsMap.clear();
2125 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
2126 /// optimization pass.
2127 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2129 return new ARMPreAllocLoadStoreOpt();
2130 return new ARMLoadStoreOpt();