1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "Thumb1RegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Target/TargetInstrInfo.h"
42 #include "llvm/Target/TargetMachine.h"
43 #include "llvm/Target/TargetRegisterInfo.h"
46 #define DEBUG_TYPE "arm-ldst-opt"
48 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
49 STATISTIC(NumSTMGened , "Number of stm instructions generated");
50 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
51 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
52 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
53 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
54 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
55 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
56 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
57 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
58 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
60 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
61 /// load / store instructions to form ldm / stm instructions.
64 struct ARMLoadStoreOpt : public MachineFunctionPass {
66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
68 const TargetInstrInfo *TII;
69 const TargetRegisterInfo *TRI;
70 const ARMSubtarget *STI;
71 const TargetLowering *TL;
74 bool isThumb1, isThumb2;
76 bool runOnMachineFunction(MachineFunction &Fn) override;
78 const char *getPassName() const override {
79 return "ARM load / store optimization pass";
83 struct MemOpQueueEntry {
88 MachineBasicBlock::iterator MBBI;
90 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
91 MachineBasicBlock::iterator i)
92 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
95 typedef MemOpQueue::iterator MemOpQueueIter;
97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
98 const MemOpQueue &MemOps, unsigned DefReg,
99 unsigned RangeBegin, unsigned RangeEnd);
100 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
101 int Offset, unsigned Base, bool BaseKill, int Opcode,
102 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
104 ArrayRef<std::pair<unsigned, bool> > Regs,
105 ArrayRef<unsigned> ImpDefs);
106 void MergeOpsUpdate(MachineBasicBlock &MBB,
108 unsigned memOpsBegin,
110 unsigned insertAfter,
115 ARMCC::CondCodes Pred,
119 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
120 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
121 int Opcode, unsigned Size,
122 ARMCC::CondCodes Pred, unsigned PredReg,
123 unsigned Scratch, MemOpQueue &MemOps,
124 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
125 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
126 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
127 MachineBasicBlock::iterator &MBBI);
128 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
129 MachineBasicBlock::iterator MBBI,
130 const TargetInstrInfo *TII,
132 MachineBasicBlock::iterator &I);
133 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
134 MachineBasicBlock::iterator MBBI,
136 MachineBasicBlock::iterator &I);
137 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
138 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
140 char ARMLoadStoreOpt::ID = 0;
143 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
145 default: llvm_unreachable("Unhandled opcode!");
149 default: llvm_unreachable("Unhandled submode!");
150 case ARM_AM::ia: return ARM::LDMIA;
151 case ARM_AM::da: return ARM::LDMDA;
152 case ARM_AM::db: return ARM::LDMDB;
153 case ARM_AM::ib: return ARM::LDMIB;
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::STMIA;
160 case ARM_AM::da: return ARM::STMDA;
161 case ARM_AM::db: return ARM::STMDB;
162 case ARM_AM::ib: return ARM::STMIB;
165 // tLDMIA is writeback-only - unless the base register is in the input
169 default: llvm_unreachable("Unhandled submode!");
170 case ARM_AM::ia: return ARM::tLDMIA;
173 // There is no non-writeback tSTMIA either.
176 default: llvm_unreachable("Unhandled submode!");
177 case ARM_AM::ia: return ARM::tSTMIA_UPD;
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::t2LDMIA;
185 case ARM_AM::db: return ARM::t2LDMDB;
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::t2STMIA;
193 case ARM_AM::db: return ARM::t2STMDB;
198 default: llvm_unreachable("Unhandled submode!");
199 case ARM_AM::ia: return ARM::VLDMSIA;
200 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
205 default: llvm_unreachable("Unhandled submode!");
206 case ARM_AM::ia: return ARM::VSTMSIA;
207 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
212 default: llvm_unreachable("Unhandled submode!");
213 case ARM_AM::ia: return ARM::VLDMDIA;
214 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
219 default: llvm_unreachable("Unhandled submode!");
220 case ARM_AM::ia: return ARM::VSTMDIA;
221 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
229 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
231 default: llvm_unreachable("Unhandled opcode!");
238 case ARM::tLDMIA_UPD:
239 case ARM::tSTMIA_UPD:
240 case ARM::t2LDMIA_RET:
242 case ARM::t2LDMIA_UPD:
244 case ARM::t2STMIA_UPD:
246 case ARM::VLDMSIA_UPD:
248 case ARM::VSTMSIA_UPD:
250 case ARM::VLDMDIA_UPD:
252 case ARM::VSTMDIA_UPD:
266 case ARM::t2LDMDB_UPD:
268 case ARM::t2STMDB_UPD:
269 case ARM::VLDMSDB_UPD:
270 case ARM::VSTMSDB_UPD:
271 case ARM::VLDMDDB_UPD:
272 case ARM::VSTMDDB_UPD:
283 } // end namespace ARM_AM
284 } // end namespace llvm
286 static bool isT1i32Load(unsigned Opc) {
287 return Opc == ARM::tLDRi;
290 static bool isT2i32Load(unsigned Opc) {
291 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
294 static bool isi32Load(unsigned Opc) {
295 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
298 static bool isT1i32Store(unsigned Opc) {
299 return Opc == ARM::tSTRi;
302 static bool isT2i32Store(unsigned Opc) {
303 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
306 static bool isi32Store(unsigned Opc) {
307 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
310 /// MergeOps - Create and insert a LDM or STM with Base as base register and
311 /// registers in Regs as the register operands that would be loaded / stored.
312 /// It returns true if the transformation is done.
314 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
315 MachineBasicBlock::iterator MBBI,
316 int Offset, unsigned Base, bool BaseKill,
317 int Opcode, ARMCC::CondCodes Pred,
318 unsigned PredReg, unsigned Scratch, DebugLoc dl,
319 ArrayRef<std::pair<unsigned, bool> > Regs,
320 ArrayRef<unsigned> ImpDefs) {
321 // Only a single register to load / store. Don't bother.
322 unsigned NumRegs = Regs.size();
326 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
327 // Compute liveness information for that register to make the decision.
328 bool SafeToClobberCPSR = !isThumb1 ||
329 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
330 MachineBasicBlock::LQR_Dead);
332 ARM_AM::AMSubMode Mode = ARM_AM::ia;
333 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
334 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
335 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
337 if (Offset == 4 && haveIBAndDA) {
339 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
341 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
342 // VLDM/VSTM do not support DB mode without also updating the base reg.
344 } else if (Offset != 0) {
345 // Check if this is a supported opcode before inserting instructions to
346 // calculate a new base register.
347 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
349 // If starting offset isn't zero, insert a MI to materialize a new base.
350 // But only do so if it is cost effective, i.e. merging more than two
355 // On Thumb1, it's not worth materializing a new base register without
356 // clobbering the CPSR (i.e. not using ADDS/SUBS).
357 if (!SafeToClobberCPSR)
361 if (isi32Load(Opcode)) {
362 // If it is a load, then just use one of the destination register to
363 // use as the new base.
364 NewBase = Regs[NumRegs-1].first;
366 // Use the scratch register to use as a new base.
373 isThumb2 ? ARM::t2ADDri :
374 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
375 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
380 isThumb2 ? ARM::t2SUBri :
381 (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
382 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
385 if (!TL->isLegalAddImmediate(Offset))
386 // FIXME: Try add with register operand?
387 return false; // Probably not worth it then.
390 // Thumb1: depending on immediate size, use either
391 // ADDS NewBase, Base, #imm3
394 // ADDS NewBase, #imm8.
395 if (Base != NewBase && Offset >= 8) {
396 // Need to insert a MOV to the new base first.
397 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
398 .addReg(Base, getKillRegState(BaseKill))
399 .addImm(Pred).addReg(PredReg);
400 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
404 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
405 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
406 .addImm(Pred).addReg(PredReg);
408 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
409 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
410 .addImm(Pred).addReg(PredReg).addReg(0);
413 BaseKill = true; // New base is always killed straight away.
416 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
417 Opcode == ARM::VLDRD);
419 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
420 // base register writeback.
421 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
422 if (!Opcode) return false;
424 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
426 // Exception: If the base register is in the input reglist, Thumb1 LDM is
427 // non-writeback. Check for this.
428 if (Opcode == ARM::tLDMIA && isThumb1)
429 for (unsigned I = 0; I < NumRegs; ++I)
430 if (Base == Regs[I].first) {
435 // If the merged instruction has writeback and the base register is not killed
436 // it's not safe to do the merge on Thumb1. This is because resetting the base
437 // register writeback by inserting a SUBS sets the condition flags.
438 // FIXME: Try something clever here to see if resetting the base register can
439 // be avoided, e.g. by updating a later ADD/SUB of the base register with the
441 if (isThumb1 && Writeback && !BaseKill) return false;
443 MachineInstrBuilder MIB;
446 if (Opcode == ARM::tLDMIA)
447 // Update tLDMIA with writeback if necessary.
448 Opcode = ARM::tLDMIA_UPD;
450 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
452 // Thumb1: we might need to set base writeback when building the MI.
453 MIB.addReg(Base, getDefRegState(true))
454 .addReg(Base, getKillRegState(BaseKill));
456 // No writeback, simply build the MachineInstr.
457 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
458 MIB.addReg(Base, getKillRegState(BaseKill));
461 MIB.addImm(Pred).addReg(PredReg);
463 for (unsigned i = 0; i != NumRegs; ++i)
464 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
465 | getKillRegState(Regs[i].second));
467 // Add implicit defs for super-registers.
468 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
469 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
474 /// \brief Find all instructions using a given imp-def within a range.
476 /// We are trying to combine a range of instructions, one of which (located at
477 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
478 /// be placed at RangeEnd, and so any uses of this definition between RangeStart
479 /// and RangeEnd must be modified to use an undefined value.
481 /// The live range continues until we find a second definition or one of the
482 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
483 /// we must consider all uses and decide which are relevant in a second pass.
484 void ARMLoadStoreOpt::findUsesOfImpDef(
485 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
486 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
487 std::map<unsigned, MachineOperand *> Uses;
488 unsigned LastLivePos = RangeEnd;
490 // First we find all uses of this register with Position between RangeBegin
491 // and RangeEnd, any or all of these could be uses of a definition at
492 // RangeBegin. We also record the latest position a definition at RangeBegin
493 // would be considered live.
494 for (unsigned i = 0; i < MemOps.size(); ++i) {
495 MachineInstr &MI = *MemOps[i].MBBI;
496 unsigned MIPosition = MemOps[i].Position;
497 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
500 // If this instruction defines the register, then any later use will be of
501 // that definition rather than ours.
502 if (MI.definesRegister(DefReg))
503 LastLivePos = std::min(LastLivePos, MIPosition);
505 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
509 // If this instruction kills the register then (assuming liveness is
510 // correct when we start) we don't need to think about anything after here.
512 LastLivePos = std::min(LastLivePos, MIPosition);
514 Uses[MIPosition] = UseOp;
517 // Now we traverse the list of all uses, and append the ones that actually use
518 // our definition to the requested list.
519 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
522 // List is sorted by position so once we've found one out of range there
523 // will be no more to consider.
524 if (I->first > LastLivePos)
526 UsesOfImpDefs.push_back(I->second);
530 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
532 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
534 unsigned memOpsBegin, unsigned memOpsEnd,
535 unsigned insertAfter, int Offset,
536 unsigned Base, bool BaseKill,
538 ARMCC::CondCodes Pred, unsigned PredReg,
541 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
542 // First calculate which of the registers should be killed by the merged
544 const unsigned insertPos = memOps[insertAfter].Position;
545 SmallSet<unsigned, 4> KilledRegs;
546 DenseMap<unsigned, unsigned> Killer;
547 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
548 if (i == memOpsBegin) {
553 if (memOps[i].Position < insertPos && memOps[i].isKill) {
554 unsigned Reg = memOps[i].Reg;
555 KilledRegs.insert(Reg);
560 SmallVector<std::pair<unsigned, bool>, 8> Regs;
561 SmallVector<unsigned, 8> ImpDefs;
562 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
563 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
564 unsigned Reg = memOps[i].Reg;
565 // If we are inserting the merged operation after an operation that
566 // uses the same register, make sure to transfer any kill flag.
567 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
568 Regs.push_back(std::make_pair(Reg, isKill));
570 // Collect any implicit defs of super-registers. They must be preserved.
571 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
572 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
574 unsigned DefReg = MO->getReg();
575 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
576 ImpDefs.push_back(DefReg);
578 // There may be other uses of the definition between this instruction and
579 // the eventual LDM/STM position. These should be marked undef if the
580 // merge takes place.
581 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
586 // Try to do the merge.
587 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
589 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
590 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
593 // Merge succeeded, update records.
594 Merges.push_back(std::prev(Loc));
596 // In gathering loads together, we may have moved the imp-def of a register
597 // past one of its uses. This is OK, since we know better than the rest of
598 // LLVM what's OK with ARM loads and stores; but we still have to adjust the
600 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
601 E = UsesOfImpDefs.end();
605 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
606 // Remove kill flags from any memops that come before insertPos.
607 if (Regs[i-memOpsBegin].second) {
608 unsigned Reg = Regs[i-memOpsBegin].first;
609 if (KilledRegs.count(Reg)) {
610 unsigned j = Killer[Reg];
611 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
612 assert(Idx >= 0 && "Cannot find killing operand");
613 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
614 memOps[j].isKill = false;
616 memOps[i].isKill = true;
618 MBB.erase(memOps[i].MBBI);
619 // Update this memop to refer to the merged instruction.
620 // We may need to move kill flags again.
621 memOps[i].Merged = true;
622 memOps[i].MBBI = Merges.back();
623 memOps[i].Position = insertPos;
627 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
628 /// load / store multiple instructions.
630 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
631 unsigned Base, int Opcode, unsigned Size,
632 ARMCC::CondCodes Pred, unsigned PredReg,
633 unsigned Scratch, MemOpQueue &MemOps,
634 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
635 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
636 int Offset = MemOps[SIndex].Offset;
637 int SOffset = Offset;
638 unsigned insertAfter = SIndex;
639 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
640 DebugLoc dl = Loc->getDebugLoc();
641 const MachineOperand &PMO = Loc->getOperand(0);
642 unsigned PReg = PMO.getReg();
643 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
645 unsigned Limit = ~0U;
646 bool BaseKill = false;
647 // vldm / vstm limit are 32 for S variants, 16 for D variants.
665 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
666 int NewOffset = MemOps[i].Offset;
667 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
668 unsigned Reg = MO.getReg();
669 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
670 // Register numbers must be in ascending order. For VFP / NEON load and
671 // store multiples, the registers must also be consecutive and within the
672 // limit on the number of registers per instruction.
673 if (Reg != ARM::SP &&
674 NewOffset == Offset + (int)Size &&
675 ((isNotVFP && RegNum > PRegNum) ||
676 ((Count < Limit) && RegNum == PRegNum+1)) &&
677 // On Swift we don't want vldm/vstm to start with a odd register num
678 // because Q register unaligned vldm/vstm need more uops.
679 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
684 // Can't merge this in. Try merge the earlier ones first.
685 // We need to compute BaseKill here because the MemOps may have been
687 BaseKill = Loc->killsRegister(Base);
689 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
690 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
691 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
696 if (MemOps[i].Position > MemOps[insertAfter].Position) {
698 Loc = MemOps[i].MBBI;
702 BaseKill = Loc->killsRegister(Base);
703 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
704 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
707 static bool definesCPSR(MachineInstr *MI) {
708 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
709 const MachineOperand &MO = MI->getOperand(i);
712 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
713 // If the instruction has live CPSR def, then it's not safe to fold it
714 // into load / store.
721 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
722 unsigned Bytes, unsigned Limit,
723 ARMCC::CondCodes Pred, unsigned PredReg) {
724 unsigned MyPredReg = 0;
728 bool CheckCPSRDef = false;
729 switch (MI->getOpcode()) {
730 default: return false;
740 // Make sure the offset fits in 8 bits.
741 if (Bytes == 0 || (Limit && Bytes >= Limit))
744 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
745 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
746 if (!(MI->getOperand(0).getReg() == Base &&
747 MI->getOperand(1).getReg() == Base &&
748 (MI->getOperand(2).getImm() * Scale) == Bytes &&
749 getInstrPredicate(MI, MyPredReg) == Pred &&
750 MyPredReg == PredReg))
753 return CheckCPSRDef ? !definesCPSR(MI) : true;
756 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
757 unsigned Bytes, unsigned Limit,
758 ARMCC::CondCodes Pred, unsigned PredReg) {
759 unsigned MyPredReg = 0;
763 bool CheckCPSRDef = false;
764 switch (MI->getOpcode()) {
765 default: return false;
775 if (Bytes == 0 || (Limit && Bytes >= Limit))
776 // Make sure the offset fits in 8 bits.
779 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
780 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
781 if (!(MI->getOperand(0).getReg() == Base &&
782 MI->getOperand(1).getReg() == Base &&
783 (MI->getOperand(2).getImm() * Scale) == Bytes &&
784 getInstrPredicate(MI, MyPredReg) == Pred &&
785 MyPredReg == PredReg))
788 return CheckCPSRDef ? !definesCPSR(MI) : true;
791 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
792 switch (MI->getOpcode()) {
817 case ARM::tLDMIA_UPD:
818 case ARM::tSTMIA_UPD:
825 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
828 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
832 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
833 ARM_AM::AMSubMode Mode) {
835 default: llvm_unreachable("Unhandled opcode!");
841 default: llvm_unreachable("Unhandled submode!");
842 case ARM_AM::ia: return ARM::LDMIA_UPD;
843 case ARM_AM::ib: return ARM::LDMIB_UPD;
844 case ARM_AM::da: return ARM::LDMDA_UPD;
845 case ARM_AM::db: return ARM::LDMDB_UPD;
852 default: llvm_unreachable("Unhandled submode!");
853 case ARM_AM::ia: return ARM::STMIA_UPD;
854 case ARM_AM::ib: return ARM::STMIB_UPD;
855 case ARM_AM::da: return ARM::STMDA_UPD;
856 case ARM_AM::db: return ARM::STMDB_UPD;
861 default: llvm_unreachable("Unhandled submode!");
862 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
863 case ARM_AM::db: return ARM::t2LDMDB_UPD;
868 default: llvm_unreachable("Unhandled submode!");
869 case ARM_AM::ia: return ARM::t2STMIA_UPD;
870 case ARM_AM::db: return ARM::t2STMDB_UPD;
874 default: llvm_unreachable("Unhandled submode!");
875 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
876 case ARM_AM::db: return ARM::VLDMSDB_UPD;
880 default: llvm_unreachable("Unhandled submode!");
881 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
882 case ARM_AM::db: return ARM::VLDMDDB_UPD;
886 default: llvm_unreachable("Unhandled submode!");
887 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
888 case ARM_AM::db: return ARM::VSTMSDB_UPD;
892 default: llvm_unreachable("Unhandled submode!");
893 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
894 case ARM_AM::db: return ARM::VSTMDDB_UPD;
899 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
900 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
902 /// stmia rn, <ra, rb, rc>
903 /// rn := rn + 4 * 3;
905 /// stmia rn!, <ra, rb, rc>
907 /// rn := rn - 4 * 3;
908 /// ldmia rn, <ra, rb, rc>
910 /// ldmdb rn!, <ra, rb, rc>
911 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
912 MachineBasicBlock::iterator MBBI,
914 MachineBasicBlock::iterator &I) {
915 // Thumb1 is already using updating loads/stores.
916 if (isThumb1) return false;
918 MachineInstr *MI = MBBI;
919 unsigned Base = MI->getOperand(0).getReg();
920 bool BaseKill = MI->getOperand(0).isKill();
921 unsigned Bytes = getLSMultipleTransferSize(MI);
922 unsigned PredReg = 0;
923 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
924 int Opcode = MI->getOpcode();
925 DebugLoc dl = MI->getDebugLoc();
927 // Can't use an updating ld/st if the base register is also a dest
928 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
929 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
930 if (MI->getOperand(i).getReg() == Base)
933 bool DoMerge = false;
934 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
936 // Try merging with the previous instruction.
937 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
938 if (MBBI != BeginMBBI) {
939 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
940 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
942 if (Mode == ARM_AM::ia &&
943 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
946 } else if (Mode == ARM_AM::ib &&
947 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
955 // Try merging with the next instruction.
956 MachineBasicBlock::iterator EndMBBI = MBB.end();
957 if (!DoMerge && MBBI != EndMBBI) {
958 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
959 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
961 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
962 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
964 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
965 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
980 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
981 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
982 .addReg(Base, getDefRegState(true)) // WB base register
983 .addReg(Base, getKillRegState(BaseKill))
984 .addImm(Pred).addReg(PredReg);
986 // Transfer the rest of operands.
987 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
988 MIB.addOperand(MI->getOperand(OpNum));
990 // Transfer memoperands.
991 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
997 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
998 ARM_AM::AddrOpc Mode) {
1001 return ARM::LDR_PRE_IMM;
1003 return ARM::STR_PRE_IMM;
1005 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1007 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1009 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1011 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1014 return ARM::t2LDR_PRE;
1017 return ARM::t2STR_PRE;
1018 default: llvm_unreachable("Unhandled opcode!");
1022 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1023 ARM_AM::AddrOpc Mode) {
1026 return ARM::LDR_POST_IMM;
1028 return ARM::STR_POST_IMM;
1030 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1032 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1034 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1036 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1039 return ARM::t2LDR_POST;
1042 return ARM::t2STR_POST;
1043 default: llvm_unreachable("Unhandled opcode!");
1047 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
1048 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1049 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1050 MachineBasicBlock::iterator MBBI,
1051 const TargetInstrInfo *TII,
1053 MachineBasicBlock::iterator &I) {
1054 // Thumb1 doesn't have updating LDR/STR.
1055 // FIXME: Use LDM/STM with single register instead.
1056 if (isThumb1) return false;
1058 MachineInstr *MI = MBBI;
1059 unsigned Base = MI->getOperand(1).getReg();
1060 bool BaseKill = MI->getOperand(1).isKill();
1061 unsigned Bytes = getLSMultipleTransferSize(MI);
1062 int Opcode = MI->getOpcode();
1063 DebugLoc dl = MI->getDebugLoc();
1064 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1065 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1066 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1067 if (isi32Load(Opcode) || isi32Store(Opcode))
1068 if (MI->getOperand(2).getImm() != 0)
1070 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1073 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1074 // Can't do the merge if the destination register is the same as the would-be
1075 // writeback register.
1076 if (MI->getOperand(0).getReg() == Base)
1079 unsigned PredReg = 0;
1080 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1081 bool DoMerge = false;
1082 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1083 unsigned NewOpc = 0;
1084 // AM2 - 12 bits, thumb2 - 8 bits.
1085 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1087 // Try merging with the previous instruction.
1088 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1089 if (MBBI != BeginMBBI) {
1090 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1091 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1093 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1095 AddSub = ARM_AM::sub;
1096 } else if (!isAM5 &&
1097 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1101 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1102 MBB.erase(PrevMBBI);
1106 // Try merging with the next instruction.
1107 MachineBasicBlock::iterator EndMBBI = MBB.end();
1108 if (!DoMerge && MBBI != EndMBBI) {
1109 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1110 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1113 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1115 AddSub = ARM_AM::sub;
1116 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1120 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1121 if (NextMBBI == I) {
1125 MBB.erase(NextMBBI);
1133 // VLDM[SD]_UPD, VSTM[SD]_UPD
1134 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1135 // updating load/store-multiple instructions can be used with only one
1137 MachineOperand &MO = MI->getOperand(0);
1138 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1139 .addReg(Base, getDefRegState(true)) // WB base register
1140 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1141 .addImm(Pred).addReg(PredReg)
1142 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1143 getKillRegState(MO.isKill())));
1146 // LDR_PRE, LDR_POST
1147 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1148 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1149 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1150 .addReg(Base, RegState::Define)
1151 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1153 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1154 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1155 .addReg(Base, RegState::Define)
1156 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1159 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1160 // t2LDR_PRE, t2LDR_POST
1161 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1162 .addReg(Base, RegState::Define)
1163 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1166 MachineOperand &MO = MI->getOperand(0);
1167 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1168 // the vestigal zero-reg offset register. When that's fixed, this clause
1169 // can be removed entirely.
1170 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1171 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1172 // STR_PRE, STR_POST
1173 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1174 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1175 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1177 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1178 // t2STR_PRE, t2STR_POST
1179 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1180 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1181 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1189 /// isMemoryOp - Returns true if instruction is a memory operation that this
1190 /// pass is capable of operating on.
1191 static bool isMemoryOp(const MachineInstr *MI) {
1192 // When no memory operands are present, conservatively assume unaligned,
1193 // volatile, unfoldable.
1194 if (!MI->hasOneMemOperand())
1197 const MachineMemOperand *MMO = *MI->memoperands_begin();
1199 // Don't touch volatile memory accesses - we may be changing their order.
1200 if (MMO->isVolatile())
1203 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1205 if (MMO->getAlignment() < 4)
1208 // str <undef> could probably be eliminated entirely, but for now we just want
1209 // to avoid making a mess of it.
1210 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1211 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1212 MI->getOperand(0).isUndef())
1215 // Likewise don't mess with references to undefined addresses.
1216 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1217 MI->getOperand(1).isUndef())
1220 int Opcode = MI->getOpcode();
1225 return MI->getOperand(1).isReg();
1228 return MI->getOperand(1).isReg();
1237 return MI->getOperand(1).isReg();
1242 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1243 /// op that is being merged.
1244 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1245 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1246 unsigned Position = MemOps[0].Position;
1247 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1248 if (MemOps[i].Position < Position) {
1249 Position = MemOps[i].Position;
1250 Loc = MemOps[i].MBBI;
1254 if (Loc != MBB.begin())
1255 RS->forward(std::prev(Loc));
1258 static int getMemoryOpOffset(const MachineInstr *MI) {
1259 int Opcode = MI->getOpcode();
1260 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1261 unsigned NumOperands = MI->getDesc().getNumOperands();
1262 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1264 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1265 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1266 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1267 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1270 // Thumb1 immediate offsets are scaled by 4
1271 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
1272 return OffField * 4;
1274 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1275 : ARM_AM::getAM5Offset(OffField) * 4;
1277 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1280 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1286 static void InsertLDR_STR(MachineBasicBlock &MBB,
1287 MachineBasicBlock::iterator &MBBI,
1288 int Offset, bool isDef,
1289 DebugLoc dl, unsigned NewOpc,
1290 unsigned Reg, bool RegDeadKill, bool RegUndef,
1291 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1292 bool OffKill, bool OffUndef,
1293 ARMCC::CondCodes Pred, unsigned PredReg,
1294 const TargetInstrInfo *TII, bool isT2) {
1296 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1298 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1299 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1300 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1302 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1304 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1305 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1306 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1310 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1311 MachineBasicBlock::iterator &MBBI) {
1312 MachineInstr *MI = &*MBBI;
1313 unsigned Opcode = MI->getOpcode();
1314 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1315 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1316 const MachineOperand &BaseOp = MI->getOperand(2);
1317 unsigned BaseReg = BaseOp.getReg();
1318 unsigned EvenReg = MI->getOperand(0).getReg();
1319 unsigned OddReg = MI->getOperand(1).getReg();
1320 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1321 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1322 // ARM errata 602117: LDRD with base in list may result in incorrect base
1323 // register when interrupted or faulted.
1324 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1325 if (!Errata602117 &&
1326 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1329 MachineBasicBlock::iterator NewBBI = MBBI;
1330 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1331 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1332 bool EvenDeadKill = isLd ?
1333 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1334 bool EvenUndef = MI->getOperand(0).isUndef();
1335 bool OddDeadKill = isLd ?
1336 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1337 bool OddUndef = MI->getOperand(1).isUndef();
1338 bool BaseKill = BaseOp.isKill();
1339 bool BaseUndef = BaseOp.isUndef();
1340 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1341 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1342 int OffImm = getMemoryOpOffset(MI);
1343 unsigned PredReg = 0;
1344 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1346 if (OddRegNum > EvenRegNum && OffImm == 0) {
1347 // Ascending register numbers and no offset. It's safe to change it to a
1349 unsigned NewOpc = (isLd)
1350 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1351 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1353 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1354 .addReg(BaseReg, getKillRegState(BaseKill))
1355 .addImm(Pred).addReg(PredReg)
1356 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1357 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1360 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1361 .addReg(BaseReg, getKillRegState(BaseKill))
1362 .addImm(Pred).addReg(PredReg)
1364 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1366 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1369 NewBBI = std::prev(MBBI);
1371 // Split into two instructions.
1372 unsigned NewOpc = (isLd)
1373 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1374 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1375 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1376 // so adjust and use t2LDRi12 here for that.
1377 unsigned NewOpc2 = (isLd)
1378 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1379 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1380 DebugLoc dl = MBBI->getDebugLoc();
1381 // If this is a load and base register is killed, it may have been
1382 // re-defed by the load, make sure the first load does not clobber it.
1384 (BaseKill || OffKill) &&
1385 (TRI->regsOverlap(EvenReg, BaseReg))) {
1386 assert(!TRI->regsOverlap(OddReg, BaseReg));
1387 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1388 OddReg, OddDeadKill, false,
1389 BaseReg, false, BaseUndef, false, OffUndef,
1390 Pred, PredReg, TII, isT2);
1391 NewBBI = std::prev(MBBI);
1392 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1393 EvenReg, EvenDeadKill, false,
1394 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1395 Pred, PredReg, TII, isT2);
1397 if (OddReg == EvenReg && EvenDeadKill) {
1398 // If the two source operands are the same, the kill marker is
1399 // probably on the first one. e.g.
1400 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1401 EvenDeadKill = false;
1404 // Never kill the base register in the first instruction.
1405 if (EvenReg == BaseReg)
1406 EvenDeadKill = false;
1407 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1408 EvenReg, EvenDeadKill, EvenUndef,
1409 BaseReg, false, BaseUndef, false, OffUndef,
1410 Pred, PredReg, TII, isT2);
1411 NewBBI = std::prev(MBBI);
1412 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1413 OddReg, OddDeadKill, OddUndef,
1414 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1415 Pred, PredReg, TII, isT2);
1430 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1431 /// ops of the same base and incrementing offset into LDM / STM ops.
1432 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1433 unsigned NumMerges = 0;
1434 unsigned NumMemOps = 0;
1436 unsigned CurrBase = 0;
1438 unsigned CurrSize = 0;
1439 ARMCC::CondCodes CurrPred = ARMCC::AL;
1440 unsigned CurrPredReg = 0;
1441 unsigned Position = 0;
1442 SmallVector<MachineBasicBlock::iterator,4> Merges;
1444 RS->enterBasicBlock(&MBB);
1445 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1447 if (FixInvalidRegPairOp(MBB, MBBI))
1450 bool Advance = false;
1451 bool TryMerge = false;
1452 bool Clobber = false;
1454 bool isMemOp = isMemoryOp(MBBI);
1456 int Opcode = MBBI->getOpcode();
1457 unsigned Size = getLSMultipleTransferSize(MBBI);
1458 const MachineOperand &MO = MBBI->getOperand(0);
1459 unsigned Reg = MO.getReg();
1460 bool isKill = MO.isDef() ? false : MO.isKill();
1461 unsigned Base = MBBI->getOperand(1).getReg();
1462 unsigned PredReg = 0;
1463 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1464 int Offset = getMemoryOpOffset(MBBI);
1467 // r5 := ldr [r5, #4]
1468 // r6 := ldr [r5, #8]
1470 // The second ldr has effectively broken the chain even though it
1471 // looks like the later ldr(s) use the same base register. Try to
1472 // merge the ldr's so far, including this one. But don't try to
1473 // combine the following ldr(s).
1474 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1477 // r4 := ldr [r0, #8]
1478 // r4 := ldr [r0, #4]
1480 // The optimization may reorder the second ldr in front of the first
1481 // ldr, which violates write after write(WAW) dependence. The same as
1482 // str. Try to merge inst(s) already in MemOps.
1483 bool Overlap = false;
1484 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1485 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1491 if (CurrBase == 0 && !Clobber) {
1492 // Start of a new chain.
1497 CurrPredReg = PredReg;
1498 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1501 } else if (!Overlap) {
1507 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1508 // No need to match PredReg.
1509 // Continue adding to the queue.
1510 if (Offset > MemOps.back().Offset) {
1511 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1516 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1518 if (Offset < I->Offset) {
1519 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1524 } else if (Offset == I->Offset) {
1525 // Collision! This can't be merged!
1534 if (MBBI->isDebugValue()) {
1537 // Reach the end of the block, try merging the memory instructions.
1539 } else if (Advance) {
1543 // Reach the end of the block, try merging the memory instructions.
1550 if (NumMemOps > 1) {
1551 // Try to find a free register to use as a new base in case it's needed.
1552 // First advance to the instruction just before the start of the chain.
1553 AdvanceRS(MBB, MemOps);
1555 // Find a scratch register.
1557 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1559 // Process the load / store instructions.
1560 RS->forward(std::prev(MBBI));
1564 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1565 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1567 // Try folding preceding/trailing base inc/dec into the generated
1569 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1570 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1572 NumMerges += Merges.size();
1574 // Try folding preceding/trailing base inc/dec into those load/store
1575 // that were not merged to form LDM/STM ops.
1576 for (unsigned i = 0; i != NumMemOps; ++i)
1577 if (!MemOps[i].Merged)
1578 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1581 // RS may be pointing to an instruction that's deleted.
1582 RS->skipTo(std::prev(MBBI));
1583 } else if (NumMemOps == 1) {
1584 // Try folding preceding/trailing base inc/dec into the single
1586 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1588 RS->forward(std::prev(MBBI));
1595 CurrPred = ARMCC::AL;
1602 // If iterator hasn't been advanced and this is not a memory op, skip it.
1603 // It can't start a new chain anyway.
1604 if (!Advance && !isMemOp && MBBI != E) {
1610 return NumMerges > 0;
1613 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1614 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1615 /// directly restore the value of LR into pc.
1616 /// ldmfd sp!, {..., lr}
1619 /// ldmfd sp!, {..., lr}
1622 /// ldmfd sp!, {..., pc}
1623 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1624 // Thumb1 LDM doesn't allow high registers.
1625 if (isThumb1) return false;
1626 if (MBB.empty()) return false;
1628 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1629 if (MBBI != MBB.begin() &&
1630 (MBBI->getOpcode() == ARM::BX_RET ||
1631 MBBI->getOpcode() == ARM::tBX_RET ||
1632 MBBI->getOpcode() == ARM::MOVPCLR)) {
1633 MachineInstr *PrevMI = std::prev(MBBI);
1634 unsigned Opcode = PrevMI->getOpcode();
1635 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1636 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1637 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1638 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1639 if (MO.getReg() != ARM::LR)
1641 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1642 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1643 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1644 PrevMI->setDesc(TII->get(NewOpc));
1646 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1654 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1655 const TargetMachine &TM = Fn.getTarget();
1656 TL = TM.getSubtargetImpl()->getTargetLowering();
1657 AFI = Fn.getInfo<ARMFunctionInfo>();
1658 TII = TM.getSubtargetImpl()->getInstrInfo();
1659 TRI = TM.getSubtargetImpl()->getRegisterInfo();
1660 STI = &TM.getSubtarget<ARMSubtarget>();
1661 RS = new RegScavenger();
1662 isThumb2 = AFI->isThumb2Function();
1663 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1665 bool Modified = false;
1666 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1668 MachineBasicBlock &MBB = *MFI;
1669 Modified |= LoadStoreMultipleOpti(MBB);
1670 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1671 Modified |= MergeReturnIntoLDM(MBB);
1679 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1680 /// load / stores from consecutive locations close to make it more
1681 /// likely they will be combined later.
1684 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1686 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1688 const DataLayout *TD;
1689 const TargetInstrInfo *TII;
1690 const TargetRegisterInfo *TRI;
1691 const ARMSubtarget *STI;
1692 MachineRegisterInfo *MRI;
1693 MachineFunction *MF;
1695 bool runOnMachineFunction(MachineFunction &Fn) override;
1697 const char *getPassName() const override {
1698 return "ARM pre- register allocation load / store optimization pass";
1702 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1703 unsigned &NewOpc, unsigned &EvenReg,
1704 unsigned &OddReg, unsigned &BaseReg,
1706 unsigned &PredReg, ARMCC::CondCodes &Pred,
1708 bool RescheduleOps(MachineBasicBlock *MBB,
1709 SmallVectorImpl<MachineInstr *> &Ops,
1710 unsigned Base, bool isLd,
1711 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1712 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1714 char ARMPreAllocLoadStoreOpt::ID = 0;
1717 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1718 TD = Fn.getSubtarget().getDataLayout();
1719 TII = Fn.getSubtarget().getInstrInfo();
1720 TRI = Fn.getSubtarget().getRegisterInfo();
1721 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1722 MRI = &Fn.getRegInfo();
1725 bool Modified = false;
1726 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1728 Modified |= RescheduleLoadStoreInstrs(MFI);
1733 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1734 MachineBasicBlock::iterator I,
1735 MachineBasicBlock::iterator E,
1736 SmallPtrSetImpl<MachineInstr*> &MemOps,
1737 SmallSet<unsigned, 4> &MemRegs,
1738 const TargetRegisterInfo *TRI) {
1739 // Are there stores / loads / calls between them?
1740 // FIXME: This is overly conservative. We should make use of alias information
1742 SmallSet<unsigned, 4> AddedRegPressure;
1744 if (I->isDebugValue() || MemOps.count(&*I))
1746 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1748 if (isLd && I->mayStore())
1753 // It's not safe to move the first 'str' down.
1756 // str r4, [r0, #+4]
1760 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1761 MachineOperand &MO = I->getOperand(j);
1764 unsigned Reg = MO.getReg();
1765 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1767 if (Reg != Base && !MemRegs.count(Reg))
1768 AddedRegPressure.insert(Reg);
1772 // Estimate register pressure increase due to the transformation.
1773 if (MemRegs.size() <= 4)
1774 // Ok if we are moving small number of instructions.
1776 return AddedRegPressure.size() <= MemRegs.size() * 2;
1780 /// Copy Op0 and Op1 operands into a new array assigned to MI.
1781 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1782 MachineInstr *Op1) {
1783 assert(MI->memoperands_empty() && "expected a new machineinstr");
1784 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1785 + (Op1->memoperands_end() - Op1->memoperands_begin());
1787 MachineFunction *MF = MI->getParent()->getParent();
1788 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1789 MachineSDNode::mmo_iterator MemEnd =
1790 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1792 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1793 MI->setMemRefs(MemBegin, MemEnd);
1797 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1799 unsigned &NewOpc, unsigned &EvenReg,
1800 unsigned &OddReg, unsigned &BaseReg,
1801 int &Offset, unsigned &PredReg,
1802 ARMCC::CondCodes &Pred,
1804 // Make sure we're allowed to generate LDRD/STRD.
1805 if (!STI->hasV5TEOps())
1808 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1810 unsigned Opcode = Op0->getOpcode();
1811 if (Opcode == ARM::LDRi12) {
1813 } else if (Opcode == ARM::STRi12) {
1815 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1816 NewOpc = ARM::t2LDRDi8;
1819 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1820 NewOpc = ARM::t2STRDi8;
1827 // Make sure the base address satisfies i64 ld / st alignment requirement.
1828 // At the moment, we ignore the memoryoperand's value.
1829 // If we want to use AliasAnalysis, we should check it accordingly.
1830 if (!Op0->hasOneMemOperand() ||
1831 (*Op0->memoperands_begin())->isVolatile())
1834 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1835 const Function *Func = MF->getFunction();
1836 unsigned ReqAlign = STI->hasV6Ops()
1837 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1838 : 8; // Pre-v6 need 8-byte align
1839 if (Align < ReqAlign)
1842 // Then make sure the immediate offset fits.
1843 int OffImm = getMemoryOpOffset(Op0);
1845 int Limit = (1 << 8) * Scale;
1846 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1850 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1852 AddSub = ARM_AM::sub;
1855 int Limit = (1 << 8) * Scale;
1856 if (OffImm >= Limit || (OffImm & (Scale-1)))
1858 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1860 EvenReg = Op0->getOperand(0).getReg();
1861 OddReg = Op1->getOperand(0).getReg();
1862 if (EvenReg == OddReg)
1864 BaseReg = Op0->getOperand(1).getReg();
1865 Pred = getInstrPredicate(Op0, PredReg);
1866 dl = Op0->getDebugLoc();
1870 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1871 SmallVectorImpl<MachineInstr *> &Ops,
1872 unsigned Base, bool isLd,
1873 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1874 bool RetVal = false;
1876 // Sort by offset (in reverse order).
1877 std::sort(Ops.begin(), Ops.end(),
1878 [](const MachineInstr *LHS, const MachineInstr *RHS) {
1879 int LOffset = getMemoryOpOffset(LHS);
1880 int ROffset = getMemoryOpOffset(RHS);
1881 assert(LHS == RHS || LOffset != ROffset);
1882 return LOffset > ROffset;
1885 // The loads / stores of the same base are in order. Scan them from first to
1886 // last and check for the following:
1887 // 1. Any def of base.
1889 while (Ops.size() > 1) {
1890 unsigned FirstLoc = ~0U;
1891 unsigned LastLoc = 0;
1892 MachineInstr *FirstOp = nullptr;
1893 MachineInstr *LastOp = nullptr;
1895 unsigned LastOpcode = 0;
1896 unsigned LastBytes = 0;
1897 unsigned NumMove = 0;
1898 for (int i = Ops.size() - 1; i >= 0; --i) {
1899 MachineInstr *Op = Ops[i];
1900 unsigned Loc = MI2LocMap[Op];
1901 if (Loc <= FirstLoc) {
1905 if (Loc >= LastLoc) {
1911 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1912 if (LastOpcode && LSMOpcode != LastOpcode)
1915 int Offset = getMemoryOpOffset(Op);
1916 unsigned Bytes = getLSMultipleTransferSize(Op);
1918 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1921 LastOffset = Offset;
1923 LastOpcode = LSMOpcode;
1924 if (++NumMove == 8) // FIXME: Tune this limit.
1931 SmallPtrSet<MachineInstr*, 4> MemOps;
1932 SmallSet<unsigned, 4> MemRegs;
1933 for (int i = NumMove-1; i >= 0; --i) {
1934 MemOps.insert(Ops[i]);
1935 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1938 // Be conservative, if the instructions are too far apart, don't
1939 // move them. We want to limit the increase of register pressure.
1940 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1942 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1943 MemOps, MemRegs, TRI);
1945 for (unsigned i = 0; i != NumMove; ++i)
1948 // This is the new location for the loads / stores.
1949 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1950 while (InsertPos != MBB->end()
1951 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1954 // If we are moving a pair of loads / stores, see if it makes sense
1955 // to try to allocate a pair of registers that can form register pairs.
1956 MachineInstr *Op0 = Ops.back();
1957 MachineInstr *Op1 = Ops[Ops.size()-2];
1958 unsigned EvenReg = 0, OddReg = 0;
1959 unsigned BaseReg = 0, PredReg = 0;
1960 ARMCC::CondCodes Pred = ARMCC::AL;
1962 unsigned NewOpc = 0;
1965 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1966 EvenReg, OddReg, BaseReg,
1967 Offset, PredReg, Pred, isT2)) {
1971 const MCInstrDesc &MCID = TII->get(NewOpc);
1972 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1973 MRI->constrainRegClass(EvenReg, TRC);
1974 MRI->constrainRegClass(OddReg, TRC);
1976 // Form the pair instruction.
1978 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1979 .addReg(EvenReg, RegState::Define)
1980 .addReg(OddReg, RegState::Define)
1982 // FIXME: We're converting from LDRi12 to an insn that still
1983 // uses addrmode2, so we need an explicit offset reg. It should
1984 // always by reg0 since we're transforming LDRi12s.
1987 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1988 concatenateMemOperands(MIB, Op0, Op1);
1989 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1992 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1996 // FIXME: We're converting from LDRi12 to an insn that still
1997 // uses addrmode2, so we need an explicit offset reg. It should
1998 // always by reg0 since we're transforming STRi12s.
2001 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2002 concatenateMemOperands(MIB, Op0, Op1);
2003 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2009 // Add register allocation hints to form register pairs.
2010 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
2011 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
2013 for (unsigned i = 0; i != NumMove; ++i) {
2014 MachineInstr *Op = Ops.back();
2016 MBB->splice(InsertPos, MBB, Op);
2020 NumLdStMoved += NumMove;
2030 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2031 bool RetVal = false;
2033 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2034 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2035 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2036 SmallVector<unsigned, 4> LdBases;
2037 SmallVector<unsigned, 4> StBases;
2040 MachineBasicBlock::iterator MBBI = MBB->begin();
2041 MachineBasicBlock::iterator E = MBB->end();
2043 for (; MBBI != E; ++MBBI) {
2044 MachineInstr *MI = MBBI;
2045 if (MI->isCall() || MI->isTerminator()) {
2046 // Stop at barriers.
2051 if (!MI->isDebugValue())
2052 MI2LocMap[MI] = ++Loc;
2054 if (!isMemoryOp(MI))
2056 unsigned PredReg = 0;
2057 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2060 int Opc = MI->getOpcode();
2061 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
2062 unsigned Base = MI->getOperand(1).getReg();
2063 int Offset = getMemoryOpOffset(MI);
2065 bool StopHere = false;
2067 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2068 Base2LdsMap.find(Base);
2069 if (BI != Base2LdsMap.end()) {
2070 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2071 if (Offset == getMemoryOpOffset(BI->second[i])) {
2077 BI->second.push_back(MI);
2079 Base2LdsMap[Base].push_back(MI);
2080 LdBases.push_back(Base);
2083 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2084 Base2StsMap.find(Base);
2085 if (BI != Base2StsMap.end()) {
2086 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2087 if (Offset == getMemoryOpOffset(BI->second[i])) {
2093 BI->second.push_back(MI);
2095 Base2StsMap[Base].push_back(MI);
2096 StBases.push_back(Base);
2101 // Found a duplicate (a base+offset combination that's seen earlier).
2108 // Re-schedule loads.
2109 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2110 unsigned Base = LdBases[i];
2111 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2113 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2116 // Re-schedule stores.
2117 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2118 unsigned Base = StBases[i];
2119 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2121 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
2125 Base2LdsMap.clear();
2126 Base2StsMap.clear();
2136 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
2137 /// optimization pass.
2138 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2140 return new ARMPreAllocLoadStoreOpt();
2141 return new ARMLoadStoreOpt();