//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <map>

using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
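///
/// Illustrative only (not from the original source): assuming consecutive
/// offsets and ascending register numbers, the pass rewrites
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
/// into a single load-multiple:
///   ldmia r0, {r1, r2, r3}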
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that will be loaded /
/// stored. It returns true if the transformation is done.
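///
/// Illustrative only (not from the original source): with Base = r0,
/// Offset = 0, and Regs = {r1, r2, r3}, MergeOps emits
///   ldmia r0, {r1, r2, r3}
/// whereas Offset = -12 with the same three registers would select the
/// DB submode (ldmdb).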
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;
    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // The new base is always killed right after its use.
  }
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
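///
/// Illustrative only (not from the original source): loading s0 also
/// implicitly defines the wider d0; if the merged VLDM ends up after an
/// instruction in the range that reads d0, that read must be marked undef
/// so the recorded liveness stays consistent.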
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // kill flags.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
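///
/// Illustrative only (not from the original source): because LDM register
/// lists must be ascending, a chain such as
///   ldr r2, [r0]
///   ldr r3, [r0, #4]
///   ldr r1, [r0, #8]
/// is split: r2 and r3 merge into ldmia r0, {r2, r3}, while the r1 load is
/// left alone (a single-register "multiple" is not worth forming).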
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
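// Illustrative only (not from the original source): a multiple such as
//   ldmia r0!, {r4, r5, r6}
// carries its register list as trailing variadic operands, so the transfer
// size above is recovered from the operand count (4 bytes per core register,
// 8 per D register) rather than being stored anywhere in the instruction.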
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;
  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
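///
/// Illustrative only (not from the original source):
///   ldr r3, [r0]
///   add r0, r0, #4
/// =>
///   ldr r3, [r0], #4   ; post-indexed load with base writeback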
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
        if (isLd)
          ++NumLDRD2LDR;
        else
          ++NumSTRD2STR;
      }

      MBB.erase(MI);
      MBBI = NewBBI;
      return true;
    }
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
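///
/// Illustrative only (not from the original source), assuming r0 is not
/// redefined in between:
///   str r1, [r0]
///   str r2, [r0, #4]
///   str r3, [r0, #8]
/// =>
///   stmia r0, {r1, r2, r3}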
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;
    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write (WAW) dependence. The same
      // goes for str. Try to merge the inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }
    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
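///
/// Illustrative only (not from the original source): if unrelated code
/// separates two loads from adjacent addresses,
///   ldr r4, [r0]
///   mul r7, r8, r9
///   ldr r5, [r0, #4]
/// the pass moves the loads next to each other so that LDRD formation (or
/// the post-RA pass above) can combine them.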
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if available.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
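// Illustrative only (not from the original source): with five memops using
// registers {r1..r5}, the move above is still deemed profitable as long as
// no more than ten additional registers become live across the gap.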
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate a new array and transfer ownership of it to the MachineInstr.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte align.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                          SmallVectorImpl<MachineInstr *> &Ops,
                                            unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();
          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // It's not safe to move the loads / stores past it.
        ++MBBI;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
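///
/// Illustrative only (not from the original source): a target would
/// typically schedule both flavors from its pass setup, e.g.
///   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true)); // pre-RA
///   addPass(createARMLoadStoreOptimizationPass());                  // post-RA
/// where the declaration is assumed to default PreAlloc to false.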
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}