1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/RegisterScavenging.h"
33 #include "llvm/CodeGen/SelectionDAGNodes.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Target/TargetInstrInfo.h"
40 #include "llvm/Target/TargetMachine.h"
41 #include "llvm/Target/TargetRegisterInfo.h"
44 #define DEBUG_TYPE "arm-ldst-opt"
46 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
47 STATISTIC(NumSTMGened , "Number of stm instructions generated");
48 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
49 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
50 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
51 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
52 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
53 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
54 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
55 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
56 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
/// Pass object: scans each basic block after register allocation and merges
/// eligible load / store sequences into multi-register forms.
/// NOTE(review): several lines of the original declaration (access
/// specifiers, some data members such as the register scavenger and
/// isThumb2 flag, and closing braces) are elided in this excerpt; only
/// comments were added here.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  // Cached target hooks; presumably initialized once per machine function in
  // runOnMachineFunction — TODO confirm (initialization not visible here).
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "ARM load / store optimization pass";

  // One candidate memory operation being tracked for merging.
  struct MemOpQueueEntry {
    MachineBasicBlock::iterator MBBI;
    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;

  void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                        const MemOpQueue &MemOps, unsigned DefReg,
                        unsigned RangeBegin, unsigned RangeEnd);

  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                ArrayRef<std::pair<unsigned, bool> > Regs,
                ArrayRef<unsigned> ImpDefs);
  // NOTE(review): some parameters of this declaration are elided here.
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      unsigned memOpsBegin,
                      unsigned insertAfter,
                      ARMCC::CondCodes Pred,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification token (definition of the static member).
char ARMLoadStoreOpt::ID = 0;
/// Map a single-register load/store opcode plus an addressing sub-mode
/// (ia/ib/da/db) to the corresponding load/store-multiple opcode, or 0 when
/// no such opcode exists (e.g. VLDM/VSTM have no plain db form).
/// NOTE(review): the outer switch's case labels and closing braces are
/// elided in this excerpt; only comments were added.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    // Thumb2 supports only ia/db sub-modes.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
/// Recover the addressing sub-mode encoded in a load/store-multiple opcode.
/// NOTE(review): the return statements and many case labels are elided in
/// this excerpt; the visible labels group _UPD/IA variants separately from
/// the DB variants.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  // Increment-after family.
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA_UPD:
  // Decrement-before family.
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
} // end namespace ARM_AM
} // end namespace llvm
267 static bool isT2i32Load(unsigned Opc) {
268 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
271 static bool isi32Load(unsigned Opc) {
272 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
275 static bool isT2i32Store(unsigned Opc) {
276 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
279 static bool isi32Store(unsigned Opc) {
280 return Opc == ARM::STRi12 || isT2i32Store(Opc);
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
/// NOTE(review): the leading return type, some statements (e.g. the NumRegs
/// early-out, the Mode assignments inside the if-chain) and closing braces
/// are elided in this excerpt; only comments were added.
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  // Pick a sub-mode from the starting offset relative to the base register.
  if (Offset == 4 && haveIBAndDA) {
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
  } else if (Offset == -4 * (int)NumRegs && isNotVFP) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
      // Use the scratch register to use as a new base.
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
    // Verify the offset is encodable as a (T2) shifter-operand immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    BaseKill = true; // New base is always killed straight away.

  // Loads define their register operands; stores read (use) them.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between
/// RangeBegin and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
/// NOTE(review): a few statements (continue/break guards and closing braces)
/// are elided in this excerpt; only comments were added.
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;  // position -> use operand
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;

  // Now we traverse the list of all uses, and append the ones that actually use
  // our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
    UsesOfImpDefs.push_back(I->second);
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
// NOTE(review): some parameters, statements and closing braces are elided in
// this excerpt; only comments were added.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;  // reg -> index of its killing memop
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      memOps[i].isKill = true;
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
/// NOTE(review): the leading return type and several statements/braces are
/// elided in this excerpt; only comments were added.
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // Undef operands sort last (UINT_MAX) so they never pass the
  // ascending-register-number check below.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Limit = ~0U;
  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with a odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      // Recurse to try merging the remainder of the queue.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,

    if (MemOps[i].Position > MemOps[insertAfter].Position)

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
/// Return true if \p MI has a live (non-dead) def of CPSR, in which case it
/// cannot be folded into a load / store without changing flag semantics.
/// NOTE(review): the operand-kind guard, return statements and closing
/// braces are elided in this excerpt.
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
/// Return true if \p MI is a SUB of \p Bytes from \p Base (back into Base)
/// under the same predicate, i.e. a decrement foldable into a preceding /
/// following load-store-multiple.
/// NOTE(review): the switch case labels and some returns/braces are elided
/// in this excerpt.
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))

  // tSUBspi counts in words, everything else in bytes.
  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))

  return CheckCPSRDef ? !definesCPSR(MI) : true;
/// Return true if \p MI is an ADD of \p Bytes to \p Base (back into Base)
/// under the same predicate; mirror of isMatchingDecrement.
/// NOTE(review): the switch case labels and some returns/braces are elided
/// in this excerpt.
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.

  // tADDspi counts in words, everything else in bytes.
  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))

  return CheckCPSRDef ? !definesCPSR(MI) : true;
/// Compute the number of bytes transferred by a load/store (multiple)
/// instruction: trailing variadic register operands times 4 (GPR/S regs) or
/// times 8 (D regs).
/// NOTE(review): the switch case labels and per-opcode returns are elided in
/// this excerpt; only the two variadic-count formulas remain.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// Map a non-writeback load/store-multiple opcode plus a sub-mode to the
/// corresponding base-updating (_UPD) opcode.
/// NOTE(review): the outer switch's case labels and closing braces are
/// elided in this excerpt; only comments were added.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    // Thumb2 supports only ia/db sub-modes.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
/// NOTE(review): some statements, a parameter and closing braces are elided
/// in this excerpt; only comments were added.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
/// Map a plain load/store opcode to its pre-indexed (writeback-before)
/// counterpart; VLDR/VSTR have no pre-indexed form, so they map to the
/// single-register _UPD multiple chosen by \p Mode.
/// NOTE(review): the switch header and case labels are elided in this
/// excerpt; only the per-opcode returns remain.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
    return ARM::LDR_PRE_IMM;
    return ARM::STR_PRE_IMM;
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    return ARM::t2LDR_PRE;
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
/// Map a plain load/store opcode to its post-indexed (writeback-after)
/// counterpart; mirror of getPreIndexedLoadStoreOpcode.
/// NOTE(review): the switch header and case labels are elided in this
/// excerpt; only the per-opcode returns remain.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
    return ARM::LDR_POST_IMM;
    return ARM::STR_POST_IMM;
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    return ARM::t2LDR_POST;
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// NOTE(review): some statements, a parameter and closing braces are elided
/// in this excerpt; only comments were added.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only offset-zero accesses can absorb a base update.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;

  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);

    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));

      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
/// NOTE(review): the switch case labels and some returns/braces are elided
/// in this excerpt; only comments were added.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  if (MMO->getAlignment() < 4)

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())

  int Opcode = MI->getOpcode();
    // Operand 1 must be a plain register base (not FI/label) to be mergeable.
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
/// NOTE(review): two closing braces are elided in this excerpt.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  // Find the memop with the smallest Position (earliest in the block).
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
/// Decode the signed byte offset of a load/store instruction from its
/// addressing-mode-specific immediate operand (third-from-last operand).
/// NOTE(review): the return statements and closing braces are elided in this
/// excerpt; only comments were added.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These opcodes store the offset directly as a byte immediate.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)

  // AM3 encodes a byte offset; AM5 encodes a word count, hence * 4.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
  if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// Emit a single LDR or STR (plain or Thumb2 form, per NewOpc) before MBBI,
// carrying over the given kill/dead/undef flags for the transfer register
// and the base register, plus the predicate operands.
1152 static void InsertLDR_STR(MachineBasicBlock &MBB,
1153 MachineBasicBlock::iterator &MBBI,
1154 int Offset, bool isDef,
1155 DebugLoc dl, unsigned NewOpc,
1156 unsigned Reg, bool RegDeadKill, bool RegUndef,
1157 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1158 bool OffKill, bool OffUndef,
1159 ARMCC::CondCodes Pred, unsigned PredReg,
1160 const TargetInstrInfo *TII, bool isT2) {
// Load form: Reg is a definition (possibly dead).
// NOTE(review): the branch condition on line 1161 is missing from this
// listing — presumably it selects between the two BuildMI sequences.
1162 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1164 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1165 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1166 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is a use (possibly killed and/or undef).
1168 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1170 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1171 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1172 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Rewrite an LDRD/STRD whose register pair is not a valid even/odd pair
// (or that trips Cortex-M3 errata 602117) into either an LDM/STM or a pair
// of single loads/stores. Returns whether a rewrite happened (the return
// statements themselves fall in lines missing from this listing).
1176 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1177 MachineBasicBlock::iterator &MBBI) {
1178 MachineInstr *MI = &*MBBI;
1179 unsigned Opcode = MI->getOpcode();
// Only the four doubleword load/store opcodes are candidates.
1180 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1181 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1182 const MachineOperand &BaseOp = MI->getOperand(2);
1183 unsigned BaseReg = BaseOp.getReg();
1184 unsigned EvenReg = MI->getOperand(0).getReg();
1185 unsigned OddReg = MI->getOperand(1).getReg();
1186 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1187 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1188 // ARM errata 602117: LDRD with base in list may result in incorrect base
1189 // register when interrupted or faulted.
1190 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
// A legal even/odd consecutive pair (and no errata hazard) needs no fix;
// the early-out body (lines 1193-1194) is missing from this listing.
1191 if (!Errata602117 &&
1192 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1195 MachineBasicBlock::iterator NewBBI = MBBI;
1196 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1197 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the destination flag of interest is dead; for stores, kill.
1198 bool EvenDeadKill = isLd ?
1199 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1200 bool EvenUndef = MI->getOperand(0).isUndef();
1201 bool OddDeadKill = isLd ?
1202 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1203 bool OddUndef = MI->getOperand(1).isUndef();
1204 bool BaseKill = BaseOp.isKill();
1205 bool BaseUndef = BaseOp.isUndef();
// t2 doubleword forms have no separate offset-register operand.
1206 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1207 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1208 int OffImm = getMemoryOpOffset(MI);
1209 unsigned PredReg = 0;
1210 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
// Case 1: ascending (non-even-aligned) registers with zero offset can be
// expressed as a single LDM/STM.
1212 if (OddRegNum > EvenRegNum && OffImm == 0) {
1213 // Ascending register numbers and no offset. It's safe to change it to a
1215 unsigned NewOpc = (isLd)
1216 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1217 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
// Load variant: both pair registers are definitions.
1219 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1220 .addReg(BaseReg, getKillRegState(BaseKill))
1221 .addImm(Pred).addReg(PredReg)
1222 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1223 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store variant: both pair registers are uses. NOTE(review): the .addReg
// name operands on lines 1229/1231 are missing from this listing.
1226 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1227 .addReg(BaseReg, getKillRegState(BaseKill))
1228 .addImm(Pred).addReg(PredReg)
1230 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1232 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1235 NewBBI = std::prev(MBBI);
// Case 2: otherwise split the doubleword op into two single-word ops.
1237 // Split into two instructions.
1238 unsigned NewOpc = (isLd)
1239 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1240 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1241 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1242 // so adjust and use t2LDRi12 here for that.
1243 unsigned NewOpc2 = (isLd)
1244 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1245 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1246 DebugLoc dl = MBBI->getDebugLoc();
1247 // If this is a load and base register is killed, it may have been
1248 // re-defed by the load, make sure the first load does not clobber it.
// NOTE(review): the opening of this condition (line 1249) is missing from
// this listing. When the even dest overlaps the base, emit the odd half
// first so the base is still live for the second (even) access.
1250 (BaseKill || OffKill) &&
1251 (TRI->regsOverlap(EvenReg, BaseReg))) {
1252 assert(!TRI->regsOverlap(OddReg, BaseReg));
1253 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1254 OddReg, OddDeadKill, false,
1255 BaseReg, false, BaseUndef, false, OffUndef,
1256 Pred, PredReg, TII, isT2);
1257 NewBBI = std::prev(MBBI);
1258 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1259 EvenReg, EvenDeadKill, false,
1260 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1261 Pred, PredReg, TII, isT2);
// Normal order: even half first, then odd half at +4.
1263 if (OddReg == EvenReg && EvenDeadKill) {
1264 // If the two source operands are the same, the kill marker is
1265 // probably on the first one. e.g.
1266 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1267 EvenDeadKill = false;
1270 // Never kill the base register in the first instruction.
1271 if (EvenReg == BaseReg)
1272 EvenDeadKill = false;
1273 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1274 EvenReg, EvenDeadKill, EvenUndef,
1275 BaseReg, false, BaseUndef, false, OffUndef,
1276 Pred, PredReg, TII, isT2);
1277 NewBBI = std::prev(MBBI);
1278 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1279 OddReg, OddDeadKill, OddUndef,
1280 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1281 Pred, PredReg, TII, isT2);
1296 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1297 /// ops of the same base and incrementing offset into LDM / STM ops.
1298 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// Running state for the current mergeable chain: base register, access
// size, predicate, and the queue of candidate memory ops (MemOps, whose
// declaration falls in a line missing from this listing).
1299 unsigned NumMerges = 0;
1300 unsigned NumMemOps = 0;
1302 unsigned CurrBase = 0;
1304 unsigned CurrSize = 0;
1305 ARMCC::CondCodes CurrPred = ARMCC::AL;
1306 unsigned CurrPredReg = 0;
1307 unsigned Position = 0;
1308 SmallVector<MachineBasicBlock::iterator,4> Merges;
1310 RS->enterBasicBlock(&MBB);
1311 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// First give FixInvalidRegPairOp a chance to rewrite illegal LDRD/STRD
// pairs at this position. (The loop header around this is missing from
// this listing.)
1313 if (FixInvalidRegPairOp(MBB, MBBI))
1316 bool Advance = false;
1317 bool TryMerge = false;
1318 bool Clobber = false;
1320 bool isMemOp = isMemoryOp(MBBI);
// Decompose the candidate memory op: opcode, transfer size, transferred
// register, base register, predicate and byte offset.
1322 int Opcode = MBBI->getOpcode();
1323 unsigned Size = getLSMultipleTransferSize(MBBI);
1324 const MachineOperand &MO = MBBI->getOperand(0);
1325 unsigned Reg = MO.getReg();
1326 bool isKill = MO.isDef() ? false : MO.isKill();
1327 unsigned Base = MBBI->getOperand(1).getReg();
1328 unsigned PredReg = 0;
1329 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1330 int Offset = getMemoryOpOffset(MBBI);
1333 // r5 := ldr [r5, #4]
1334 // r6 := ldr [r5, #8]
1336 // The second ldr has effectively broken the chain even though it
1337 // looks like the later ldr(s) use the same base register. Try to
1338 // merge the ldr's so far, including this one. But don't try to
1339 // combine the following ldr(s).
1340 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1343 // r4 := ldr [r0, #8]
1344 // r4 := ldr [r0, #4]
1346 // The optimization may reorder the second ldr in front of the first
1347 // ldr, which violates write after write(WAW) dependence. The same as
1348 // str. Try to merge inst(s) already in MemOps.
1349 bool Overlap = false;
1350 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1351 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
// Start a fresh chain when no chain is active and this op doesn't clobber
// its own base.
1357 if (CurrBase == 0 && !Clobber) {
1358 // Start of a new chain.
1363 CurrPredReg = PredReg;
1364 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1367 } else if (!Overlap) {
// Same opcode/base/predicate: extend the current chain, keeping MemOps
// sorted by offset.
1373 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1374 // No need to match PredReg.
1375 // Continue adding to the queue.
1376 if (Offset > MemOps.back().Offset) {
1377 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1382 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1384 if (Offset < I->Offset) {
1385 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1390 } else if (Offset == I->Offset) {
1391 // Collision! This can't be merged!
1400 if (MBBI->isDebugValue()) {
1403 // Reach the end of the block, try merging the memory instructions.
1405 } else if (Advance) {
1409 // Reach the end of the block, try merging the memory instructions.
// Merge phase: fired when the chain is broken or the block ends.
1416 if (NumMemOps > 1) {
1417 // Try to find a free register to use as a new base in case it's needed.
1418 // First advance to the instruction just before the start of the chain.
1419 AdvanceRS(MBB, MemOps);
1420 // Find a scratch register.
1421 unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
1422 // Process the load / store instructions.
1423 RS->forward(std::prev(MBBI));
1427 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1428 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1430 // Try folding preceding/trailing base inc/dec into the generated
1432 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1433 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1435 NumMerges += Merges.size();
1437 // Try folding preceding/trailing base inc/dec into those load/store
1438 // that were not merged to form LDM/STM ops.
1439 for (unsigned i = 0; i != NumMemOps; ++i)
1440 if (!MemOps[i].Merged)
1441 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1444 // RS may be pointing to an instruction that's deleted.
1445 RS->skipTo(std::prev(MBBI));
1446 } else if (NumMemOps == 1) {
1447 // Try folding preceding/trailing base inc/dec into the single
1449 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1451 RS->forward(std::prev(MBBI));
// Reset chain state for the next scan (other resets fall in lines missing
// from this listing).
1458 CurrPred = ARMCC::AL;
1465 // If iterator hasn't been advanced and this is not a memory op, skip it.
1466 // It can't start a new chain anyway.
1467 if (!Advance && !isMemOp && MBBI != E) {
1473 return NumMerges > 0;
1476 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1477 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1478 /// directly restore the value of LR into pc.
1479 /// ldmfd sp!, {..., lr}
1482 /// ldmfd sp!, {..., lr}
1485 /// ldmfd sp!, {..., pc}
1486 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1487 if (MBB.empty()) return false;
1489 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
// Only act when the block ends in a bare return and that return is not the
// only instruction in the block.
1490 if (MBBI != MBB.begin() &&
1491 (MBBI->getOpcode() == ARM::BX_RET ||
1492 MBBI->getOpcode() == ARM::tBX_RET ||
1493 MBBI->getOpcode() == ARM::MOVPCLR)) {
1494 MachineInstr *PrevMI = std::prev(MBBI);
1495 unsigned Opcode = PrevMI->getOpcode();
// The instruction before the return must be a writeback multiple-load.
1496 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1497 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1498 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// ... and its last transferred register must be LR, so rewriting the
// destination to PC produces the return directly.
1499 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1500 if (MO.getReg() != ARM::LR)
1502 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1503 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1504 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1505 PrevMI->setDesc(TII->get(NewOpc));
// Preserve the return's implicit operands (e.g. callee-saved register
// liveness) on the rewritten LDM.
1507 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
// Pass entry point: cache target hooks, then run the LDM/STM merging and
// (on v5T+) the return-into-LDM folding over every basic block.
1515 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1516 const TargetMachine &TM = Fn.getTarget();
1517 AFI = Fn.getInfo<ARMFunctionInfo>();
1518 TII = TM.getInstrInfo();
1519 TRI = TM.getRegisterInfo();
1520 STI = &TM.getSubtarget<ARMSubtarget>();
// NOTE(review): RS is allocated with raw 'new' here; the matching delete
// is not visible in this listing — confirm it happens before returning.
1521 RS = new RegScavenger();
1522 isThumb2 = AFI->isThumb2Function();
1524 bool Modified = false;
1525 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1527 MachineBasicBlock &MBB = *MFI;
1528 Modified |= LoadStoreMultipleOpti(MBB);
// MergeReturnIntoLDM relies on LDM-with-PC-in-list return semantics,
// gated here on v5T and later.
1529 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1530 Modified |= MergeReturnIntoLDM(MBB);
1538 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1539 /// load / stores from consecutive locations close to make it more
1540 /// likely they will be combined later.
1543 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1545 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target/function context, initialized in runOnMachineFunction.
1547 const DataLayout *TD;
1548 const TargetInstrInfo *TII;
1549 const TargetRegisterInfo *TRI;
1550 const ARMSubtarget *STI;
1551 MachineRegisterInfo *MRI;
1552 MachineFunction *MF;
1554 bool runOnMachineFunction(MachineFunction &Fn) override;
1556 const char *getPassName() const override {
1557 return "ARM pre- register allocation load / store optimization pass";
// Check whether two adjacent single-word ops can combine into LDRD/STRD,
// returning the chosen opcode, register pair, base and predicate via the
// out-parameters. (Some parameter lines are missing from this listing.)
1561 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1562 unsigned &NewOpc, unsigned &EvenReg,
1563 unsigned &OddReg, unsigned &BaseReg,
1565 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Move same-base loads (or stores) next to each other inside MBB.
1567 bool RescheduleOps(MachineBasicBlock *MBB,
1568 SmallVectorImpl<MachineInstr *> &Ops,
1569 unsigned Base, bool isLd,
1570 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1571 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification (address used as the unique pass ID).
1573 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pre-RA pass entry point: cache target hooks, then reschedule loads and
// stores block by block.
1576 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1577 TD = Fn.getTarget().getDataLayout();
1578 TII = Fn.getTarget().getInstrInfo();
1579 TRI = Fn.getTarget().getRegisterInfo();
1580 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1581 MRI = &Fn.getRegInfo();
1584 bool Modified = false;
1585 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1587 Modified |= RescheduleLoadStoreInstrs(MFI);
// Decide whether the memory ops in MemOps can be moved together across the
// instructions in [I, E) without violating dependences, and whether doing
// so keeps the estimated register-pressure increase acceptable.
1592 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1593 MachineBasicBlock::iterator I,
1594 MachineBasicBlock::iterator E,
1595 SmallPtrSet<MachineInstr*, 4> &MemOps,
1596 SmallSet<unsigned, 4> &MemRegs,
1597 const TargetRegisterInfo *TRI) {
1598 // Are there stores / loads / calls between them?
1599 // FIXME: This is overly conservative. We should make use of alias information
1601 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the ops being moved themselves.
1603 if (I->isDebugValue() || MemOps.count(&*I))
// Calls, terminators and side-effecting instructions are hard barriers.
1605 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
// Moving loads across an intervening store is unsafe.
1607 if (isLd && I->mayStore())
1612 // It's not safe to move the first 'str' down.
1615 // str r4, [r0, #+4]
// Scan the intervening instruction's operands: a def that overlaps the
// base register kills the transformation; other registers it touches are
// counted as added pressure.
1619 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1620 MachineOperand &MO = I->getOperand(j);
1623 unsigned Reg = MO.getReg();
1624 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1626 if (Reg != Base && !MemRegs.count(Reg))
1627 AddedRegPressure.insert(Reg);
1631 // Estimate register pressure increase due to the transformation.
1632 if (MemRegs.size() <= 4)
1633 // Ok if we are moving small number of instructions.
// Otherwise allow at most a 2x pressure increase relative to the number of
// registers the moved ops already use.
1635 return AddedRegPressure.size() <= MemRegs.size() * 2;
1639 /// Copy Op0 and Op1 operands into a new array assigned to MI.
1640 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1641 MachineInstr *Op1) {
1642 assert(MI->memoperands_empty() && "expected a new machineinstr");
// Total memory-operand count of both source instructions.
1643 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1644 + (Op1->memoperands_end() - Op1->memoperands_begin());
1646 MachineFunction *MF = MI->getParent()->getParent();
// Allocate one combined array from the function's allocator, then copy
// Op0's memoperands followed by Op1's into it.
1647 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1648 MachineSDNode::mmo_iterator MemEnd =
1649 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1651 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1652 MI->setMemRefs(MemBegin, MemEnd);
// Check whether the pair (Op0, Op1) can be combined into a single
// LDRD/STRD (or t2LDRDi8/t2STRDi8): subtarget support, opcode match,
// alignment, offset range, and distinct destination registers. Fills the
// out-parameters with the data needed to build the paired instruction.
1656 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1658 unsigned &NewOpc, unsigned &EvenReg,
1659 unsigned &OddReg, unsigned &BaseReg,
1660 int &Offset, unsigned &PredReg,
1661 ARMCC::CondCodes &Pred,
1663 // Make sure we're allowed to generate LDRD/STRD.
1664 if (!STI->hasV5TEOps())
1667 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the single-word opcode to its doubleword counterpart. (The NewOpc
// assignments for the ARM-mode cases fall in lines missing from this
// listing.)
1669 unsigned Opcode = Op0->getOpcode();
1670 if (Opcode == ARM::LDRi12) {
1672 } else if (Opcode == ARM::STRi12) {
1674 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1675 NewOpc = ARM::t2LDRDi8;
1678 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1679 NewOpc = ARM::t2STRDi8;
1686 // Make sure the base address satisfies i64 ld / st alignment requirement.
1687 // At the moment, we ignore the memoryoperand's value.
1688 // If we want to use AliasAnalysis, we should check it accordingly.
1689 if (!Op0->hasOneMemOperand() ||
1690 (*Op0->memoperands_begin())->isVolatile())
1693 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1694 const Function *Func = MF->getFunction();
1695 unsigned ReqAlign = STI->hasV6Ops()
1696 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1697 : 8; // Pre-v6 need 8-byte align
1698 if (Align < ReqAlign)
1701 // Then make sure the immediate offset fits.
1702 int OffImm = getMemoryOpOffset(Op0);
// First range check (Thumb2 path, per surrounding structure — the branch
// header on line 1703 is missing from this listing).
1704 int Limit = (1 << 8) * Scale;
1705 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM AM3 path: a negative offset flips the add/sub flag and the encoded
// magnitude is re-checked against the AM3 range.
1709 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1711 AddSub = ARM_AM::sub;
1714 int Limit = (1 << 8) * Scale;
1715 if (OffImm >= Limit || (OffImm & (Scale-1)))
1717 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD/STRD require two distinct transfer registers.
1719 EvenReg = Op0->getOperand(0).getReg();
1720 OddReg = Op1->getOperand(0).getReg();
1721 if (EvenReg == OddReg)
1723 BaseReg = Op0->getOperand(1).getReg();
1724 Pred = getInstrPredicate(Op0, PredReg);
1725 dl = Op0->getDebugLoc();
// Given all loads (or stores, per isLd) off one base register in a block,
// move runs of consecutive-offset ops next to each other so a later pass
// can merge them, and directly form LDRD/STRD for qualifying pairs.
// MI2LocMap gives each instruction's ordinal position in the block.
1729 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1730 SmallVectorImpl<MachineInstr *> &Ops,
1731 unsigned Base, bool isLd,
1732 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1733 bool RetVal = false;
1735 // Sort by offset (in reverse order).
1736 std::sort(Ops.begin(), Ops.end(),
1737 [](const MachineInstr *LHS, const MachineInstr *RHS) {
1738 int LOffset = getMemoryOpOffset(LHS);
1739 int ROffset = getMemoryOpOffset(RHS);
// Duplicate base+offset combinations were filtered out by the caller.
1740 assert(LHS == RHS || LOffset != ROffset);
1741 return LOffset > ROffset;
1744 // The loads / stores of the same base are in order. Scan them from first to
1745 // last and check for the following:
1746 // 1. Any def of base.
1748 while (Ops.size() > 1) {
// Track the program-order span [FirstLoc, LastLoc] of the run of ops with
// strictly consecutive offsets collected in this iteration.
1749 unsigned FirstLoc = ~0U;
1750 unsigned LastLoc = 0;
1751 MachineInstr *FirstOp = nullptr;
1752 MachineInstr *LastOp = nullptr;
1754 unsigned LastOpcode = 0;
1755 unsigned LastBytes = 0;
1756 unsigned NumMove = 0;
// Ops is sorted by descending offset, so walking from the back visits
// ascending offsets.
1757 for (int i = Ops.size() - 1; i >= 0; --i) {
1758 MachineInstr *Op = Ops[i];
1759 unsigned Loc = MI2LocMap[Op];
1760 if (Loc <= FirstLoc) {
1764 if (Loc >= LastLoc) {
// Stop extending the run when the multiple-transfer opcode class changes.
1770 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1771 if (LastOpcode && LSMOpcode != LastOpcode)
// ... or when offsets stop being contiguous (previous offset + size).
1774 int Offset = getMemoryOpOffset(Op);
1775 unsigned Bytes = getLSMultipleTransferSize(Op);
1777 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1780 LastOffset = Offset;
1782 LastOpcode = LSMOpcode;
1783 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and their transfer registers for the
// safety/profitability check.
1790 SmallPtrSet<MachineInstr*, 4> MemOps;
1791 SmallSet<unsigned, 4> MemRegs;
1792 for (int i = NumMove-1; i >= 0; --i) {
1793 MemOps.insert(Ops[i]);
1794 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1797 // Be conservative, if the instructions are too far apart, don't
1798 // move them. We want to limit the increase of register pressure.
1799 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1801 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1802 MemOps, MemRegs, TRI);
1804 for (unsigned i = 0; i != NumMove; ++i)
1807 // This is the new location for the loads / stores.
// Loads move up to the first op's position, stores down to the last's;
// skip over the ops being moved and any debug values.
1808 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1809 while (InsertPos != MBB->end()
1810 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1813 // If we are moving a pair of loads / stores, see if it makes sense
1814 // to try to allocate a pair of registers that can form register pairs.
1815 MachineInstr *Op0 = Ops.back();
1816 MachineInstr *Op1 = Ops[Ops.size()-2];
1817 unsigned EvenReg = 0, OddReg = 0;
1818 unsigned BaseReg = 0, PredReg = 0;
1819 ARMCC::CondCodes Pred = ARMCC::AL;
1821 unsigned NewOpc = 0;
1824 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1825 EvenReg, OddReg, BaseReg,
1826 Offset, PredReg, Pred, isT2)) {
// Constrain both registers to the class LDRD/STRD requires before
// building the paired instruction.
1830 const MCInstrDesc &MCID = TII->get(NewOpc);
1831 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1832 MRI->constrainRegClass(EvenReg, TRC);
1833 MRI->constrainRegClass(OddReg, TRC);
1835 // Form the pair instruction.
// Load pair: both registers are definitions.
1837 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1838 .addReg(EvenReg, RegState::Define)
1839 .addReg(OddReg, RegState::Define)
1841 // FIXME: We're converting from LDRi12 to an insn that still
1842 // uses addrmode2, so we need an explicit offset reg. It should
1843 // always by reg0 since we're transforming LDRi12s.
1846 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1847 concatenateMemOperands(MIB, Op0, Op1);
1848 DEBUG(dbgs() << "Formed " << *MIB << "\n");
// Store pair: registers are uses (the .addReg operands fall in lines
// missing from this listing).
1851 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1855 // FIXME: We're converting from LDRi12 to an insn that still
1856 // uses addrmode2, so we need an explicit offset reg. It should
1857 // always by reg0 since we're transforming STRi12s.
1860 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1861 concatenateMemOperands(MIB, Op0, Op1);
1862 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1868 // Add register allocation hints to form register pairs.
1869 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1870 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Fallback / multi-op case: splice each op to the chosen insert position.
1872 for (unsigned i = 0; i != NumMove; ++i) {
1873 MachineInstr *Op = Ops.back();
1875 MBB->splice(InsertPos, MBB, Op);
1879 NumLdStMoved += NumMove;
// Bucket the block's unpredicated loads and stores by base register
// (recording each instruction's position in MI2LocMap), then invoke
// RescheduleOps per base. Buckets are flushed at call/terminator barriers
// and when a duplicate base+offset is seen.
1889 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1890 bool RetVal = false;
1892 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1893 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1894 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
// Bases are also kept in vectors to preserve discovery order when
// processing the buckets below.
1895 SmallVector<unsigned, 4> LdBases;
1896 SmallVector<unsigned, 4> StBases;
1899 MachineBasicBlock::iterator MBBI = MBB->begin();
1900 MachineBasicBlock::iterator E = MBB->end();
1902 for (; MBBI != E; ++MBBI) {
1903 MachineInstr *MI = MBBI;
1904 if (MI->isCall() || MI->isTerminator()) {
1905 // Stop at barriers.
// Assign each non-debug instruction an ordinal used for distance checks
// in RescheduleOps.
1910 if (!MI->isDebugValue())
1911 MI2LocMap[MI] = ++Loc;
1913 if (!isMemoryOp(MI))
// Only unpredicated (always-executed) memory ops are candidates.
1915 unsigned PredReg = 0;
1916 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1919 int Opc = MI->getOpcode();
1920 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1921 unsigned Base = MI->getOperand(1).getReg();
1922 int Offset = getMemoryOpOffset(MI);
1924 bool StopHere = false;
// Load path: append to this base's bucket, flagging StopHere on a
// duplicate offset (the StopHere assignment falls in missing lines).
1926 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1927 Base2LdsMap.find(Base);
1928 if (BI != Base2LdsMap.end()) {
1929 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1930 if (Offset == getMemoryOpOffset(BI->second[i])) {
1936 BI->second.push_back(MI);
// First load seen for this base: start a new bucket.
1938 Base2LdsMap[Base].push_back(MI);
1939 LdBases.push_back(Base);
// Store path: same bucketing scheme as loads.
1942 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1943 Base2StsMap.find(Base);
1944 if (BI != Base2StsMap.end()) {
1945 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1946 if (Offset == getMemoryOpOffset(BI->second[i])) {
1952 BI->second.push_back(MI);
1954 Base2StsMap[Base].push_back(MI);
1955 StBases.push_back(Base);
1960 // Found a duplicate (a base+offset combination that's seen earlier).
1967 // Re-schedule loads.
1968 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1969 unsigned Base = LdBases[i];
1970 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
1972 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1975 // Re-schedule stores.
1976 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1977 unsigned Base = StBases[i];
1978 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
1980 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Clear the buckets so processing can resume after a barrier flush.
1984 Base2LdsMap.clear();
1985 Base2StsMap.clear();
1995 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1996 /// optimization pass.
1997 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1999 return new ARMPreAllocLoadStoreOpt();
2000 return new ARMLoadStoreOpt();