1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/RegisterScavenging.h"
33 #include "llvm/CodeGen/SelectionDAGNodes.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Target/TargetInstrInfo.h"
40 #include "llvm/Target/TargetMachine.h"
41 #include "llvm/Target/TargetRegisterInfo.h"
44 #define DEBUG_TYPE "arm-ldst-opt"
46 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
47 STATISTIC(NumSTMGened , "Number of stm instructions generated");
48 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
49 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
50 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
51 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
52 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
53 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
54 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
55 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
56 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
62 struct ARMLoadStoreOpt : public MachineFunctionPass {
64 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
66 const TargetInstrInfo *TII;
67 const TargetRegisterInfo *TRI;
68 const ARMSubtarget *STI;
71 bool isThumb1, isThumb2;
73 bool runOnMachineFunction(MachineFunction &Fn) override;
75 const char *getPassName() const override {
76 return "ARM load / store optimization pass";
80 struct MemOpQueueEntry {
85 MachineBasicBlock::iterator MBBI;
87 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
88 MachineBasicBlock::iterator i)
89 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
91 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
92 typedef MemOpQueue::iterator MemOpQueueIter;
94 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
95 const MemOpQueue &MemOps, unsigned DefReg,
96 unsigned RangeBegin, unsigned RangeEnd);
98 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
99 int Offset, unsigned Base, bool BaseKill, int Opcode,
100 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
102 ArrayRef<std::pair<unsigned, bool> > Regs,
103 ArrayRef<unsigned> ImpDefs);
104 void MergeOpsUpdate(MachineBasicBlock &MBB,
106 unsigned memOpsBegin,
108 unsigned insertAfter,
113 ARMCC::CondCodes Pred,
117 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
118 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
119 int Opcode, unsigned Size,
120 ARMCC::CondCodes Pred, unsigned PredReg,
121 unsigned Scratch, MemOpQueue &MemOps,
122 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
124 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
125 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
126 MachineBasicBlock::iterator &MBBI);
127 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
128 MachineBasicBlock::iterator MBBI,
129 const TargetInstrInfo *TII,
131 MachineBasicBlock::iterator &I);
132 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MBBI,
135 MachineBasicBlock::iterator &I);
136 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
137 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
139 char ARMLoadStoreOpt::ID = 0;
142 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
144 default: llvm_unreachable("Unhandled opcode!");
148 default: llvm_unreachable("Unhandled submode!");
149 case ARM_AM::ia: return ARM::LDMIA;
150 case ARM_AM::da: return ARM::LDMDA;
151 case ARM_AM::db: return ARM::LDMDB;
152 case ARM_AM::ib: return ARM::LDMIB;
157 default: llvm_unreachable("Unhandled submode!");
158 case ARM_AM::ia: return ARM::STMIA;
159 case ARM_AM::da: return ARM::STMDA;
160 case ARM_AM::db: return ARM::STMDB;
161 case ARM_AM::ib: return ARM::STMIB;
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2LDMIA;
169 case ARM_AM::db: return ARM::t2LDMDB;
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::t2STMIA;
177 case ARM_AM::db: return ARM::t2STMDB;
182 default: llvm_unreachable("Unhandled submode!");
183 case ARM_AM::ia: return ARM::VLDMSIA;
184 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
189 default: llvm_unreachable("Unhandled submode!");
190 case ARM_AM::ia: return ARM::VSTMSIA;
191 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
196 default: llvm_unreachable("Unhandled submode!");
197 case ARM_AM::ia: return ARM::VLDMDIA;
198 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
203 default: llvm_unreachable("Unhandled submode!");
204 case ARM_AM::ia: return ARM::VSTMDIA;
205 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
213 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
215 default: llvm_unreachable("Unhandled opcode!");
221 case ARM::t2LDMIA_RET:
223 case ARM::t2LDMIA_UPD:
225 case ARM::t2STMIA_UPD:
227 case ARM::VLDMSIA_UPD:
229 case ARM::VSTMSIA_UPD:
231 case ARM::VLDMDIA_UPD:
233 case ARM::VSTMDIA_UPD:
247 case ARM::t2LDMDB_UPD:
249 case ARM::t2STMDB_UPD:
250 case ARM::VLDMSDB_UPD:
251 case ARM::VSTMSDB_UPD:
252 case ARM::VLDMDDB_UPD:
253 case ARM::VSTMDDB_UPD:
264 } // end namespace ARM_AM
265 } // end namespace llvm
267 static bool isT2i32Load(unsigned Opc) {
268 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
271 static bool isi32Load(unsigned Opc) {
272 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
275 static bool isT2i32Store(unsigned Opc) {
276 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
279 static bool isi32Store(unsigned Opc) {
280 return Opc == ARM::STRi12 || isT2i32Store(Opc);
/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands to be loaded / stored.
/// It returns true if the transformation is done.
287 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
288 MachineBasicBlock::iterator MBBI,
289 int Offset, unsigned Base, bool BaseKill,
290 int Opcode, ARMCC::CondCodes Pred,
291 unsigned PredReg, unsigned Scratch, DebugLoc dl,
292 ArrayRef<std::pair<unsigned, bool> > Regs,
293 ArrayRef<unsigned> ImpDefs) {
294 // Only a single register to load / store. Don't bother.
295 unsigned NumRegs = Regs.size();
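// Note on the submode selection below (illustrative summary of the checks):
//   Offset ==  4                  -> IB (increment before)
//   Offset == -4 * (NumRegs - 1)  -> DA (decrement after)
//   Offset == -4 * NumRegs        -> DB (decrement before)
//   Offset ==  0                  -> IA (increment after), the default
// Any other offset requires materializing a new base register first.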
299 ARM_AM::AMSubMode Mode = ARM_AM::ia;
300 // VFP and Thumb2 do not support IB or DA modes.
301 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
302 bool haveIBAndDA = isNotVFP && !isThumb2;
303 if (Offset == 4 && haveIBAndDA) {
305 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
307 } else if (Offset == -4 * (int)NumRegs && isNotVFP) {
308 // VLDM/VSTM do not support DB mode without also updating the base reg.
310 } else if (Offset != 0) {
311 // Check if this is a supported opcode before inserting instructions to
312 // calculate a new base register.
313 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
// If the starting offset isn't zero, insert an MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
322 if (isi32Load(Opcode)) {
// If it is a load, then just use one of the destination registers
// as the new base.
325 NewBase = Regs[NumRegs-1].first;
// Use the scratch register as the new base.
332 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
334 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
337 int ImmedOffset = isThumb2
338 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
339 if (ImmedOffset == -1)
340 // FIXME: Try t2ADDri12 or t2SUBri12?
341 return false; // Probably not worth it then.
343 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
344 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
345 .addImm(Pred).addReg(PredReg).addReg(0);
347 BaseKill = true; // New base is always killed straight away.
350 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
351 Opcode == ARM::VLDRD);
352 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
353 if (!Opcode) return false;
354 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
355 .addReg(Base, getKillRegState(BaseKill))
356 .addImm(Pred).addReg(PredReg);
357 for (unsigned i = 0; i != NumRegs; ++i)
358 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
359 | getKillRegState(Regs[i].second));
361 // Add implicit defs for super-registers.
362 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
363 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
368 /// \brief Find all instructions using a given imp-def within a range.
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
375 /// The live range continues until we find a second definition or one of the
376 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
377 /// we must consider all uses and decide which are relevant in a second pass.
378 void ARMLoadStoreOpt::findUsesOfImpDef(
379 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
380 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
381 std::map<unsigned, MachineOperand *> Uses;
382 unsigned LastLivePos = RangeEnd;
384 // First we find all uses of this register with Position between RangeBegin
385 // and RangeEnd, any or all of these could be uses of a definition at
386 // RangeBegin. We also record the latest position a definition at RangeBegin
387 // would be considered live.
388 for (unsigned i = 0; i < MemOps.size(); ++i) {
389 MachineInstr &MI = *MemOps[i].MBBI;
390 unsigned MIPosition = MemOps[i].Position;
391 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
394 // If this instruction defines the register, then any later use will be of
395 // that definition rather than ours.
396 if (MI.definesRegister(DefReg))
397 LastLivePos = std::min(LastLivePos, MIPosition);
399 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
403 // If this instruction kills the register then (assuming liveness is
404 // correct when we start) we don't need to think about anything after here.
406 LastLivePos = std::min(LastLivePos, MIPosition);
408 Uses[MIPosition] = UseOp;
411 // Now we traverse the list of all uses, and append the ones that actually use
412 // our definition to the requested list.
413 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
416 // List is sorted by position so once we've found one out of range there
417 // will be no more to consider.
418 if (I->first > LastLivePos)
420 UsesOfImpDefs.push_back(I->second);
// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
426 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
428 unsigned memOpsBegin, unsigned memOpsEnd,
429 unsigned insertAfter, int Offset,
430 unsigned Base, bool BaseKill,
432 ARMCC::CondCodes Pred, unsigned PredReg,
435 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
438 const unsigned insertPos = memOps[insertAfter].Position;
439 SmallSet<unsigned, 4> KilledRegs;
440 DenseMap<unsigned, unsigned> Killer;
441 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
442 if (i == memOpsBegin) {
447 if (memOps[i].Position < insertPos && memOps[i].isKill) {
448 unsigned Reg = memOps[i].Reg;
449 KilledRegs.insert(Reg);
454 SmallVector<std::pair<unsigned, bool>, 8> Regs;
455 SmallVector<unsigned, 8> ImpDefs;
456 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
457 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
458 unsigned Reg = memOps[i].Reg;
459 // If we are inserting the merged operation after an operation that
460 // uses the same register, make sure to transfer any kill flag.
461 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
462 Regs.push_back(std::make_pair(Reg, isKill));
464 // Collect any implicit defs of super-registers. They must be preserved.
465 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
466 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
468 unsigned DefReg = MO->getReg();
469 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
470 ImpDefs.push_back(DefReg);
472 // There may be other uses of the definition between this instruction and
473 // the eventual LDM/STM position. These should be marked undef if the
474 // merge takes place.
475 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
480 // Try to do the merge.
481 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
483 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
484 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
487 // Merge succeeded, update records.
488 Merges.push_back(std::prev(Loc));
// In gathering loads together, we may have moved the imp-def of a register
// past one of its uses. This is OK, since we know better than the rest of
// LLVM what's OK with ARM loads and stores; but we still have to adjust the
// flags on those uses, marking them undef as described above.
494 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
495 E = UsesOfImpDefs.end();
499 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
500 // Remove kill flags from any memops that come before insertPos.
501 if (Regs[i-memOpsBegin].second) {
502 unsigned Reg = Regs[i-memOpsBegin].first;
503 if (KilledRegs.count(Reg)) {
504 unsigned j = Killer[Reg];
505 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
506 assert(Idx >= 0 && "Cannot find killing operand");
507 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
508 memOps[j].isKill = false;
510 memOps[i].isKill = true;
512 MBB.erase(memOps[i].MBBI);
513 // Update this memop to refer to the merged instruction.
514 // We may need to move kill flags again.
515 memOps[i].Merged = true;
516 memOps[i].MBBI = Merges.back();
517 memOps[i].Position = insertPos;
521 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
522 /// load / store multiple instructions.
524 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
525 unsigned Base, int Opcode, unsigned Size,
526 ARMCC::CondCodes Pred, unsigned PredReg,
527 unsigned Scratch, MemOpQueue &MemOps,
528 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
529 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
530 int Offset = MemOps[SIndex].Offset;
531 int SOffset = Offset;
532 unsigned insertAfter = SIndex;
533 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
534 DebugLoc dl = Loc->getDebugLoc();
535 const MachineOperand &PMO = Loc->getOperand(0);
536 unsigned PReg = PMO.getReg();
537 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
539 unsigned Limit = ~0U;
// vldm / vstm limits are 32 for the S variants and 16 for the D variants.
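// Illustrative example of the kind of run the scan below sets up (register
// numbers must be ascending and offsets consecutive):
//   ldr r1, [r0]
//   ldr r2, [r0, #4]
//   ldr r3, [r0, #8]
// =>
//   ldmia r0, {r1, r2, r3}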
559 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
560 int NewOffset = MemOps[i].Offset;
561 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
562 unsigned Reg = MO.getReg();
563 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
564 // Register numbers must be in ascending order. For VFP / NEON load and
565 // store multiples, the registers must also be consecutive and within the
566 // limit on the number of registers per instruction.
567 if (Reg != ARM::SP &&
568 NewOffset == Offset + (int)Size &&
569 ((isNotVFP && RegNum > PRegNum) ||
570 ((Count < Limit) && RegNum == PRegNum+1)) &&
// On Swift we don't want vldm/vstm to start with an odd register number
// because unaligned Q-register vldm/vstm need more uops.
573 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
// Can't merge this one in. Try to merge the earlier ones first.
579 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
580 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
581 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
586 if (MemOps[i].Position > MemOps[insertAfter].Position)
590 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
591 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
592 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
595 static bool definesCPSR(MachineInstr *MI) {
596 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
597 const MachineOperand &MO = MI->getOperand(i);
600 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
// If the instruction has a live CPSR def, then it's not safe to fold it
// into a load / store.
609 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
610 unsigned Bytes, unsigned Limit,
611 ARMCC::CondCodes Pred, unsigned PredReg) {
612 unsigned MyPredReg = 0;
616 bool CheckCPSRDef = false;
617 switch (MI->getOpcode()) {
618 default: return false;
627 // Make sure the offset fits in 8 bits.
628 if (Bytes == 0 || (Limit && Bytes >= Limit))
631 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
632 if (!(MI->getOperand(0).getReg() == Base &&
633 MI->getOperand(1).getReg() == Base &&
634 (MI->getOperand(2).getImm()*Scale) == Bytes &&
635 getInstrPredicate(MI, MyPredReg) == Pred &&
636 MyPredReg == PredReg))
639 return CheckCPSRDef ? !definesCPSR(MI) : true;
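// An increment that isMatchingIncrement (below) recognizes looks like, e.g.:
//   stmia r0, {r1, r2, r3}
//   add   r0, r0, #12        ; Bytes == 12, same predicate, CPSR not set
// which can later be folded into the writeback form "stmia r0!, {r1, r2, r3}".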
642 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
643 unsigned Bytes, unsigned Limit,
644 ARMCC::CondCodes Pred, unsigned PredReg) {
645 unsigned MyPredReg = 0;
649 bool CheckCPSRDef = false;
650 switch (MI->getOpcode()) {
651 default: return false;
660 if (Bytes == 0 || (Limit && Bytes >= Limit))
661 // Make sure the offset fits in 8 bits.
664 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
665 if (!(MI->getOperand(0).getReg() == Base &&
666 MI->getOperand(1).getReg() == Base &&
667 (MI->getOperand(2).getImm()*Scale) == Bytes &&
668 getInstrPredicate(MI, MyPredReg) == Pred &&
669 MyPredReg == PredReg))
672 return CheckCPSRDef ? !definesCPSR(MI) : true;
675 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
676 switch (MI->getOpcode()) {
704 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
707 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
711 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
712 ARM_AM::AMSubMode Mode) {
714 default: llvm_unreachable("Unhandled opcode!");
720 default: llvm_unreachable("Unhandled submode!");
721 case ARM_AM::ia: return ARM::LDMIA_UPD;
722 case ARM_AM::ib: return ARM::LDMIB_UPD;
723 case ARM_AM::da: return ARM::LDMDA_UPD;
724 case ARM_AM::db: return ARM::LDMDB_UPD;
731 default: llvm_unreachable("Unhandled submode!");
732 case ARM_AM::ia: return ARM::STMIA_UPD;
733 case ARM_AM::ib: return ARM::STMIB_UPD;
734 case ARM_AM::da: return ARM::STMDA_UPD;
735 case ARM_AM::db: return ARM::STMDB_UPD;
740 default: llvm_unreachable("Unhandled submode!");
741 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
742 case ARM_AM::db: return ARM::t2LDMDB_UPD;
747 default: llvm_unreachable("Unhandled submode!");
748 case ARM_AM::ia: return ARM::t2STMIA_UPD;
749 case ARM_AM::db: return ARM::t2STMDB_UPD;
753 default: llvm_unreachable("Unhandled submode!");
754 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
755 case ARM_AM::db: return ARM::VLDMSDB_UPD;
759 default: llvm_unreachable("Unhandled submode!");
760 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
761 case ARM_AM::db: return ARM::VLDMDDB_UPD;
765 default: llvm_unreachable("Unhandled submode!");
766 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
767 case ARM_AM::db: return ARM::VSTMSDB_UPD;
771 default: llvm_unreachable("Unhandled submode!");
772 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
773 case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
///   stmia rn, <ra, rb, rc>
///   rn := rn + 4 * 3;
/// =>
///   stmia rn!, <ra, rb, rc>
///
///   rn := rn - 4 * 3;
///   ldmia rn, <ra, rb, rc>
/// =>
///   ldmdb rn!, <ra, rb, rc>
790 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
791 MachineBasicBlock::iterator MBBI,
793 MachineBasicBlock::iterator &I) {
794 MachineInstr *MI = MBBI;
795 unsigned Base = MI->getOperand(0).getReg();
796 bool BaseKill = MI->getOperand(0).isKill();
797 unsigned Bytes = getLSMultipleTransferSize(MI);
798 unsigned PredReg = 0;
799 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
800 int Opcode = MI->getOpcode();
801 DebugLoc dl = MI->getDebugLoc();
803 // Can't use an updating ld/st if the base register is also a dest
804 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
805 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
806 if (MI->getOperand(i).getReg() == Base)
809 bool DoMerge = false;
810 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
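// If the previous instruction subtracts the transfer size from the base, the
// submode flips (ia -> db, ib -> da) and the subtract is folded away; if the
// following instruction adjusts the base by the transfer size, the existing
// submode simply gains writeback.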
812 // Try merging with the previous instruction.
813 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
814 if (MBBI != BeginMBBI) {
815 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
816 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
818 if (Mode == ARM_AM::ia &&
819 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
822 } else if (Mode == ARM_AM::ib &&
823 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
831 // Try merging with the next instruction.
832 MachineBasicBlock::iterator EndMBBI = MBB.end();
833 if (!DoMerge && MBBI != EndMBBI) {
834 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
835 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
837 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
838 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
840 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
841 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
856 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
857 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
858 .addReg(Base, getDefRegState(true)) // WB base register
859 .addReg(Base, getKillRegState(BaseKill))
860 .addImm(Pred).addReg(PredReg);
// Transfer the rest of the operands.
863 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
864 MIB.addOperand(MI->getOperand(OpNum));
866 // Transfer memoperands.
867 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
873 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
874 ARM_AM::AddrOpc Mode) {
877 return ARM::LDR_PRE_IMM;
879 return ARM::STR_PRE_IMM;
881 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
883 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
885 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
887 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
890 return ARM::t2LDR_PRE;
893 return ARM::t2STR_PRE;
894 default: llvm_unreachable("Unhandled opcode!");
898 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
899 ARM_AM::AddrOpc Mode) {
902 return ARM::LDR_POST_IMM;
904 return ARM::STR_POST_IMM;
906 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
908 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
910 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
912 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
915 return ARM::t2LDR_POST;
918 return ARM::t2STR_POST;
919 default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible.
925 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
926 MachineBasicBlock::iterator MBBI,
927 const TargetInstrInfo *TII,
929 MachineBasicBlock::iterator &I) {
930 MachineInstr *MI = MBBI;
931 unsigned Base = MI->getOperand(1).getReg();
932 bool BaseKill = MI->getOperand(1).isKill();
933 unsigned Bytes = getLSMultipleTransferSize(MI);
934 int Opcode = MI->getOpcode();
935 DebugLoc dl = MI->getDebugLoc();
936 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
937 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
938 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
939 if (isi32Load(Opcode) || isi32Store(Opcode))
940 if (MI->getOperand(2).getImm() != 0)
942 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
945 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
946 // Can't do the merge if the destination register is the same as the would-be
947 // writeback register.
948 if (MI->getOperand(0).getReg() == Base)
951 unsigned PredReg = 0;
952 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
953 bool DoMerge = false;
954 ARM_AM::AddrOpc AddSub = ARM_AM::add;
956 // AM2 - 12 bits, thumb2 - 8 bits.
957 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
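// Roughly: AM2 (ARM-mode LDR/STR) immediates are 12 bits, Thumb2
// pre/post-indexed immediates are 8 bits, and a Limit of 0 for the VFP (AM5)
// case means no range check is applied here.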
959 // Try merging with the previous instruction.
960 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
961 if (MBBI != BeginMBBI) {
962 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
963 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
965 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
967 AddSub = ARM_AM::sub;
969 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
973 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
978 // Try merging with the next instruction.
979 MachineBasicBlock::iterator EndMBBI = MBB.end();
980 if (!DoMerge && MBBI != EndMBBI) {
981 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
982 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
985 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
987 AddSub = ARM_AM::sub;
988 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
992 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
// updating load/store-multiple instructions can be used with only one
// register.)
1009 MachineOperand &MO = MI->getOperand(0);
1010 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1011 .addReg(Base, getDefRegState(true)) // WB base register
1012 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1013 .addImm(Pred).addReg(PredReg)
1014 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1015 getKillRegState(MO.isKill())));
1018 // LDR_PRE, LDR_POST
1019 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1020 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1021 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1022 .addReg(Base, RegState::Define)
1023 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1025 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1026 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1027 .addReg(Base, RegState::Define)
1028 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1031 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1032 // t2LDR_PRE, t2LDR_POST
1033 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1034 .addReg(Base, RegState::Define)
1035 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1038 MachineOperand &MO = MI->getOperand(0);
// FIXME: post-indexed stores use am2offset_imm, which still encodes
// the vestigial zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
1042 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1043 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1044 // STR_PRE, STR_POST
1045 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1046 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1047 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1049 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1050 // t2STR_PRE, t2STR_POST
1051 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1052 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1053 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if the instruction is a memory operation that
/// this pass is capable of operating on.
1063 static bool isMemoryOp(const MachineInstr *MI) {
1064 // When no memory operands are present, conservatively assume unaligned,
1065 // volatile, unfoldable.
1066 if (!MI->hasOneMemOperand())
1069 const MachineMemOperand *MMO = *MI->memoperands_begin();
1071 // Don't touch volatile memory accesses - we may be changing their order.
1072 if (MMO->isVolatile())
// Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not.
1077 if (MMO->getAlignment() < 4)
1080 // str <undef> could probably be eliminated entirely, but for now we just want
1081 // to avoid making a mess of it.
1082 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1083 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1084 MI->getOperand(0).isUndef())
1087 // Likewise don't mess with references to undefined addresses.
1088 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1089 MI->getOperand(1).isUndef())
1092 int Opcode = MI->getOpcode();
1097 return MI->getOperand(1).isReg();
1100 return MI->getOperand(1).isReg();
1107 return MI->getOperand(1).isReg();
1112 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1113 /// op that is being merged.
1114 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1115 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1116 unsigned Position = MemOps[0].Position;
1117 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1118 if (MemOps[i].Position < Position) {
1119 Position = MemOps[i].Position;
1120 Loc = MemOps[i].MBBI;
1124 if (Loc != MBB.begin())
1125 RS->forward(std::prev(Loc));
1128 static int getMemoryOpOffset(const MachineInstr *MI) {
1129 int Opcode = MI->getOpcode();
1130 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1131 unsigned NumOperands = MI->getDesc().getNumOperands();
1132 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
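// OffField is the raw offset operand: for the *i12 / t2 *i8 forms it is
// already a byte offset, while the AM3 (LDRD/STRD) and AM5 (VLDR/VSTR)
// encodings pack an add/sub bit plus a magnitude (word-scaled for AM5),
// decoded below.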
1134 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1135 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1136 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1137 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1140 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1141 : ARM_AM::getAM5Offset(OffField) * 4;
1143 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1146 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1152 static void InsertLDR_STR(MachineBasicBlock &MBB,
1153 MachineBasicBlock::iterator &MBBI,
1154 int Offset, bool isDef,
1155 DebugLoc dl, unsigned NewOpc,
1156 unsigned Reg, bool RegDeadKill, bool RegUndef,
1157 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1158 bool OffKill, bool OffUndef,
1159 ARMCC::CondCodes Pred, unsigned PredReg,
1160 const TargetInstrInfo *TII, bool isT2) {
1162 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1164 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1165 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1166 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1168 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1170 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1171 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1172 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1176 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1177 MachineBasicBlock::iterator &MBBI) {
1178 MachineInstr *MI = &*MBBI;
1179 unsigned Opcode = MI->getOpcode();
1180 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1181 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1182 const MachineOperand &BaseOp = MI->getOperand(2);
1183 unsigned BaseReg = BaseOp.getReg();
1184 unsigned EvenReg = MI->getOperand(0).getReg();
1185 unsigned OddReg = MI->getOperand(1).getReg();
1186 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1187 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1188 // ARM errata 602117: LDRD with base in list may result in incorrect base
1189 // register when interrupted or faulted.
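// Note: in ARM mode LDRD/STRD need an even/odd consecutive register pair
// (e.g. r0/r1). When that doesn't hold (or the errata applies), the code
// below rewrites the instruction as an LDM/STM or as two separate
// loads / stores.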
1190 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1191 if (!Errata602117 &&
1192 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1195 MachineBasicBlock::iterator NewBBI = MBBI;
1196 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1197 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1198 bool EvenDeadKill = isLd ?
1199 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1200 bool EvenUndef = MI->getOperand(0).isUndef();
1201 bool OddDeadKill = isLd ?
1202 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1203 bool OddUndef = MI->getOperand(1).isUndef();
1204 bool BaseKill = BaseOp.isKill();
1205 bool BaseUndef = BaseOp.isUndef();
1206 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1207 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1208 int OffImm = getMemoryOpOffset(MI);
1209 unsigned PredReg = 0;
1210 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1212 if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to an
// ldm or stm.
1215 unsigned NewOpc = (isLd)
1216 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1217 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1219 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1220 .addReg(BaseReg, getKillRegState(BaseKill))
1221 .addImm(Pred).addReg(PredReg)
1222 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1223 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1226 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1227 .addReg(BaseReg, getKillRegState(BaseKill))
1228 .addImm(Pred).addReg(PredReg)
1230 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1232 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1235 NewBBI = std::prev(MBBI);
1237 // Split into two instructions.
1238 unsigned NewOpc = (isLd)
1239 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1240 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1241 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1242 // so adjust and use t2LDRi12 here for that.
1243 unsigned NewOpc2 = (isLd)
1244 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1245 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1246 DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and the base register is killed, it may have been
// re-defined by the load; make sure the first load does not clobber it.
1250 (BaseKill || OffKill) &&
1251 (TRI->regsOverlap(EvenReg, BaseReg))) {
1252 assert(!TRI->regsOverlap(OddReg, BaseReg));
1253 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1254 OddReg, OddDeadKill, false,
1255 BaseReg, false, BaseUndef, false, OffUndef,
1256 Pred, PredReg, TII, isT2);
1257 NewBBI = std::prev(MBBI);
1258 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1259 EvenReg, EvenDeadKill, false,
1260 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1261 Pred, PredReg, TII, isT2);
1263 if (OddReg == EvenReg && EvenDeadKill) {
1264 // If the two source operands are the same, the kill marker is
1265 // probably on the first one. e.g.
1266 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1267 EvenDeadKill = false;
1270 // Never kill the base register in the first instruction.
1271 if (EvenReg == BaseReg)
1272 EvenDeadKill = false;
1273 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1274 EvenReg, EvenDeadKill, EvenUndef,
1275 BaseReg, false, BaseUndef, false, OffUndef,
1276 Pred, PredReg, TII, isT2);
1277 NewBBI = std::prev(MBBI);
1278 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1279 OddReg, OddDeadKill, OddUndef,
1280 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1281 Pred, PredReg, TII, isT2);
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops with the same base register and incrementing offsets into LDM / STM ops.
1298 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1299 unsigned NumMerges = 0;
1300 unsigned NumMemOps = 0;
1302 unsigned CurrBase = 0;
1304 unsigned CurrSize = 0;
1305 ARMCC::CondCodes CurrPred = ARMCC::AL;
1306 unsigned CurrPredReg = 0;
1307 unsigned Position = 0;
1308 SmallVector<MachineBasicBlock::iterator,4> Merges;
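// Rough flow: walk the block collecting loads/stores that share the same base
// register, opcode and predicate into MemOps; when the chain is broken (a
// different base, a clobbered base, end of block, ...), hand the queue to
// MergeLDR_STR to form LDM/STM and then try to fold base updates.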
1310 RS->enterBasicBlock(&MBB);
1311 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1313 if (FixInvalidRegPairOp(MBB, MBBI))
1316 bool Advance = false;
1317 bool TryMerge = false;
1318 bool Clobber = false;
1320 bool isMemOp = isMemoryOp(MBBI);
1322 int Opcode = MBBI->getOpcode();
1323 unsigned Size = getLSMultipleTransferSize(MBBI);
1324 const MachineOperand &MO = MBBI->getOperand(0);
1325 unsigned Reg = MO.getReg();
1326 bool isKill = MO.isDef() ? false : MO.isKill();
1327 unsigned Base = MBBI->getOperand(1).getReg();
1328 unsigned PredReg = 0;
1329 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1330 int Offset = getMemoryOpOffset(MBBI);
// Watch out for a load that redefines its own base register, e.g.:
//   r5 := ldr [r5, #4]
//   r6 := ldr [r5, #8]
//
// Here the first ldr has effectively broken the chain even though it
// looks like the later ldr(s) use the same base register. Try to
// merge the ldr's so far, including this one. But don't try to
// combine the following ldr(s).
1340 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
// Also watch out for, e.g.:
//   r4 := ldr [r0, #8]
//   r4 := ldr [r0, #4]
//
// The optimization may reorder the second ldr in front of the first
// ldr, which violates write-after-write (WAW) dependence. The same
// applies to str. Try to merge the instruction(s) already in MemOps.
1349 bool Overlap = false;
1350 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1351 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1357 if (CurrBase == 0 && !Clobber) {
1358 // Start of a new chain.
1363 CurrPredReg = PredReg;
1364 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1367 } else if (!Overlap) {
1373 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1374 // No need to match PredReg.
1375 // Continue adding to the queue.
1376 if (Offset > MemOps.back().Offset) {
1377 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1382 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1384 if (Offset < I->Offset) {
1385 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1390 } else if (Offset == I->Offset) {
1391 // Collision! This can't be merged!
1400 if (MBBI->isDebugValue()) {
1403 // Reach the end of the block, try merging the memory instructions.
1405 } else if (Advance) {
1409 // Reach the end of the block, try merging the memory instructions.
1416 if (NumMemOps > 1) {
1417 // Try to find a free register to use as a new base in case it's needed.
1418 // First advance to the instruction just before the start of the chain.
1419 AdvanceRS(MBB, MemOps);
1420 // Find a scratch register.
1421 unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
1422 // Process the load / store instructions.
1423 RS->forward(std::prev(MBBI));
1427 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1428 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
// Try folding a preceding/trailing base inc/dec into the generated
// LDM/STM ops.
1432 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1433 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1435 NumMerges += Merges.size();
// Try folding a preceding/trailing base inc/dec into those load/store
// instructions that were not merged to form LDM/STM ops.
1439 for (unsigned i = 0; i != NumMemOps; ++i)
1440 if (!MemOps[i].Merged)
1441 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1444 // RS may be pointing to an instruction that's deleted.
1445 RS->skipTo(std::prev(MBBI));
1446 } else if (NumMemOps == 1) {
// Try folding a preceding/trailing base inc/dec into the single
// load/store.
1449 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1451 RS->forward(std::prev(MBBI));
1458 CurrPred = ARMCC::AL;
1465 // If iterator hasn't been advanced and this is not a memory op, skip it.
1466 // It can't start a new chain anyway.
1467 if (!Advance && !isMemOp && MBBI != E) {
1473 return NumMerges > 0;
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
1486 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1487 if (MBB.empty()) return false;
1489 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1490 if (MBBI != MBB.begin() &&
1491 (MBBI->getOpcode() == ARM::BX_RET ||
1492 MBBI->getOpcode() == ARM::tBX_RET ||
1493 MBBI->getOpcode() == ARM::MOVPCLR)) {
1494 MachineInstr *PrevMI = std::prev(MBBI);
1495 unsigned Opcode = PrevMI->getOpcode();
1496 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1497 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1498 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1499 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1500 if (MO.getReg() != ARM::LR)
1502 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1503 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1504 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1505 PrevMI->setDesc(TII->get(NewOpc));
1507 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1515 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1516 const TargetMachine &TM = Fn.getTarget();
1517 AFI = Fn.getInfo<ARMFunctionInfo>();
1518 TII = TM.getInstrInfo();
1519 TRI = TM.getRegisterInfo();
1520 STI = &TM.getSubtarget<ARMSubtarget>();
1521 RS = new RegScavenger();
1522 isThumb2 = AFI->isThumb2Function();
1523 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1525 // Don't do anything in this pass with Thumb1 for now.
1526 if (isThumb1) return false;
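// (Thumb1 load/store multiple instructions only address the low registers and
// have different writeback behaviour, which is presumably why Thumb1 functions
// are skipped here for now.)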
1528 bool Modified = false;
1529 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1531 MachineBasicBlock &MBB = *MFI;
1532 Modified |= LoadStoreMultipleOpti(MBB);
1533 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1534 Modified |= MergeReturnIntoLDM(MBB);
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.
1547 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1549 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1551 const DataLayout *TD;
1552 const TargetInstrInfo *TII;
1553 const TargetRegisterInfo *TRI;
1554 const ARMSubtarget *STI;
1555 MachineRegisterInfo *MRI;
1556 MachineFunction *MF;
1558 bool runOnMachineFunction(MachineFunction &Fn) override;
1560 const char *getPassName() const override {
1561 return "ARM pre- register allocation load / store optimization pass";
1565 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1566 unsigned &NewOpc, unsigned &EvenReg,
1567 unsigned &OddReg, unsigned &BaseReg,
1569 unsigned &PredReg, ARMCC::CondCodes &Pred,
1571 bool RescheduleOps(MachineBasicBlock *MBB,
1572 SmallVectorImpl<MachineInstr *> &Ops,
1573 unsigned Base, bool isLd,
1574 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1575 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1577 char ARMPreAllocLoadStoreOpt::ID = 0;
1580 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1581 TD = Fn.getTarget().getDataLayout();
1582 TII = Fn.getTarget().getInstrInfo();
1583 TRI = Fn.getTarget().getRegisterInfo();
1584 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1585 MRI = &Fn.getRegInfo();
1588 ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
1589 bool isThumb1 = AFI->isThumbFunction() && !AFI->isThumb2Function();
1590 // Don't do anything in this pass with Thumb1 for now.
1591 if (isThumb1) return false;
1593 bool Modified = false;
1594 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1596 Modified |= RescheduleLoadStoreInstrs(MFI);
1601 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1602 MachineBasicBlock::iterator I,
1603 MachineBasicBlock::iterator E,
1604 SmallPtrSet<MachineInstr*, 4> &MemOps,
1605 SmallSet<unsigned, 4> &MemRegs,
1606 const TargetRegisterInfo *TRI) {
1607 // Are there stores / loads / calls between them?
1608 // FIXME: This is overly conservative. We should make use of alias information
1610 SmallSet<unsigned, 4> AddedRegPressure;
1612 if (I->isDebugValue() || MemOps.count(&*I))
1614 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1616 if (isLd && I->mayStore())
// It's not safe to move the first 'str' down past a later store that may
// alias it, e.g.:
//   str  r1, [r0]
//   strh r5, [r0]
//   str  r4, [r0, #+4]
1628 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1629 MachineOperand &MO = I->getOperand(j);
1632 unsigned Reg = MO.getReg();
1633 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1635 if (Reg != Base && !MemRegs.count(Reg))
1636 AddedRegPressure.insert(Reg);
1640 // Estimate register pressure increase due to the transformation.
if (MemRegs.size() <= 4)
  // OK if we are only moving a small number of instructions.
  return true;
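// Otherwise require that the move makes no more than twice as many additional
// registers live as there are memory registers, e.g. for a group touching 3
// memory registers at most 6 extra live registers are tolerated.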
1644 return AddedRegPressure.size() <= MemRegs.size() * 2;
/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
1649 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1650 MachineInstr *Op1) {
1651 assert(MI->memoperands_empty() && "expected a new machineinstr");
1652 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1653 + (Op1->memoperands_end() - Op1->memoperands_begin());
1655 MachineFunction *MF = MI->getParent()->getParent();
1656 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1657 MachineSDNode::mmo_iterator MemEnd =
1658 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1660 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1661 MI->setMemRefs(MemBegin, MemEnd);
1665 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1667 unsigned &NewOpc, unsigned &EvenReg,
1668 unsigned &OddReg, unsigned &BaseReg,
1669 int &Offset, unsigned &PredReg,
1670 ARMCC::CondCodes &Pred,
1672 // Make sure we're allowed to generate LDRD/STRD.
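// (LDRD/STRD were introduced with the ARMv5TE extensions, hence the
// subtarget check below.)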
1673 if (!STI->hasV5TEOps())
1676 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1678 unsigned Opcode = Op0->getOpcode();
1679 if (Opcode == ARM::LDRi12) {
1681 } else if (Opcode == ARM::STRi12) {
1683 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1684 NewOpc = ARM::t2LDRDi8;
1687 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1688 NewOpc = ARM::t2STRDi8;
// Make sure the base address satisfies the i64 ld / st alignment requirement.
// At the moment, we ignore the memory operand's value.
1697 // If we want to use AliasAnalysis, we should check it accordingly.
1698 if (!Op0->hasOneMemOperand() ||
1699 (*Op0->memoperands_begin())->isVolatile())
1702 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1703 const Function *Func = MF->getFunction();
1704 unsigned ReqAlign = STI->hasV6Ops()
1705 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1706 : 8; // Pre-v6 need 8-byte align
1707 if (Align < ReqAlign)
1710 // Then make sure the immediate offset fits.
1711 int OffImm = getMemoryOpOffset(Op0);
1713 int Limit = (1 << 8) * Scale;
1714 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1718 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1720 AddSub = ARM_AM::sub;
1723 int Limit = (1 << 8) * Scale;
1724 if (OffImm >= Limit || (OffImm & (Scale-1)))
1726 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1728 EvenReg = Op0->getOperand(0).getReg();
1729 OddReg = Op1->getOperand(0).getReg();
1730 if (EvenReg == OddReg)
1732 BaseReg = Op0->getOperand(1).getReg();
1733 Pred = getInstrPredicate(Op0, PredReg);
1734 dl = Op0->getDebugLoc();
1738 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1739 SmallVectorImpl<MachineInstr *> &Ops,
1740 unsigned Base, bool isLd,
1741 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1742 bool RetVal = false;
1744 // Sort by offset (in reverse order).
1745 std::sort(Ops.begin(), Ops.end(),
1746 [](const MachineInstr *LHS, const MachineInstr *RHS) {
1747 int LOffset = getMemoryOpOffset(LHS);
1748 int ROffset = getMemoryOpOffset(RHS);
1749 assert(LHS == RHS || LOffset != ROffset);
1750 return LOffset > ROffset;
// The loads / stores of the same base are in order. Scan them from first to
// last and check for the following:
//  1. Any def of base.
//  2. Any gaps.
1757 while (Ops.size() > 1) {
1758 unsigned FirstLoc = ~0U;
1759 unsigned LastLoc = 0;
1760 MachineInstr *FirstOp = nullptr;
1761 MachineInstr *LastOp = nullptr;
1763 unsigned LastOpcode = 0;
1764 unsigned LastBytes = 0;
1765 unsigned NumMove = 0;
1766 for (int i = Ops.size() - 1; i >= 0; --i) {
1767 MachineInstr *Op = Ops[i];
1768 unsigned Loc = MI2LocMap[Op];
1769 if (Loc <= FirstLoc) {
1773 if (Loc >= LastLoc) {
1779 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1780 if (LastOpcode && LSMOpcode != LastOpcode)
1783 int Offset = getMemoryOpOffset(Op);
1784 unsigned Bytes = getLSMultipleTransferSize(Op);
1786 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1789 LastOffset = Offset;
1791 LastOpcode = LSMOpcode;
1792 if (++NumMove == 8) // FIXME: Tune this limit.
1799 SmallPtrSet<MachineInstr*, 4> MemOps;
1800 SmallSet<unsigned, 4> MemRegs;
1801 for (int i = NumMove-1; i >= 0; --i) {
1802 MemOps.insert(Ops[i]);
1803 MemRegs.insert(Ops[i]->getOperand(0).getReg());
// Be conservative: if the instructions are too far apart, don't
// move them. We want to limit the increase of register pressure.
1808 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1810 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1811 MemOps, MemRegs, TRI);
1813 for (unsigned i = 0; i != NumMove; ++i)
1816 // This is the new location for the loads / stores.
1817 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1818 while (InsertPos != MBB->end()
1819 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1822 // If we are moving a pair of loads / stores, see if it makes sense
1823 // to try to allocate a pair of registers that can form register pairs.
1824 MachineInstr *Op0 = Ops.back();
1825 MachineInstr *Op1 = Ops[Ops.size()-2];
1826 unsigned EvenReg = 0, OddReg = 0;
1827 unsigned BaseReg = 0, PredReg = 0;
1828 ARMCC::CondCodes Pred = ARMCC::AL;
1830 unsigned NewOpc = 0;
1833 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1834 EvenReg, OddReg, BaseReg,
1835 Offset, PredReg, Pred, isT2)) {
1839 const MCInstrDesc &MCID = TII->get(NewOpc);
1840 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1841 MRI->constrainRegClass(EvenReg, TRC);
1842 MRI->constrainRegClass(OddReg, TRC);
1844 // Form the pair instruction.
1846 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1847 .addReg(EvenReg, RegState::Define)
1848 .addReg(OddReg, RegState::Define)
// FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should
// always be reg0 since we're transforming LDRi12s.
1855 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1856 concatenateMemOperands(MIB, Op0, Op1);
1857 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1860 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
// FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should
// always be reg0 since we're transforming STRi12s.
1869 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1870 concatenateMemOperands(MIB, Op0, Op1);
1871 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1877 // Add register allocation hints to form register pairs.
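// These hints encourage the register allocator to assign EvenReg and OddReg
// to an even/odd physical register pair (e.g. r4/r5), which ARM-mode
// LDRD/STRD require.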
1878 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1879 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
1881 for (unsigned i = 0; i != NumMove; ++i) {
1882 MachineInstr *Op = Ops.back();
1884 MBB->splice(InsertPos, MBB, Op);
1888 NumLdStMoved += NumMove;
1898 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1899 bool RetVal = false;
1901 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1902 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1903 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1904 SmallVector<unsigned, 4> LdBases;
1905 SmallVector<unsigned, 4> StBases;
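// Loads and stores are bucketed by base register (Base2LdsMap / Base2StsMap);
// LdBases / StBases remember the order in which bases were first seen so the
// rescheduling below visits them in a stable order.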
1908 MachineBasicBlock::iterator MBBI = MBB->begin();
1909 MachineBasicBlock::iterator E = MBB->end();
1911 for (; MBBI != E; ++MBBI) {
1912 MachineInstr *MI = MBBI;
1913 if (MI->isCall() || MI->isTerminator()) {
1914 // Stop at barriers.
1919 if (!MI->isDebugValue())
1920 MI2LocMap[MI] = ++Loc;
1922 if (!isMemoryOp(MI))
1924 unsigned PredReg = 0;
1925 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1928 int Opc = MI->getOpcode();
1929 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1930 unsigned Base = MI->getOperand(1).getReg();
1931 int Offset = getMemoryOpOffset(MI);
1933 bool StopHere = false;
1935 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1936 Base2LdsMap.find(Base);
1937 if (BI != Base2LdsMap.end()) {
1938 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1939 if (Offset == getMemoryOpOffset(BI->second[i])) {
1945 BI->second.push_back(MI);
1947 Base2LdsMap[Base].push_back(MI);
1948 LdBases.push_back(Base);
1951 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1952 Base2StsMap.find(Base);
1953 if (BI != Base2StsMap.end()) {
1954 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1955 if (Offset == getMemoryOpOffset(BI->second[i])) {
1961 BI->second.push_back(MI);
1963 Base2StsMap[Base].push_back(MI);
1964 StBases.push_back(Base);
1969 // Found a duplicate (a base+offset combination that's seen earlier).
1976 // Re-schedule loads.
1977 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1978 unsigned Base = LdBases[i];
1979 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
1981 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1984 // Re-schedule stores.
1985 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1986 unsigned Base = StBases[i];
1987 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
1989 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1993 Base2LdsMap.clear();
1994 Base2StsMap.clear();
2004 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
2005 /// optimization pass.
2006 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
if (PreAlloc)
  return new ARMPreAllocLoadStoreOpt();
2009 return new ARMLoadStoreOpt();