1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMRegisterInfo.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/Target/TargetData.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/SmallPtrSet.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
// Pass statistics (reported under -stats); each counter is bumped when the
// corresponding transformation is performed.
43 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
44 STATISTIC(NumSTMGened , "Number of stm instructions generated");
45 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
46 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
47 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
48 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
49 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
50 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
51 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
52 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
53 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
55 /// ARMLoadStoreOpt - Post-register-allocation pass that combines adjacent
56 /// load / store instructions to form ldm / stm instructions.
59 struct ARMLoadStoreOpt : public MachineFunctionPass {
61 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks cached for the current function (presumably initialized in
// runOnMachineFunction -- the initialization site is not visible here).
63 const TargetInstrInfo *TII;
64 const TargetRegisterInfo *TRI;
69 virtual bool runOnMachineFunction(MachineFunction &Fn);
71 virtual const char *getPassName() const {
72 return "ARM load / store optimization pass";
// MemOpQueueEntry - One candidate load/store: its immediate offset, the
// transferred register and its kill state, the op's position within the
// basic block, and the instruction itself. Merged starts false and records
// whether the op has been folded into a load/store-multiple.
76 struct MemOpQueueEntry {
81 MachineBasicBlock::iterator MBBI;
83 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
84 MachineBasicBlock::iterator i)
85 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
87 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
88 typedef MemOpQueue::iterator MemOpQueueIter;
// Worker methods; see the out-of-line definitions for documentation.
90 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
91 int Offset, unsigned Base, bool BaseKill, int Opcode,
92 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
93 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
94 void MergeOpsUpdate(MachineBasicBlock &MBB,
103 ARMCC::CondCodes Pred,
107 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
108 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
109 int Opcode, unsigned Size,
110 ARMCC::CondCodes Pred, unsigned PredReg,
111 unsigned Scratch, MemOpQueue &MemOps,
112 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
114 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
115 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
116 MachineBasicBlock::iterator &MBBI);
117 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
118 MachineBasicBlock::iterator MBBI,
119 const TargetInstrInfo *TII,
121 MachineBasicBlock::iterator &I);
122 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
123 MachineBasicBlock::iterator MBBI,
125 MachineBasicBlock::iterator &I);
126 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
127 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification, replacement for typeid.
129 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single i32/VFP load or store opcode plus
// an addressing submode (ia/ib/da/db) to the matching non-writeback
// load/store-multiple opcode. Returns 0 for the VLDM/VSTM db cases, which
// only exist in base-updating (_UPD) form.
132 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
134 default: llvm_unreachable("Unhandled opcode!");
138 default: llvm_unreachable("Unhandled submode!");
139 case ARM_AM::ia: return ARM::LDMIA;
140 case ARM_AM::da: return ARM::LDMDA;
141 case ARM_AM::db: return ARM::LDMDB;
142 case ARM_AM::ib: return ARM::LDMIB;
148 default: llvm_unreachable("Unhandled submode!");
149 case ARM_AM::ia: return ARM::STMIA;
150 case ARM_AM::da: return ARM::STMDA;
151 case ARM_AM::db: return ARM::STMDB;
152 case ARM_AM::ib: return ARM::STMIB;
// Thumb2 multiples only support ia and db submodes.
159 default: llvm_unreachable("Unhandled submode!");
160 case ARM_AM::ia: return ARM::t2LDMIA;
161 case ARM_AM::db: return ARM::t2LDMDB;
168 default: llvm_unreachable("Unhandled submode!");
169 case ARM_AM::ia: return ARM::t2STMIA;
170 case ARM_AM::db: return ARM::t2STMDB;
176 default: llvm_unreachable("Unhandled submode!");
177 case ARM_AM::ia: return ARM::VLDMSIA;
178 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
184 default: llvm_unreachable("Unhandled submode!");
185 case ARM_AM::ia: return ARM::VSTMSIA;
186 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
192 default: llvm_unreachable("Unhandled submode!");
193 case ARM_AM::ia: return ARM::VLDMDIA;
194 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
200 default: llvm_unreachable("Unhandled submode!");
201 case ARM_AM::ia: return ARM::VSTMDIA;
202 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// getLoadStoreMultipleSubMode - Recover the addressing submode (ia/ib/da/db)
// encoded in a load/store-multiple opcode (including _UPD and _RET variants).
213 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
215 default: llvm_unreachable("Unhandled opcode!");
221 case ARM::t2LDMIA_RET:
223 case ARM::t2LDMIA_UPD:
225 case ARM::t2STMIA_UPD:
227 case ARM::VLDMSIA_UPD:
229 case ARM::VSTMSIA_UPD:
231 case ARM::VLDMDIA_UPD:
233 case ARM::VSTMDIA_UPD:
247 case ARM::t2LDMDB_UPD:
249 case ARM::t2STMDB_UPD:
250 case ARM::VLDMSDB_UPD:
251 case ARM::VSTMSDB_UPD:
252 case ARM::VLDMDDB_UPD:
253 case ARM::VSTMDDB_UPD:
263 return ARM_AM::bad_am_submode;
266 } // end namespace ARM_AM
267 } // end namespace llvm
// isT2i32Load - True for the Thumb2 32-bit integer load opcodes this pass
// handles (12-bit and 8-bit immediate forms).
269 static bool isT2i32Load(unsigned Opc) {
270 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True for any handled 32-bit integer load, ARM or Thumb2.
273 static bool isi32Load(unsigned Opc) {
274 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
// isT2i32Store - True for the Thumb2 32-bit integer store opcodes this pass
// handles (12-bit and 8-bit immediate forms).
277 static bool isT2i32Store(unsigned Opc) {
278 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True for any handled 32-bit integer store, ARM or Thumb2.
281 static bool isi32Store(unsigned Opc) {
282 return Opc == ARM::STRi12 || isT2i32Store(Opc);
285 /// MergeOps - Create and insert a LDM or STM with Base as base register and
286 /// registers in Regs as the register operands that would be loaded / stored.
287 /// It returns true if the transformation is done.
289 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
290 MachineBasicBlock::iterator MBBI,
291 int Offset, unsigned Base, bool BaseKill,
292 int Opcode, ARMCC::CondCodes Pred,
293 unsigned PredReg, unsigned Scratch, DebugLoc dl,
294 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
295 // Only a single register to load / store. Don't bother.
296 unsigned NumRegs = Regs.size();
// Select the addressing submode implied by the starting offset; ia is the
// default (Offset == 0).
300 ARM_AM::AMSubMode Mode = ARM_AM::ia;
301 // VFP and Thumb2 do not support IB or DA modes.
302 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
303 bool haveIBAndDA = isNotVFP && !isThumb2;
// Offset == 4 -> ib; -4*NumRegs+4 -> da; -4*NumRegs -> db (the submode
// assignments are presumed from these guards -- confirm against full source).
304 if (Offset == 4 && haveIBAndDA)
306 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
308 else if (Offset == -4 * (int)NumRegs && isNotVFP)
309 // VLDM/VSTM do not support DB mode without also updating the base reg.
311 else if (Offset != 0) {
312 // Check if this is a supported opcode before we insert instructions to
313 // calculate a new base register.
314 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
316 // If starting offset isn't zero, insert a MI to materialize a new base.
317 // But only do so if it is cost effective, i.e. merging more than two
323 if (isi32Load(Opcode))
324 // If it is a load, then just use one of the destination register to
325 // use as the new base.
326 NewBase = Regs[NumRegs-1].first;
328 // Use the scratch register to use as a new base.
333 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
335 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
// The offset must be encodable as an ARM / Thumb2 shifter-operand immediate
// for the ADD/SUB that materializes the new base.
338 int ImmedOffset = isThumb2
339 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
340 if (ImmedOffset == -1)
341 // FIXME: Try t2ADDri12 or t2SUBri12?
342 return false; // Probably not worth it then.
344 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
345 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
346 .addImm(Pred).addReg(PredReg).addReg(0);
348 BaseKill = true; // New base is always killed right at its use.
// Loads define their register operands; stores use (and possibly kill) them.
351 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
352 Opcode == ARM::VLDRD);
353 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
354 if (!Opcode) return false;
355 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
356 .addReg(Base, getKillRegState(BaseKill))
357 .addImm(Pred).addReg(PredReg);
358 for (unsigned i = 0; i != NumRegs; ++i)
359 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
360 | getKillRegState(Regs[i].second));
365 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
367 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
369 unsigned memOpsBegin, unsigned memOpsEnd,
370 unsigned insertAfter, int Offset,
371 unsigned Base, bool BaseKill,
373 ARMCC::CondCodes Pred, unsigned PredReg,
376 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
377 // First calculate which of the registers should be killed by the merged
379 const unsigned insertPos = memOps[insertAfter].Position;
380 SmallSet<unsigned, 4> KilledRegs;
381 DenseMap<unsigned, unsigned> Killer;
382 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
383 if (i == memOpsBegin) {
// Record registers killed by memops preceding the insertion point; those
// kill flags must migrate onto the merged instruction.
388 if (memOps[i].Position < insertPos && memOps[i].isKill) {
389 unsigned Reg = memOps[i].Reg;
390 KilledRegs.insert(Reg);
395 SmallVector<std::pair<unsigned, bool>, 8> Regs;
396 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
397 unsigned Reg = memOps[i].Reg;
398 // If we are inserting the merged operation after an operation that
399 // uses the same register, make sure to transfer any kill flag.
400 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
401 Regs.push_back(std::make_pair(Reg, isKill));
404 // Try to do the merge.
405 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
407 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
408 Pred, PredReg, Scratch, dl, Regs))
411 // Merge succeeded, update records.
412 Merges.push_back(prior(Loc));
413 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
414 // Remove kill flags from any memops that come before insertPos.
415 if (Regs[i-memOpsBegin].second) {
416 unsigned Reg = Regs[i-memOpsBegin].first;
417 if (KilledRegs.count(Reg)) {
418 unsigned j = Killer[Reg];
419 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
420 assert(Idx >= 0 && "Cannot find killing operand");
421 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
422 memOps[j].isKill = false;
424 memOps[i].isKill = true;
// The original single load/store is now redundant; delete it.
426 MBB.erase(memOps[i].MBBI);
427 // Update this memop to refer to the merged instruction.
428 // We may need to move kill flags again.
429 memOps[i].Merged = true;
430 memOps[i].MBBI = Merges.back();
431 memOps[i].Position = insertPos;
435 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
436 /// load / store multiple instructions.
438 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
439 unsigned Base, int Opcode, unsigned Size,
440 ARMCC::CondCodes Pred, unsigned PredReg,
441 unsigned Scratch, MemOpQueue &MemOps,
442 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
443 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
444 int Offset = MemOps[SIndex].Offset;
445 int SOffset = Offset;
446 unsigned insertAfter = SIndex;
447 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
448 DebugLoc dl = Loc->getDebugLoc();
449 const MachineOperand &PMO = Loc->getOperand(0);
450 unsigned PReg = PMO.getReg();
// Undef operands get UINT_MAX so they can never satisfy the ascending
// register-number checks below.
451 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
452 : getARMRegisterNumbering(PReg);
454 unsigned Limit = ~0U;
456 // vldm / vstm limit are 32 for S variants, 16 for D variants.
// Greedily extend the run while each memop's offset advances by exactly
// Size and register numbers stay ascending (consecutive for VFP).
474 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
475 int NewOffset = MemOps[i].Offset;
476 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
477 unsigned Reg = MO.getReg();
478 unsigned RegNum = MO.isUndef() ? UINT_MAX
479 : getARMRegisterNumbering(Reg);
480 // Register numbers must be in ascending order. For VFP / NEON load and
481 // store multiples, the registers must also be consecutive and within the
482 // limit on the number of registers per instruction.
483 if (Reg != ARM::SP &&
484 NewOffset == Offset + (int)Size &&
485 ((isNotVFP && RegNum > PRegNum) ||
486 ((Count < Limit) && RegNum == PRegNum+1))) {
491 // Can't merge this in. Try merge the earlier ones first.
492 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
493 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to merge the remainder of the queue starting at the break point.
494 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
499 if (MemOps[i].Position > MemOps[insertAfter].Position)
// The whole remaining run merged; kill the base only if the anchor
// instruction already killed it.
503 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
504 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
505 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - True if MI is a SUB of Base by exactly Bytes, with
// the same predicate, suitable for folding into a writeback/indexed form.
// Limit (when nonzero) bounds the acceptable byte offset.
509 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
510 unsigned Bytes, unsigned Limit,
511 ARMCC::CondCodes Pred, unsigned PredReg){
512 unsigned MyPredReg = 0;
515 if (MI->getOpcode() != ARM::t2SUBri &&
516 MI->getOpcode() != ARM::tSUBspi &&
517 MI->getOpcode() != ARM::SUBri)
520 // Make sure the offset fits in 8 bits.
521 if (Bytes == 0 || (Limit && Bytes >= Limit))
// tSUBspi's immediate counts words, hence the scale of 4.
524 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
525 return (MI->getOperand(0).getReg() == Base &&
526 MI->getOperand(1).getReg() == Base &&
527 (MI->getOperand(2).getImm()*Scale) == Bytes &&
528 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
529 MyPredReg == PredReg);
// isMatchingIncrement - Mirror of isMatchingDecrement for ADD of Base by
// exactly Bytes under the same predicate.
532 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
533 unsigned Bytes, unsigned Limit,
534 ARMCC::CondCodes Pred, unsigned PredReg){
535 unsigned MyPredReg = 0;
538 if (MI->getOpcode() != ARM::t2ADDri &&
539 MI->getOpcode() != ARM::tADDspi &&
540 MI->getOpcode() != ARM::ADDri)
543 if (Bytes == 0 || (Limit && Bytes >= Limit))
544 // Make sure the offset fits in 8 bits.
// tADDspi's immediate counts words, hence the scale of 4.
547 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
548 return (MI->getOperand(0).getReg() == Base &&
549 MI->getOperand(1).getReg() == Base &&
550 (MI->getOperand(2).getImm()*Scale) == Bytes &&
551 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
552 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes transferred by a load/store
// (multiple) instruction: 4 bytes per transferred register for integer and
// S-register forms, 8 for D-register forms. The register count is inferred
// from the variadic operands beyond the fixed operand list.
555 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
556 switch (MI->getOpcode()) {
584 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
587 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// getUpdatingLSMultipleOpcode - Map a load/store-multiple opcode plus
// addressing submode to its base-updating (writeback, _UPD) counterpart.
591 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
592 ARM_AM::AMSubMode Mode) {
594 default: llvm_unreachable("Unhandled opcode!");
600 default: llvm_unreachable("Unhandled submode!");
601 case ARM_AM::ia: return ARM::LDMIA_UPD;
602 case ARM_AM::ib: return ARM::LDMIB_UPD;
603 case ARM_AM::da: return ARM::LDMDA_UPD;
604 case ARM_AM::db: return ARM::LDMDB_UPD;
612 default: llvm_unreachable("Unhandled submode!");
613 case ARM_AM::ia: return ARM::STMIA_UPD;
614 case ARM_AM::ib: return ARM::STMIB_UPD;
615 case ARM_AM::da: return ARM::STMDA_UPD;
616 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2 and VFP multiples only support ia and db submodes.
622 default: llvm_unreachable("Unhandled submode!");
623 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
624 case ARM_AM::db: return ARM::t2LDMDB_UPD;
630 default: llvm_unreachable("Unhandled submode!");
631 case ARM_AM::ia: return ARM::t2STMIA_UPD;
632 case ARM_AM::db: return ARM::t2STMDB_UPD;
637 default: llvm_unreachable("Unhandled submode!");
638 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
639 case ARM_AM::db: return ARM::VLDMSDB_UPD;
644 default: llvm_unreachable("Unhandled submode!");
645 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
646 case ARM_AM::db: return ARM::VLDMDDB_UPD;
651 default: llvm_unreachable("Unhandled submode!");
652 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
653 case ARM_AM::db: return ARM::VSTMSDB_UPD;
658 default: llvm_unreachable("Unhandled submode!");
659 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
660 case ARM_AM::db: return ARM::VSTMDDB_UPD;
668 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
669 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
671 /// stmia rn, <ra, rb, rc>
672 /// rn := rn + 4 * 3;
674 /// stmia rn!, <ra, rb, rc>
676 /// rn := rn - 4 * 3;
677 /// ldmia rn, <ra, rb, rc>
679 /// ldmdb rn!, <ra, rb, rc>
680 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
681 MachineBasicBlock::iterator MBBI,
683 MachineBasicBlock::iterator &I) {
684 MachineInstr *MI = MBBI;
685 unsigned Base = MI->getOperand(0).getReg();
686 bool BaseKill = MI->getOperand(0).isKill();
687 unsigned Bytes = getLSMultipleTransferSize(MI);
688 unsigned PredReg = 0;
689 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
690 int Opcode = MI->getOpcode();
691 DebugLoc dl = MI->getDebugLoc();
693 // Can't use an updating ld/st if the base register is also a dest
694 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
695 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
696 if (MI->getOperand(i).getReg() == Base)
699 bool DoMerge = false;
700 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
702 // Try merging with the previous instruction.
703 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
704 if (MBBI != BeginMBBI) {
705 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
706 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
// A preceding decrement of the base folds by switching the submode
// (ia -> db, ib -> da -- presumed; the Mode assignments are not visible).
708 if (Mode == ARM_AM::ia &&
709 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
712 } else if (Mode == ARM_AM::ib &&
713 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
721 // Try merging with the next instruction.
722 MachineBasicBlock::iterator EndMBBI = MBB.end();
723 if (!DoMerge && MBBI != EndMBBI) {
724 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
725 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
// A following increment after ia/ib (or decrement after da/db) folds
// directly into the writeback form with the same submode.
727 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
728 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
730 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
731 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
746 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
747 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
748 .addReg(Base, getDefRegState(true)) // WB base register
749 .addReg(Base, getKillRegState(BaseKill))
750 .addImm(Pred).addReg(PredReg);
752 // Transfer the rest of operands.
753 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
754 MIB.addOperand(MI->getOperand(OpNum));
756 // Transfer memoperands.
757 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
// pre-indexed (writeback) counterpart. VLDR/VSTR have no pre-indexed form,
// so the one-register VLDM/VSTM _UPD variants are used instead, with the
// submode chosen by the add/sub direction.
763 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
764 ARM_AM::AddrOpc Mode) {
767 return ARM::LDR_PRE_IMM;
769 return ARM::STR_PRE_IMM;
771 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
773 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
775 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
777 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
780 return ARM::t2LDR_PRE;
783 return ARM::t2STR_PRE;
784 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
// post-indexed (writeback) counterpart; VFP singles again fall back to the
// one-register VLDM/VSTM _UPD variants.
789 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
790 ARM_AM::AddrOpc Mode) {
793 return ARM::LDR_POST_IMM;
795 return ARM::STR_POST_IMM;
797 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
799 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
801 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
803 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
806 return ARM::t2LDR_POST;
809 return ARM::t2STR_POST;
810 default: llvm_unreachable("Unhandled opcode!");
815 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
816 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
817 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
818 MachineBasicBlock::iterator MBBI,
819 const TargetInstrInfo *TII,
821 MachineBasicBlock::iterator &I) {
822 MachineInstr *MI = MBBI;
823 unsigned Base = MI->getOperand(1).getReg();
824 bool BaseKill = MI->getOperand(1).isKill();
825 unsigned Bytes = getLSMultipleTransferSize(MI);
826 int Opcode = MI->getOpcode();
827 DebugLoc dl = MI->getDebugLoc();
828 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
829 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
830 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset forms can be converted to pre/post-indexed accesses.
831 if (isi32Load(Opcode) || isi32Store(Opcode))
832 if (MI->getOperand(2).getImm() != 0)
834 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
837 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
838 // Can't do the merge if the destination register is the same as the would-be
839 // writeback register.
840 if (isLd && MI->getOperand(0).getReg() == Base)
843 unsigned PredReg = 0;
844 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
845 bool DoMerge = false;
846 ARM_AM::AddrOpc AddSub = ARM_AM::add;
848 // AM2 - 12 bits, thumb2 - 8 bits.
849 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
851 // Try merging with the previous instruction.
852 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
853 if (MBBI != BeginMBBI) {
854 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
855 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
// A preceding add/sub of the base becomes a pre-indexed access.
857 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
859 AddSub = ARM_AM::sub;
861 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
865 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
870 // Try merging with the next instruction.
871 MachineBasicBlock::iterator EndMBBI = MBB.end();
872 if (!DoMerge && MBBI != EndMBBI) {
873 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
874 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
// A following add/sub of the base becomes a post-indexed access.
877 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
879 AddSub = ARM_AM::sub;
880 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
884 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
897 // VLDM[SD}_UPD, VSTM[SD]_UPD
898 // (There are no base-updating versions of VLDR/VSTR instructions, but the
899 // updating load/store-multiple instructions can be used with only one
901 MachineOperand &MO = MI->getOperand(0);
902 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
903 .addReg(Base, getDefRegState(true)) // WB base register
904 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
905 .addImm(Pred).addReg(PredReg)
906 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
907 getKillRegState(MO.isKill())));
// AM2 loads encode the add/sub direction inside the offset field.
910 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
912 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
913 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
914 .addReg(Base, RegState::Define)
915 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
917 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
918 .addReg(Base, RegState::Define)
919 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
922 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
923 // t2LDR_PRE, t2LDR_POST
924 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
925 .addReg(Base, RegState::Define)
926 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
929 MachineOperand &MO = MI->getOperand(0);
930 // FIXME: post-indexed stores use am2offset_imm, which still encodes
931 // the vestigal zero-reg offset register. When that's fixed, this clause
932 // can be removed entirely.
933 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
934 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
936 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
937 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
938 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
940 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
941 // t2STR_PRE, t2STR_POST
942 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
943 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
944 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
952 /// isMemoryOp - Returns true if instruction is a memory operation that this
953 /// pass is capable of operating on.
954 static bool isMemoryOp(const MachineInstr *MI) {
955 // When no memory operands are present, conservatively assume unaligned,
956 // volatile, unfoldable.
957 if (!MI->hasOneMemOperand())
960 const MachineMemOperand *MMO = *MI->memoperands_begin();
962 // Don't touch volatile memory accesses - we may be changing their order.
963 if (MMO->isVolatile())
966 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
968 if (MMO->getAlignment() < 4)
971 // str <undef> could probably be eliminated entirely, but for now we just want
972 // to avoid making a mess of it.
973 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
974 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
975 MI->getOperand(0).isUndef())
978 // Likewise don't mess with references to undefined addresses.
979 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
980 MI->getOperand(1).isUndef())
983 int Opcode = MI->getOpcode();
// Only forms whose address operand (operand 1) is a plain register base are
// handled by this pass.
988 return MI->getOperand(1).isReg();
991 return MI->getOperand(1).isReg();
998 return MI->getOperand(1).isReg();
1003 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1004 /// op that is being merged.
1005 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1006 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1007 unsigned Position = MemOps[0].Position;
// Find the memop with the smallest Position, i.e. earliest in the block.
1008 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1009 if (MemOps[i].Position < Position) {
1010 Position = MemOps[i].Position;
1011 Loc = MemOps[i].MBBI;
// No-op when the earliest memop is the very first instruction of the block.
1015 if (Loc != MBB.begin())
1016 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode the signed byte offset of a supported load or
// store from its immediate operand (third-from-last fixed operand).
1019 static int getMemoryOpOffset(const MachineInstr *MI) {
1020 int Opcode = MI->getOpcode();
1021 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1022 unsigned NumOperands = MI->getDesc().getNumOperands();
1023 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// i8/i12 forms carry the offset directly; AM3/AM5 forms pack an add/sub bit
// plus magnitude (AM5 counts in words, hence the * 4).
1025 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1026 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1027 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1028 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1031 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1032 : ARM_AM::getAM5Offset(OffField) * 4;
1034 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1037 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit one single load or store (ARM or Thumb2 immediate
// form) with the given register, base, offset, flags and predicate. Used by
// FixInvalidRegPairOp when splitting an LDRD/STRD into two singles.
1043 static void InsertLDR_STR(MachineBasicBlock &MBB,
1044 MachineBasicBlock::iterator &MBBI,
1045 int Offset, bool isDef,
1046 DebugLoc dl, unsigned NewOpc,
1047 unsigned Reg, bool RegDeadKill, bool RegUndef,
1048 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1049 bool OffKill, bool OffUndef,
1050 ARMCC::CondCodes Pred, unsigned PredReg,
1051 const TargetInstrInfo *TII, bool isT2) {
// Load: Reg is defined (and possibly dead).
1053 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1055 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1056 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1057 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store: Reg is used (and possibly killed or undef).
1059 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1061 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1062 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1063 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - LDRD/STRD require an even/odd consecutive register
// pair. When the allocated pair is invalid, rewrite the instruction either
// as an LDM/STM (ascending regs, zero offset) or as two single loads/stores.
// Returns true if the instruction was rewritten.
1067 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1068 MachineBasicBlock::iterator &MBBI) {
1069 MachineInstr *MI = &*MBBI;
1070 unsigned Opcode = MI->getOpcode();
1071 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1072 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1073 unsigned EvenReg = MI->getOperand(0).getReg();
1074 unsigned OddReg = MI->getOperand(1).getReg();
1075 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1076 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A proper even/odd consecutive pair needs no fixing.
1077 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1080 MachineBasicBlock::iterator NewBBI = MBBI;
1081 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1082 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the flag of interest is dead-def; for stores it is kill-of-use.
1083 bool EvenDeadKill = isLd ?
1084 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1085 bool EvenUndef = MI->getOperand(0).isUndef();
1086 bool OddDeadKill = isLd ?
1087 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1088 bool OddUndef = MI->getOperand(1).isUndef();
1089 const MachineOperand &BaseOp = MI->getOperand(2);
1090 unsigned BaseReg = BaseOp.getReg();
1091 bool BaseKill = BaseOp.isKill();
1092 bool BaseUndef = BaseOp.isUndef();
// t2 forms have no separate offset-register operand.
1093 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1094 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1095 int OffImm = getMemoryOpOffset(MI);
1096 unsigned PredReg = 0;
1097 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1099 if (OddRegNum > EvenRegNum && OffImm == 0) {
1100 // Ascending register numbers and no offset. It's safe to change it to a
1102 unsigned NewOpc = (isLd)
1103 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1104 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1106 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1107 .addReg(BaseReg, getKillRegState(BaseKill))
1108 .addImm(Pred).addReg(PredReg)
1109 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1110 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1113 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1114 .addReg(BaseReg, getKillRegState(BaseKill))
1115 .addImm(Pred).addReg(PredReg)
1117 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1119 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1122 NewBBI = llvm::prior(MBBI);
1124 // Split into two instructions.
1125 unsigned NewOpc = (isLd)
1126 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1127 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1128 DebugLoc dl = MBBI->getDebugLoc();
1129 // If this is a load and base register is killed, it may have been
1130 // re-defed by the load, make sure the first load does not clobber it.
1132 (BaseKill || OffKill) &&
1133 (TRI->regsOverlap(EvenReg, BaseReg))) {
1134 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd (higher-addressed) half first so the base register survives
// for the second load.
1135 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1136 OddReg, OddDeadKill, false,
1137 BaseReg, false, BaseUndef, false, OffUndef,
1138 Pred, PredReg, TII, isT2);
1139 NewBBI = llvm::prior(MBBI);
1140 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1141 EvenReg, EvenDeadKill, false,
1142 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1143 Pred, PredReg, TII, isT2);
1145 if (OddReg == EvenReg && EvenDeadKill) {
1146 // If the two source operands are the same, the kill marker is
1147 // probably on the first one. e.g.
1148 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1149 EvenDeadKill = false;
1152 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1153 EvenReg, EvenDeadKill, EvenUndef,
1154 BaseReg, false, BaseUndef, false, OffUndef,
1155 Pred, PredReg, TII, isT2);
1156 NewBBI = llvm::prior(MBBI);
1157 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1158 OddReg, OddDeadKill, OddUndef,
1159 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1160 Pred, PredReg, TII, isT2);
1175 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1176 /// ops of the same base and incrementing offset into LDM / STM ops.
// NOTE(review): this listing is elided (the embedded source line numbers are
// non-contiguous), so several statements, conditions and braces of this
// function are not visible here; comments below describe only the visible code.
1177 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// State for the chain of mergeable memory ops being accumulated: number of
// LDM/STM merges performed, number of ops queued, and the base register /
// transfer size / predicate that every op in the current chain must share.
1178 unsigned NumMerges = 0;
1179 unsigned NumMemOps = 0;
1181 unsigned CurrBase = 0;
1183 unsigned CurrSize = 0;
1184 ARMCC::CondCodes CurrPred = ARMCC::AL;
1185 unsigned CurrPredReg = 0;
1186 unsigned Position = 0;
// Iterators to the LDM/STM instructions produced by MergeLDR_STR; used below
// for base inc/dec (writeback) folding.
1187 SmallVector<MachineBasicBlock::iterator,4> Merges;
// Track liveness through the block so an unused register can be scavenged as
// a scratch base when needed.
1189 RS->enterBasicBlock(&MBB);
1190 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1192 if (FixInvalidRegPairOp(MBB, MBBI))
1195 bool Advance = false;
1196 bool TryMerge = false;
1197 bool Clobber = false;
1199 bool isMemOp = isMemoryOp(MBBI);
// Decompose the current memory op: opcode, transfer size, transfer register
// (operand 0), base register (operand 1), predicate, and immediate offset.
1201 int Opcode = MBBI->getOpcode();
1202 unsigned Size = getLSMultipleTransferSize(MBBI);
1203 const MachineOperand &MO = MBBI->getOperand(0);
1204 unsigned Reg = MO.getReg();
// A def (a load's result) is never a kill of the transfer register.
1205 bool isKill = MO.isDef() ? false : MO.isKill();
1206 unsigned Base = MBBI->getOperand(1).getReg();
1207 unsigned PredReg = 0;
1208 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1209 int Offset = getMemoryOpOffset(MBBI);
1212 // r5 := ldr [r5, #4]
1213 // r6 := ldr [r5, #8]
1215 // The second ldr has effectively broken the chain even though it
1216 // looks like the later ldr(s) use the same base register. Try to
1217 // merge the ldr's so far, including this one. But don't try to
1218 // combine the following ldr(s).
1219 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1220 if (CurrBase == 0 && !Clobber) {
1221 // Start of a new chain.
1226 CurrPredReg = PredReg;
1227 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same opcode, base and predicate as the current chain: try to extend it.
1236 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1237 // No need to match PredReg.
1238 // Continue adding to the queue.
// Common case: offsets arrive in increasing order — append at the tail.
1239 if (Offset > MemOps.back().Offset) {
1240 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise find the sorted insertion point; an equal offset means two ops
// address the same location and this op cannot join the chain.
1245 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1247 if (Offset < I->Offset) {
1248 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1253 } else if (Offset == I->Offset) {
1254 // Collision! This can't be merged!
1263 if (MBBI->isDebugValue()) {
1266 // Reach the end of the block, try merging the memory instructions.
1268 } else if (Advance) {
1272 // Reach the end of the block, try merging the memory instructions.
// Two or more queued ops: attempt to merge them into an LDM/STM.
1278 if (NumMemOps > 1) {
1279 // Try to find a free register to use as a new base in case it's needed.
1280 // First advance to the instruction just before the start of the chain.
1281 AdvanceRS(MBB, MemOps);
1282 // Find a scratch register.
1283 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1284 // Process the load / store instructions.
1285 RS->forward(prior(MBBI));
1289 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1290 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1292 // Try folding preceding/trailing base inc/dec into the generated
1294 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1295 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1297 NumMerges += Merges.size();
1299 // Try folding preceding/trailing base inc/dec into those load/store
1300 // that were not merged to form LDM/STM ops.
1301 for (unsigned i = 0; i != NumMemOps; ++i)
1302 if (!MemOps[i].Merged)
1303 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1306 // RS may be pointing to an instruction that's deleted.
1307 RS->skipTo(prior(MBBI));
// A single op cannot form an LDM/STM, but base inc/dec folding may still
// convert it to a writeback (pre/post-indexed) form.
1308 } else if (NumMemOps == 1) {
1309 // Try folding preceding/trailing base inc/dec into the single
1311 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1313 RS->forward(prior(MBBI));
// Reset chain state so scanning can begin a fresh chain.
1320 CurrPred = ARMCC::AL;
1327 // If iterator hasn't been advanced and this is not a memory op, skip it.
1328 // It can't start a new chain anyway.
1329 if (!Advance && !isMemOp && MBBI != E) {
// Report whether any LDM/STM merge was formed in this block.
1335 return NumMerges > 0;
1338 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1339 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1340 /// directly restores the value of LR into pc.
1341 /// ldmfd sp!, {..., lr}
1344 /// ldmfd sp!, {..., lr}
1347 /// ldmfd sp!, {..., pc}
// NOTE(review): elided listing (non-contiguous embedded line numbers); some
// statements and braces of this function are not visible here.
1348 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1349 if (MBB.empty()) return false;
// Locate the return instruction, ignoring trailing debug values.
1351 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1352 if (MBBI != MBB.begin() &&
1353 (MBBI->getOpcode() == ARM::BX_RET ||
1354 MBBI->getOpcode() == ARM::tBX_RET ||
1355 MBBI->getOpcode() == ARM::MOVPCLR)) {
// Check whether the instruction just before the return is a writeback
// ("pop"-style) load-multiple.
1356 MachineInstr *PrevMI = prior(MBBI);
1357 unsigned Opcode = PrevMI->getOpcode();
1358 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1359 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1360 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last operand of the LDM is the last register restored; the rewrite
// only applies when that register is LR (so it can become PC instead).
1361 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1362 if (MO.getReg() != ARM::LR)
// Only the increment-after (IA) variants are expected to reach this point,
// as the assert below documents.
1364 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1365 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1366 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1367 PrevMI->setDesc(TII->get(NewOpc));
// Carry over the return's implicit operands (e.g. implicit uses) so the
// rewritten LDM keeps the same dataflow as the deleted return.
1369 PrevMI->copyImplicitOps(&*MBBI);
// Post-RA driver: run the LDM/STM formation pass over every block, then (on
// targets with v5T ops) fold returns into the preceding stack restore.
// NOTE(review): elided listing; the tail of this function (cleanup of RS and
// the final return) is not visible here.
1377 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// Cache target hooks / per-function ARM info, and create the register
// scavenger used by LoadStoreMultipleOpti.
1378 const TargetMachine &TM = Fn.getTarget();
1379 AFI = Fn.getInfo<ARMFunctionInfo>();
1380 TII = TM.getInstrInfo();
1381 TRI = TM.getRegisterInfo();
1382 RS = new RegScavenger();
1383 isThumb2 = AFI->isThumb2Function();
1385 bool Modified = false;
1386 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1388 MachineBasicBlock &MBB = *MFI;
1389 Modified |= LoadStoreMultipleOpti(MBB);
// Return folding is gated on v5T — presumably because loading into PC as an
// interworking return requires v5T semantics; TODO(review) confirm.
1390 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1391 Modified |= MergeReturnIntoLDM(MBB);
1399 /// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
1400 /// loads / stores from consecutive locations closer together to make it more
1401 /// likely they will be combined later.
// NOTE(review): elided listing; some members and the struct's closing brace
// are not visible here.
1404 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1406 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target / function context, initialized in runOnMachineFunction.
1408 const TargetData *TD;
1409 const TargetInstrInfo *TII;
1410 const TargetRegisterInfo *TRI;
1411 const ARMSubtarget *STI;
1412 MachineRegisterInfo *MRI;
1413 MachineFunction *MF;
1415 virtual bool runOnMachineFunction(MachineFunction &Fn);
1417 virtual const char *getPassName() const {
1418 return "ARM pre- register allocation load / store optimization pass";
// Checks whether two loads/stores can be combined into a single LDRD/STRD;
// on success the new opcode and operands are returned through the reference
// parameters.
1422 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1423 unsigned &NewOpc, unsigned &EvenReg,
1424 unsigned &OddReg, unsigned &BaseReg,
1426 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves consecutive same-base loads/stores next to each other; returns true
// if the block was changed.
1428 bool RescheduleOps(MachineBasicBlock *MBB,
1429 SmallVector<MachineInstr*, 4> &Ops,
1430 unsigned Base, bool isLd,
1431 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1432 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification (address used as the pass ID).
1434 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pre-RA driver: cache target hooks, then run the rescheduler over every
// basic block of the function.
// NOTE(review): elided listing; the tail of this function (final return and
// closing brace) is not visible here.
1437 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// Cache TargetData / instr info / reg info / subtarget / virtual-register
// info used throughout the pass.
1438 TD = Fn.getTarget().getTargetData();
1439 TII = Fn.getTarget().getInstrInfo();
1440 TRI = Fn.getTarget().getRegisterInfo();
1441 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1442 MRI = &Fn.getRegInfo();
1445 bool Modified = false;
1446 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1448 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps may be
// moved next to each other: scan the instructions in [I, E) for hazards
// (calls, terminators, side effects, stores, defs of the base register) and
// estimate the register pressure the move would add.
// NOTE(review): elided listing; several statements (including some early
// returns) of this function are not visible here.
1453 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1454 MachineBasicBlock::iterator I,
1455 MachineBasicBlock::iterator E,
1456 SmallPtrSet<MachineInstr*, 4> &MemOps,
1457 SmallSet<unsigned, 4> &MemRegs,
1458 const TargetRegisterInfo *TRI) {
1459 // Are there stores / loads / calls between them?
1460 // FIXME: This is overly conservative. We should make use of alias information
// Registers whose live ranges would be stretched across the moved region.
1462 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the memory ops that are themselves being moved.
1464 if (I->isDebugValue() || MemOps.count(&*I))
1466 const MCInstrDesc &MCID = I->getDesc();
// Calls, terminators and instructions with unmodeled side effects are hard
// barriers to any motion.
1467 if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
// When moving loads, an intervening store may alias them.
1469 if (isLd && MCID.mayStore())
1474 // It's not safe to move the first 'str' down.
1477 // str r4, [r0, #+4]
1478 if (MCID.mayStore())
// Scan the operands: any def overlapping the base register invalidates the
// move; other registers (not the base and not one of the moved mem-op
// registers) count toward the added pressure estimate.
1481 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1482 MachineOperand &MO = I->getOperand(j);
1485 unsigned Reg = MO.getReg();
1486 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1488 if (Reg != Base && !MemRegs.count(Reg))
1489 AddedRegPressure.insert(Reg);
1493 // Estimate register pressure increase due to the transformation.
1494 if (MemRegs.size() <= 4)
1495 // Ok if we are moving small number of instructions.
// Heuristic: allow up to 2x the number of moved mem-op registers in added
// pressure.
1497 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Check whether Op0/Op1 (two loads or two stores off the
// same base) can be combined into one LDRD/STRD. On success, fills in the new
// opcode, the even/odd register pair, base register, encoded offset,
// predicate and debug location through the reference parameters.
// NOTE(review): elided listing; several statements (including early returns
// and some opcode-mapping assignments) are not visible here.
1501 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1503 unsigned &NewOpc, unsigned &EvenReg,
1504 unsigned &OddReg, unsigned &BaseReg,
1505 int &Offset, unsigned &PredReg,
1506 ARMCC::CondCodes &Pred,
1508 // Make sure we're allowed to generate LDRD/STRD.
1509 if (!STI->hasV5TEOps())
1512 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the single-word opcode to its doubleword counterpart (ARM i12 forms
// and Thumb2 i8/i12 forms).
1514 unsigned Opcode = Op0->getOpcode();
1515 if (Opcode == ARM::LDRi12)
1517 else if (Opcode == ARM::STRi12)
1519 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1520 NewOpc = ARM::t2LDRDi8;
1523 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1524 NewOpc = ARM::t2STRDi8;
1530 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Require a single, known (has an IR value), non-volatile memory operand so
// the alignment can be checked reliably.
1531 if (!Op0->hasOneMemOperand() ||
1532 !(*Op0->memoperands_begin())->getValue() ||
1533 (*Op0->memoperands_begin())->isVolatile())
1536 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1537 const Function *Func = MF->getFunction();
1538 unsigned ReqAlign = STI->hasV6Ops()
1539 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1540 : 8; // Pre-v6 need 8-byte align
1541 if (Align < ReqAlign)
1544 // Then make sure the immediate offset fits.
1545 int OffImm = getMemoryOpOffset(Op0);
// The offset must fit in an 8-bit (scaled) field and be scale-aligned.
1547 int Limit = (1 << 8) * Scale;
1548 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM-mode LDRD/STRD uses addrmode3: sign and magnitude encoded together via
// getAM3Opc below.
1552 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1554 AddSub = ARM_AM::sub;
1557 int Limit = (1 << 8) * Scale;
1558 if (OffImm >= Limit || (OffImm & (Scale-1)))
1560 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two transfer registers must be distinct to form a register pair.
1562 EvenReg = Op0->getOperand(0).getReg();
1563 OddReg = Op1->getOperand(0).getReg();
1564 if (EvenReg == OddReg)
1566 BaseReg = Op0->getOperand(1).getReg();
1567 Pred = llvm::getInstrPredicate(Op0, PredReg);
1568 dl = Op0->getDebugLoc();
// OffsetCompare - Strict-weak-ordering functor that orders memory
// instructions by *decreasing* memory offset; used to sort same-base
// loads/stores before rescheduling. Equal offsets are only allowed for the
// identical instruction (enforced by the assert below).
1573 struct OffsetCompare {
1574 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1575 int LOffset = getMemoryOpOffset(LHS);
1576 int ROffset = getMemoryOpOffset(RHS);
1577 assert(LHS == RHS || LOffset != ROffset);
// Descending order: larger offsets sort first.
1578 return LOffset > ROffset;
// RescheduleOps - Given the loads or stores (Ops) off the same base register
// in one basic block, find maximal runs with consecutive offsets and matching
// opcodes, and move them next to each other (loads toward the first, stores
// toward the last). Pairs that qualify are rewritten into LDRD/STRD.
// MI2LocMap gives each instruction's position in the block.
// NOTE(review): elided listing; several statements, loop bodies and braces of
// this function are not visible here.
1583 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1584 SmallVector<MachineInstr*, 4> &Ops,
1585 unsigned Base, bool isLd,
1586 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1587 bool RetVal = false;
1589 // Sort by offset (in reverse order).
1590 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1592 // The loads / stores of the same base are in order. Scan them from first to
1593 // last and check for the following:
1594 // 1. Any def of base.
// Process Ops in chunks; each iteration peels off one candidate run.
1596 while (Ops.size() > 1) {
// Track the positional extent (FirstLoc/LastLoc and the corresponding
// instructions) of the current run, plus the last opcode/size/offset seen so
// the run stays homogeneous and offset-consecutive.
1597 unsigned FirstLoc = ~0U;
1598 unsigned LastLoc = 0;
1599 MachineInstr *FirstOp = 0;
1600 MachineInstr *LastOp = 0;
1602 unsigned LastOpcode = 0;
1603 unsigned LastBytes = 0;
1604 unsigned NumMove = 0;
// Ops is sorted by decreasing offset, so walking it backwards visits
// increasing offsets.
1605 for (int i = Ops.size() - 1; i >= 0; --i) {
1606 MachineInstr *Op = Ops[i];
1607 unsigned Loc = MI2LocMap[Op];
1608 if (Loc <= FirstLoc) {
1612 if (Loc >= LastLoc) {
// Stop extending the run on an opcode change.
1617 unsigned Opcode = Op->getOpcode();
1618 if (LastOpcode && Opcode != LastOpcode)
// Stop extending the run if sizes differ or offsets are not consecutive.
1621 int Offset = getMemoryOpOffset(Op);
1622 unsigned Bytes = getLSMultipleTransferSize(Op);
1624 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1627 LastOffset = Offset;
1629 LastOpcode = Opcode;
1630 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the instructions in the run and their transfer registers for the
// safety / profitability check.
1637 SmallPtrSet<MachineInstr*, 4> MemOps;
1638 SmallSet<unsigned, 4> MemRegs;
1639 for (int i = NumMove-1; i >= 0; --i) {
1640 MemOps.insert(Ops[i]);
1641 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1644 // Be conservative, if the instructions are too far apart, don't
1645 // move them. We want to limit the increase of register pressure.
1646 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1648 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1649 MemOps, MemRegs, TRI);
1651 for (unsigned i = 0; i != NumMove; ++i)
1654 // This is the new location for the loads / stores.
// Loads are gathered up at the first op, stores down at the last op; skip
// past ops being moved and debug values to find the insertion point.
1655 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1656 while (InsertPos != MBB->end()
1657 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1660 // If we are moving a pair of loads / stores, see if it makes sense
1661 // to try to allocate a pair of registers that can form register pairs.
1662 MachineInstr *Op0 = Ops.back();
1663 MachineInstr *Op1 = Ops[Ops.size()-2];
1664 unsigned EvenReg = 0, OddReg = 0;
1665 unsigned BaseReg = 0, PredReg = 0;
1666 ARMCC::CondCodes Pred = ARMCC::AL;
1668 unsigned NewOpc = 0;
1671 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1672 EvenReg, OddReg, BaseReg,
1673 Offset, PredReg, Pred, isT2)) {
// Constrain both virtual registers to the class LDRD/STRD requires.
1677 const MCInstrDesc &MCID = TII->get(NewOpc);
1678 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1679 MRI->constrainRegClass(EvenReg, TRC);
1680 MRI->constrainRegClass(OddReg, TRC);
1682 // Form the pair instruction.
// Load pair: both registers are defs.
1684 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1685 .addReg(EvenReg, RegState::Define)
1686 .addReg(OddReg, RegState::Define)
1688 // FIXME: We're converting from LDRi12 to an insn that still
1689 // uses addrmode2, so we need an explicit offset reg. It should
1690 // always be reg0 since we're transforming LDRi12s.
1693 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store pair: both registers are uses (added on elided lines).
1696 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1700 // FIXME: We're converting from LDRi12 to an insn that still
1701 // uses addrmode2, so we need an explicit offset reg. It should
1702 // always be reg0 since we're transforming STRi12s.
1705 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1711 // Add register allocation hints to form register pairs.
1712 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1713 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// No pair formed: just splice the existing instructions to InsertPos.
1715 for (unsigned i = 0; i != NumMove; ++i) {
1716 MachineInstr *Op = Ops.back();
1718 MBB->splice(InsertPos, MBB, Op);
1722 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Scan the block, bucket unpredicated loads and
// stores by base register (Base2LdsMap / Base2StsMap), assigning each
// instruction a position (MI2LocMap), then hand each bucket to RescheduleOps.
// Scanning stops and buckets are flushed at barriers (calls / terminators)
// and when a duplicate base+offset is seen.
// NOTE(review): elided listing; several statements, loop bodies and braces of
// this function are not visible here.
1732 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1733 bool RetVal = false;
// Instruction -> position map, per-base buckets, and the discovery order of
// bases (so rescheduling is deterministic).
1735 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1736 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1737 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1738 SmallVector<unsigned, 4> LdBases;
1739 SmallVector<unsigned, 4> StBases;
1742 MachineBasicBlock::iterator MBBI = MBB->begin();
1743 MachineBasicBlock::iterator E = MBB->end();
1745 for (; MBBI != E; ++MBBI) {
1746 MachineInstr *MI = MBBI;
1747 const MCInstrDesc &MCID = MI->getDesc();
1748 if (MCID.isCall() || MCID.isTerminator()) {
1749 // Stop at barriers.
// Number every non-debug instruction so RescheduleOps can reason about
// relative positions.
1754 if (!MI->isDebugValue())
1755 MI2LocMap[MI] = ++Loc;
1757 if (!isMemoryOp(MI))
// Only unpredicated memory ops are considered.
1759 unsigned PredReg = 0;
1760 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1763 int Opc = MI->getOpcode();
1764 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1765 unsigned Base = MI->getOperand(1).getReg();
1766 int Offset = getMemoryOpOffset(MI);
1768 bool StopHere = false;
// Loads: append to the existing bucket for this base, unless an entry with
// the same offset already exists (duplicate location -> stop here).
1770 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1771 Base2LdsMap.find(Base);
1772 if (BI != Base2LdsMap.end()) {
1773 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1774 if (Offset == getMemoryOpOffset(BI->second[i])) {
1780 BI->second.push_back(MI);
// First load seen for this base: start a new bucket.
1782 SmallVector<MachineInstr*, 4> MIs;
1784 Base2LdsMap[Base] = MIs;
1785 LdBases.push_back(Base);
// Stores: same bucketing and duplicate-offset check as loads above.
1788 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1789 Base2StsMap.find(Base);
1790 if (BI != Base2StsMap.end()) {
1791 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1792 if (Offset == getMemoryOpOffset(BI->second[i])) {
1798 BI->second.push_back(MI);
1800 SmallVector<MachineInstr*, 4> MIs;
1802 Base2StsMap[Base] = MIs;
1803 StBases.push_back(Base);
1808 // Found a duplicate (a base+offset combination that's seen earlier).
1815 // Re-schedule loads.
1816 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1817 unsigned Base = LdBases[i];
1818 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1820 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1823 // Re-schedule stores.
1824 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1825 unsigned Base = StBases[i];
1826 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1828 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state before continuing the scan / returning.
1832 Base2LdsMap.clear();
1833 Base2StsMap.clear();
1843 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1844 /// optimization pass.
1845 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1847 return new ARMPreAllocLoadStoreOpt();
1848 return new ARMLoadStoreOpt();