//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
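///
/// For example (registers and offsets chosen purely for illustration):
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// =>
///   ldmia r0, {r4, r5, r6}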
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";

  struct MemOpQueueEntry {
    MachineBasicBlock::iterator MBBI;
    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}

  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;

  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      unsigned insertAfter,
                      ARMCC::CondCodes Pred,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVector<MachineBasicBlock::iterator, 4> &Merges);

  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);

char ARMLoadStoreOpt::ID = 0;
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
  return ARM_AM::bad_am_submode;
} // end namespace ARM_AM
} // end namespace llvm
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
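  // The submode is picked from the starting offset of the run relative to the
  // (unchanged) base register. For example (illustrative), four word accesses
  // starting at [Base, #-16] map to DB, while a run starting at [Base, #4]
  // maps to IB (when IB / DA are available at all).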
  if (Offset == 4 && haveIBAndDA)
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
    // If starting offset isn't zero, insert an MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers as
      // the new base.
      NewBase = Regs[NumRegs-1].first;
      // Use the scratch register as the new base.
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.
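    // A possible result (illustrative only): with Base = r5, a starting offset
    // of -16 and scratch register r7, this materializes "sub r7, r5, #16", and
    // the ldm / stm built further below then uses r7 as its base.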
    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    BaseKill = true; // New base is always killed right after its use.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      memOps[i].isKill = true;
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
                                   : getARMRegisterNumbering(PReg);
  unsigned Limit = ~0U;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
                                   : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
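    // For example (illustrative), with 4-byte accesses off the same base:
    //   ldr r4, [r0]        ; starts the run
    //   ldr r5, [r0, #4]    ; r5 > r4 and the offset is contiguous -> extend
    //   ldr r3, [r0, #8]    ; r3 < r5 -> the run accumulated so far is flushed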
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,

    if (MemOps[i].Position > MemOps[insertAfter].Position)

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
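/// getLSMultipleTransferSize - Return the number of bytes the instruction
/// transfers. For the load / store multiple forms the register list is
/// variadic, so the size is derived from the number of list operands
/// (4 bytes per GPR or S register, 8 bytes per D register).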
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
///   stmia rn, <ra, rb, rc>
///   rn := rn + 4 * 3;
/// =>
///   stmia rn!, <ra, rb, rc>
///
///   rn := rn - 4 * 3;
///   ldmia rn, <ra, rb, rc>
/// =>
///   ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
    return ARM::LDR_PRE_IMM;
    return ARM::STR_PRE_IMM;
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    return ARM::t2LDR_PRE;
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
    return ARM::LDR_POST_IMM;
    return ARM::STR_POST_IMM;
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    return ARM::t2LDR_POST;
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
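///
/// For example (illustrative):
///   ldr r3, [r0]
///   add r0, r0, #4
/// =>
///   ldr r3, [r0], #4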
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;

  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
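  // A Limit of 0 disables the upper-bound check in isMatchingIncrement /
  // isMatchingDecrement; the increment still has to match the transfer size
  // exactly in every case.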
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
    if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())

  int Opcode = MI->getOpcode();
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
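  // For the i12 and Thumb2 forms checked below, OffField is already the byte
  // offset. The AM3 (ldrd / strd) and AM5 (vldr / vstr) encodings instead pack
  // the magnitude and the add / sub direction into a single immediate, and AM5
  // counts in words, hence the decoding that follows.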
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
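/// FixInvalidRegPairOp - An ldrd / strd needs its two data registers to form
/// an even / odd pair (e.g. r0 / r1). When register allocation produced a pair
/// that does not qualify (or the Cortex-M3 LDRD errata applies), rewrite the
/// instruction either as a single ldm / stm or as two separate loads / stores.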
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      NewBBI = llvm::prior(MBBI);
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load; make sure the first load does not clobber it.
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
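///
/// A chain is accumulated in MemOps for as long as the opcode, base register
/// and predicate stay the same; when the chain is broken (a different base or
/// predicate, an offset collision, or a non-memory instruction), the queued
/// ops are flushed through MergeLDR_STR and a new chain is started.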
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  unsigned CurrBase = 0;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
    if (FixInvalidRegPairOp(MBB, MBBI))

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // No need to match PredReg.
        // Continue adding to the queue.
        if (Offset > MemOps.back().Offset) {
          MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
          for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
            if (Offset < I->Offset) {
              MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
            } else if (Offset == I->Offset) {
              // Collision! This can't be merged!
    if (MBBI->isDebugValue()) {
        // Reached the end of the block; try merging the memory instructions.
    } else if (Advance) {
        // Reached the end of the block; try merging the memory instructions.
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
        // Try folding preceding/trailing base inc/dec into the generated
        // LDM / STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          RS->forward(prior(MBBI));

      CurrPred = ARMCC::AL;

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {

  return NumMerges > 0;
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into the pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      PrevMI->copyImplicitOps(&*MBBI);

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
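///
/// For example (illustrative), two loads from [r0] and [r0, #4] that are
/// separated by unrelated instructions are moved next to each other here, and
/// when a suitable register pair can be formed they are combined directly
/// into an ldrd / strd.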
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetData *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";

  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVector<MachineInstr*, 4> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);

char ARMPreAllocLoadStoreOpt::ID = 0;

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    Modified |= RescheduleLoadStoreInstrs(MFI);

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  SmallSet<unsigned, 4> AddedRegPressure;
    if (I->isDebugValue() || MemOps.count(&*I))
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
    if (isLd && I->mayStore())
    // It's not safe to move the first 'str' down.
    //   str r4, [r0, #+4]
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
  return AddedRegPressure.size() <= MemRegs.size() * 2;
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
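/// CanFormLdStDWord - Return true if the two given i32 loads / stores can be
/// combined into a single ldrd / strd: the target must have V5TE, the opcode
/// must have a doubleword form, the access must carry a single non-volatile
/// memory operand with sufficient alignment, the offset must fit the
/// encoding, and the two data registers must be distinct.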
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
  else if (Opcode == ARM::STRi12)
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
      AddSub = ARM_AM::sub;
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();

struct OffsetCompare {
  bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
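  // OffsetCompare orders by descending offset, so walking Ops from the back
  // (as the loop below does) visits the accesses in ascending address order.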
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
      if (Loc >= LastLoc) {
      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
      LastOffset = Offset;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.

    SmallPtrSet<MachineInstr*, 4> MemOps;
    SmallSet<unsigned, 4> MemRegs;
    for (int i = NumMove-1; i >= 0; --i) {
      MemOps.insert(Ops[i]);
      MemRegs.insert(Ops[i]->getOperand(0).getReg());
    // Be conservative: if the instructions are too far apart, don't
    // move them. We want to limit the increase of register pressure.
    bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                         MemOps, MemRegs, TRI);
      for (unsigned i = 0; i != NumMove; ++i)

      // This is the new location for the loads / stores.
      MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
      while (InsertPos != MBB->end()
             && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))

      // If we are moving a pair of loads / stores, see if it makes sense
      // to try to allocate a pair of registers that can form register pairs.
      MachineInstr *Op0 = Ops.back();
      MachineInstr *Op1 = Ops[Ops.size()-2];
      unsigned EvenReg = 0, OddReg = 0;
      unsigned BaseReg = 0, PredReg = 0;
      ARMCC::CondCodes Pred = ARMCC::AL;
      unsigned NewOpc = 0;
      if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                           EvenReg, OddReg, BaseReg,
                                           Offset, PredReg, Pred, isT2)) {
        const MCInstrDesc &MCID = TII->get(NewOpc);
        const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
        MRI->constrainRegClass(EvenReg, TRC);
        MRI->constrainRegClass(OddReg, TRC);

        // Form the pair instruction.
          MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
            .addReg(EvenReg, RegState::Define)
            .addReg(OddReg, RegState::Define)
          // FIXME: We're converting from LDRi12 to an insn that still
          // uses addrmode2, so we need an explicit offset reg. It should
          // always be reg0 since we're transforming LDRi12s.
          MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
          concatenateMemOperands(MIB, Op0, Op1);
          DEBUG(dbgs() << "Formed " << *MIB << "\n");
          MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
          // FIXME: We're converting from LDRi12 to an insn that still
          // uses addrmode2, so we need an explicit offset reg. It should
          // always be reg0 since we're transforming STRi12s.
          MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
          concatenateMemOperands(MIB, Op0, Op1);
          DEBUG(dbgs() << "Formed " << *MIB << "\n");

        // Add register allocation hints to form register pairs.
        MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
        MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        for (unsigned i = 0; i != NumMove; ++i) {
          MachineInstr *Op = Ops.back();
          MBB->splice(InsertPos, MBB, Op);

        NumLdStMoved += NumMove;

ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  for (; MBBI != E; ++MBBI) {
    MachineInstr *MI = MBBI;
    if (MI->isCall() || MI->isTerminator()) {
      // Stop at barriers.

    if (!MI->isDebugValue())
      MI2LocMap[MI] = ++Loc;

    if (!isMemoryOp(MI))
    unsigned PredReg = 0;
    if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)

    int Opc = MI->getOpcode();
    bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
    unsigned Base = MI->getOperand(1).getReg();
    int Offset = getMemoryOpOffset(MI);

    bool StopHere = false;
      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2LdsMap.find(Base);
      if (BI != Base2LdsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {
        BI->second.push_back(MI);
        SmallVector<MachineInstr*, 4> MIs;
        Base2LdsMap[Base] = MIs;
        LdBases.push_back(Base);
      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2StsMap.find(Base);
      if (BI != Base2StsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {
        BI->second.push_back(MI);
        SmallVector<MachineInstr*, 4> MIs;
        Base2StsMap[Base] = MIs;
        StBases.push_back(Base);

    // Found a duplicate (a base+offset combination that's seen earlier).

  // Re-schedule loads.
  for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
    unsigned Base = LdBases[i];
    SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);

  // Re-schedule stores.
  for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
    unsigned Base = StBases[i];
    SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);

  Base2LdsMap.clear();
  Base2StsMap.clear();

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();