1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass statistics (printed with -stats): counts of multi-register transfer
// instructions this pass creates and of the ldrd/strd rewrites performed by
// FixInvalidRegPairOp.
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register-allocation pass that combines
55 /// load / store instructions to form ldm / stm instructions.
// NOTE(review): the embedded original line numbers jump (54, 55, 58, ...),
// so several declarations are missing from this excerpt — e.g. the
// 'static char ID;' member that the constructor and the definition at the
// bottom reference, and the closing '};' of the nested struct and the class.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks cached when the pass runs on a function.
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - One candidate memory op: its immediate offset, the
// transferred register (and whether this use kills it), its ordinal
// position within the basic block, and an iterator to the instruction.
// The Merged flag (set in the elided field list) records that the op has
// already been folded into a multiple-transfer instruction.
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
// Queue of candidate ops sharing a base register, kept in offset order.
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
// MergeOps - Emit one LDM/STM covering Regs at [Base + Offset].
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
// MergeOpsUpdate - Wrapper around MergeOps that also fixes kill flags,
// erases the merged instructions and records the new one in Merges.
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
// MergeLDR_STR - Merge runs of mergeable ops in MemOps starting at SIndex.
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification, replacement for typeid.
128 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
// addressing sub-mode (ia/ib/da/db) to the corresponding multiple-transfer
// opcode. NOTE(review): the outer 'switch (Opcode)' case labels and the
// inner 'switch (Mode)' headers are elided in this excerpt (the embedded
// line numbers jump); only the per-mode returns are visible.
131 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133 default: llvm_unreachable("Unhandled opcode!");
// ARM-mode LDM: all four sub-modes are available.
137 default: llvm_unreachable("Unhandled submode!");
138 case ARM_AM::ia: return ARM::LDMIA;
139 case ARM_AM::da: return ARM::LDMDA;
140 case ARM_AM::db: return ARM::LDMDB;
141 case ARM_AM::ib: return ARM::LDMIB;
// ARM-mode STM: all four sub-modes are available.
147 default: llvm_unreachable("Unhandled submode!");
148 case ARM_AM::ia: return ARM::STMIA;
149 case ARM_AM::da: return ARM::STMDA;
150 case ARM_AM::db: return ARM::STMDB;
151 case ARM_AM::ib: return ARM::STMIB;
// Thumb2 LDM/STM: only ia and db exist.
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::t2LDMIA;
160 case ARM_AM::db: return ARM::t2LDMDB;
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2STMIA;
169 case ARM_AM::db: return ARM::t2STMDB;
// VFP single-precision multiples: only ia and db exist.
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::VLDMSIA;
177 case ARM_AM::db: return ARM::VLDMSDB;
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::VSTMSIA;
185 case ARM_AM::db: return ARM::VSTMSDB;
// VFP double-precision multiples: only ia and db exist.
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::VLDMDIA;
193 case ARM_AM::db: return ARM::VLDMDDB;
199 default: llvm_unreachable("Unhandled submode!");
200 case ARM_AM::ia: return ARM::VSTMDIA;
201 case ARM_AM::db: return ARM::VSTMDDB;
// getLoadStoreMultipleSubMode - Recover the addressing sub-mode (ia/ib/da/db)
// encoded in a load/store-multiple opcode. NOTE(review): many case labels
// and the 'return ARM_AM::ia;' / 'return ARM_AM::db;' statements for each
// visible group are elided in this excerpt; only the _UPD (writeback)
// variants of the ia and db groups are visible below.
212 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214 default: llvm_unreachable("Unhandled opcode!");
// Increment-after group.
220 case ARM::t2LDMIA_UPD:
222 case ARM::t2STMIA_UPD:
224 case ARM::VLDMSIA_UPD:
226 case ARM::VSTMSIA_UPD:
228 case ARM::VLDMDIA_UPD:
230 case ARM::VSTMDIA_UPD:
// Decrement-before group.
244 case ARM::t2LDMDB_UPD:
246 case ARM::t2STMDB_UPD:
248 case ARM::VLDMSDB_UPD:
250 case ARM::VSTMSDB_UPD:
252 case ARM::VLDMDDB_UPD:
254 case ARM::VSTMDDB_UPD:
// Unreachable fall-through; keeps the compiler happy.
264 return ARM_AM::bad_am_submode;
267 } // end namespace ARM_AM
268 } // end namespace llvm
270 static bool isT2i32Load(unsigned Opc) {
271 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
274 static bool isi32Load(unsigned Opc) {
275 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
278 static bool isT2i32Store(unsigned Opc) {
279 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
282 static bool isi32Store(unsigned Opc) {
283 return Opc == ARM::STRi12 || isT2i32Store(Opc);
286 /// MergeOps - Create and insert a LDM or STM with Base as base register and
287 /// registers in Regs as the register operands that would be loaded / stored.
288 /// It returns true if the transformation is done.
// NOTE(review): the embedded line numbers jump throughout this function, so
// some statements are missing from this excerpt — e.g. the early bail-out
// for a single register, the Mode assignments in the if/else ladder, and
// the trailing 'return true;'.
290 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
291 MachineBasicBlock::iterator MBBI,
292 int Offset, unsigned Base, bool BaseKill,
293 int Opcode, ARMCC::CondCodes Pred,
294 unsigned PredReg, unsigned Scratch, DebugLoc dl,
295 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
296 // Only a single register to load / store. Don't bother.
297 unsigned NumRegs = Regs.size();
// Select the addressing sub-mode from the starting offset.
301 ARM_AM::AMSubMode Mode = ARM_AM::ia;
302 // VFP and Thumb2 do not support IB or DA modes.
303 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
304 bool haveIBAndDA = isNotVFP && !isThumb2;
305 if (Offset == 4 && haveIBAndDA)
307 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
309 else if (Offset == -4 * (int)NumRegs && isNotVFP)
310 // VLDM/VSTM do not support DB mode without also updating the base reg.
312 else if (Offset != 0) {
313 // If starting offset isn't zero, insert a MI to materialize a new base.
314 // But only do so if it is cost effective, i.e. merging more than two
// loads / stores (the check itself is elided from this excerpt).
320 if (isi32Load(Opcode))
321 // If it is a load, then just use one of the destination register to
322 // use as the new base.
323 NewBase = Regs[NumRegs-1].first;
325 // Use the scratch register to use as a new base.
// Pick an ADD (positive offset) or SUB (negative offset) to materialize
// the new base; Thumb2 has distinct SP-relative opcodes.
330 int BaseOpc = !isThumb2
332 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
336 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// The offset must be encodable as an ARM/Thumb2 modified immediate.
339 int ImmedOffset = isThumb2
340 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
341 if (ImmedOffset == -1)
342 // FIXME: Try t2ADDri12 or t2SUBri12?
343 return false; // Probably not worth it then.
345 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
346 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
347 .addImm(Pred).addReg(PredReg).addReg(0);
349 BaseKill = true; // New base is always killed right after its use.
// Loads define their register operands; stores only read them.
352 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
353 Opcode == ARM::VLDRD);
354 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
355 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
356 .addReg(Base, getKillRegState(BaseKill))
357 .addImm(Pred).addReg(PredReg);
358 for (unsigned i = 0; i != NumRegs; ++i)
359 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
360 | getKillRegState(Regs[i].second));
365 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success. NOTE(review): the embedded line numbers jump, so several
// parameters (e.g. memOps, Opcode, Scratch, dl) and statements (loop
// closers, the 'return' after a failed merge) are elided in this excerpt.
367 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
369 unsigned memOpsBegin, unsigned memOpsEnd,
370 unsigned insertAfter, int Offset,
371 unsigned Base, bool BaseKill,
373 ARMCC::CondCodes Pred, unsigned PredReg,
376 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
377 // First calculate which of the registers should be killed by the merged
// instruction.
379 const unsigned insertPos = memOps[insertAfter].Position;
381 SmallSet<unsigned, 4> UnavailRegs;
382 SmallSet<unsigned, 4> KilledRegs;
383 DenseMap<unsigned, unsigned> Killer;
// Ops before the merged range whose instruction sits above the insertion
// point: if already merged their kill flag cannot be repaired (Unavail),
// otherwise remember the register so its kill flag can be transferred.
384 for (unsigned i = 0; i < memOpsBegin; ++i) {
385 if (memOps[i].Position < insertPos && memOps[i].isKill) {
386 unsigned Reg = memOps[i].Reg;
387 if (memOps[i].Merged)
388 UnavailRegs.insert(Reg);
390 KilledRegs.insert(Reg);
// Same scan for the ops after the merged range.
395 for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
396 if (memOps[i].Position < insertPos && memOps[i].isKill) {
397 unsigned Reg = memOps[i].Reg;
398 KilledRegs.insert(Reg);
// Build the (register, isKill) list for the merged instruction.
403 SmallVector<std::pair<unsigned, bool>, 8> Regs;
404 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
405 unsigned Reg = memOps[i].Reg;
406 if (UnavailRegs.count(Reg))
407 // Register is killed before and it's not easy / possible to update the
408 // kill marker on already merged instructions. Abort.
411 // If we are inserting the merged operation after an unmerged operation that
412 // uses the same register, make sure to transfer any kill flag.
413 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
414 Regs.push_back(std::make_pair(Reg, isKill));
417 // Try to do the merge.
418 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
420 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
421 Pred, PredReg, Scratch, dl, Regs))
424 // Merge succeeded, update records.
425 Merges.push_back(prior(Loc));
426 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
427 // Remove kill flags from any unmerged memops that come before insertPos.
428 if (Regs[i-memOpsBegin].second) {
429 unsigned Reg = Regs[i-memOpsBegin].first;
430 if (KilledRegs.count(Reg)) {
431 unsigned j = Killer[Reg];
432 memOps[j].MBBI->getOperand(0).setIsKill(false);
433 memOps[j].isKill = false;
// Erase the original single-register instruction and mark the entry merged.
436 MBB.erase(memOps[i].MBBI);
437 memOps[i].Merged = true;
441 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
442 /// load / store multiple instructions.
// NOTE(review): the embedded line numbers jump; the loop-body statements
// that extend the current run (updating Offset/PReg/PRegNum/Count) and the
// recursion epilogue are elided from this excerpt.
444 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
445 unsigned Base, int Opcode, unsigned Size,
446 ARMCC::CondCodes Pred, unsigned PredReg,
447 unsigned Scratch, MemOpQueue &MemOps,
448 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
449 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
450 int Offset = MemOps[SIndex].Offset;
451 int SOffset = Offset;
452 unsigned insertAfter = SIndex;
453 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
454 DebugLoc dl = Loc->getDebugLoc();
// Operand 0 of a single load/store is the transferred register.
455 const MachineOperand &PMO = Loc->getOperand(0);
456 unsigned PReg = PMO.getReg();
// UINT_MAX for undef regs so the ascending-order check below never matches.
457 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
458 : getARMRegisterNumbering(PReg);
// Greedily extend the run of mergeable ops following SIndex.
461 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
462 int NewOffset = MemOps[i].Offset;
463 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
464 unsigned Reg = MO.getReg();
465 unsigned RegNum = MO.isUndef() ? UINT_MAX
466 : getARMRegisterNumbering(Reg);
467 // Register numbers must be in ascending order. For VFP, the registers
468 // must also be consecutive and there is a limit of 16 double-word
469 // registers per instruction.
470 if (Reg != ARM::SP &&
471 NewOffset == Offset + (int)Size &&
472 ((isNotVFP && RegNum > PRegNum)
473 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
478 // Can't merge this in. Try merge the earlier ones first.
479 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
480 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to merge the remainder of the queue starting at the breaker.
481 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// Track the latest instruction position as the insertion point.
486 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Merge the final run; the base is killed if its last use is inside it.
490 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
491 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
492 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
496 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
497 unsigned Bytes, unsigned Limit,
498 ARMCC::CondCodes Pred, unsigned PredReg){
499 unsigned MyPredReg = 0;
502 if (MI->getOpcode() != ARM::t2SUBri &&
503 MI->getOpcode() != ARM::t2SUBrSPi &&
504 MI->getOpcode() != ARM::t2SUBrSPi12 &&
505 MI->getOpcode() != ARM::tSUBspi &&
506 MI->getOpcode() != ARM::SUBri)
509 // Make sure the offset fits in 8 bits.
510 if (Bytes == 0 || (Limit && Bytes >= Limit))
513 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
514 return (MI->getOperand(0).getReg() == Base &&
515 MI->getOperand(1).getReg() == Base &&
516 (MI->getOperand(2).getImm()*Scale) == Bytes &&
517 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
518 MyPredReg == PredReg);
521 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
522 unsigned Bytes, unsigned Limit,
523 ARMCC::CondCodes Pred, unsigned PredReg){
524 unsigned MyPredReg = 0;
527 if (MI->getOpcode() != ARM::t2ADDri &&
528 MI->getOpcode() != ARM::t2ADDrSPi &&
529 MI->getOpcode() != ARM::t2ADDrSPi12 &&
530 MI->getOpcode() != ARM::tADDspi &&
531 MI->getOpcode() != ARM::ADDri)
534 if (Bytes == 0 || (Limit && Bytes >= Limit))
535 // Make sure the offset fits in 8 bits.
538 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
539 return (MI->getOperand(0).getReg() == Base &&
540 MI->getOperand(1).getReg() == Base &&
541 (MI->getOperand(2).getImm()*Scale) == Bytes &&
542 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
543 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes transferred by a load/store
// instruction. NOTE(review): nearly all case labels (and the returns for
// single-register ops) are elided in this excerpt. The two visible returns
// compute, for multiple-transfer ops, (number of register-list operands) * 4
// for 32-bit element transfers and * 8 for double-word transfers; the
// register list is the variadic tail beyond the fixed operands.
546 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
547 switch (MI->getOpcode()) {
577 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
582 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// getUpdatingLSMultipleOpcode - Map a non-writeback multiple-transfer opcode
// plus an addressing sub-mode to the corresponding base-updating (_UPD)
// opcode. NOTE(review): the outer 'switch (Opc)' case labels and inner
// 'switch (Mode)' headers are elided in this excerpt.
586 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
587 ARM_AM::AMSubMode Mode) {
589 default: llvm_unreachable("Unhandled opcode!");
// ARM-mode LDM: all four sub-modes have updating forms.
595 default: llvm_unreachable("Unhandled submode!");
596 case ARM_AM::ia: return ARM::LDMIA_UPD;
597 case ARM_AM::ib: return ARM::LDMIB_UPD;
598 case ARM_AM::da: return ARM::LDMDA_UPD;
599 case ARM_AM::db: return ARM::LDMDB_UPD;
// ARM-mode STM: all four sub-modes have updating forms.
607 default: llvm_unreachable("Unhandled submode!");
608 case ARM_AM::ia: return ARM::STMIA_UPD;
609 case ARM_AM::ib: return ARM::STMIB_UPD;
610 case ARM_AM::da: return ARM::STMDA_UPD;
611 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2: only ia and db exist.
617 default: llvm_unreachable("Unhandled submode!");
618 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
619 case ARM_AM::db: return ARM::t2LDMDB_UPD;
625 default: llvm_unreachable("Unhandled submode!");
626 case ARM_AM::ia: return ARM::t2STMIA_UPD;
627 case ARM_AM::db: return ARM::t2STMDB_UPD;
// VFP single- and double-precision multiples: only ia and db exist.
633 default: llvm_unreachable("Unhandled submode!");
634 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
635 case ARM_AM::db: return ARM::VLDMSDB_UPD;
641 default: llvm_unreachable("Unhandled submode!");
642 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
643 case ARM_AM::db: return ARM::VLDMDDB_UPD;
649 default: llvm_unreachable("Unhandled submode!");
650 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
651 case ARM_AM::db: return ARM::VSTMSDB_UPD;
657 default: llvm_unreachable("Unhandled submode!");
658 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
659 case ARM_AM::db: return ARM::VSTMDDB_UPD;
667 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
668 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
// Example transformations (before => after):
670 /// stmia rn, <ra, rb, rc>
671 /// rn := rn + 4 * 3;
673 /// stmia rn!, <ra, rb, rc>
675 /// rn := rn - 4 * 3;
676 /// ldmia rn, <ra, rb, rc>
678 /// ldmdb rn!, <ra, rb, rc>
// NOTE(review): the embedded line numbers jump; elided statements include
// the early 'return false' after the dest==base scan, the DoMerge/Mode
// updates and instruction-erase bookkeeping inside both merge attempts, the
// '!DoMerge' bail-out, and the final erase/return.
679 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
680 MachineBasicBlock::iterator MBBI,
682 MachineBasicBlock::iterator &I) {
683 MachineInstr *MI = MBBI;
// Operand 0 of a non-writeback multiple is the base register.
684 unsigned Base = MI->getOperand(0).getReg();
685 bool BaseKill = MI->getOperand(0).isKill();
686 unsigned Bytes = getLSMultipleTransferSize(MI);
687 unsigned PredReg = 0;
688 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
689 int Opcode = MI->getOpcode();
690 DebugLoc dl = MI->getDebugLoc();
692 // Can't use an updating ld/st if the base register is also a dest
693 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
694 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
695 if (MI->getOperand(i).getReg() == Base)
698 bool DoMerge = false;
699 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
701 // Try merging with the previous instruction.
702 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
703 if (MBBI != BeginMBBI) {
704 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the merge.
705 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
// A decrement before an ia/ib op folds into a da/db form respectively.
707 if (Mode == ARM_AM::ia &&
708 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
711 } else if (Mode == ARM_AM::ib &&
712 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
720 // Try merging with the next instruction.
721 MachineBasicBlock::iterator EndMBBI = MBB.end();
722 if (!DoMerge && MBBI != EndMBBI) {
723 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
724 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
// A trailing increment folds into ia/ib; a trailing decrement into da/db.
726 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
727 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
729 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
730 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Rebuild as the writeback form: the base is now defined as well as read.
745 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
746 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
747 .addReg(Base, getDefRegState(true)) // WB base register
748 .addReg(Base, getKillRegState(BaseKill))
749 .addImm(Pred).addReg(PredReg);
751 // Transfer the rest of operands.
752 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
753 MIB.addOperand(MI->getOperand(OpNum));
755 // Transfer memoperands.
756 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
// pre-indexed (or, for VFP, single-register updating-multiple) equivalent,
// choosing the ia/db multiple by the add/sub direction. NOTE(review): the
// 'switch (Opc)' header and its case labels are elided in this excerpt.
762 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
763 ARM_AM::AddrOpc Mode) {
// VFP has no true pre-indexed VLDR/VSTR; use a one-register VLDM/VSTM_UPD.
770 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
772 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
774 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
776 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
// Thumb2 integer forms.
779 return ARM::t2LDR_PRE;
782 return ARM::t2STR_PRE;
783 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
// post-indexed (or, for VFP, single-register updating-multiple) equivalent.
// NOTE(review): the 'switch (Opc)' header and case labels are elided here.
788 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
789 ARM_AM::AddrOpc Mode) {
// ARM-mode integer forms.
792 return ARM::LDR_POST;
794 return ARM::STR_POST;
// VFP has no post-indexed VLDR/VSTR; use a one-register VLDM/VSTM_UPD.
796 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
798 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
800 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
802 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
// Thumb2 integer forms.
805 return ARM::t2LDR_POST;
808 return ARM::t2STR_POST;
809 default: llvm_unreachable("Unhandled opcode!");
814 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
815 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// NOTE(review): the embedded line numbers jump; elided statements include
// several 'return false' bodies, the DoMerge assignments, the '!DoMerge'
// bail-out, the erase of the matched add/sub and of MI, and the final
// 'return true;'.
816 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
817 MachineBasicBlock::iterator MBBI,
818 const TargetInstrInfo *TII,
820 MachineBasicBlock::iterator &I) {
821 MachineInstr *MI = MBBI;
// Operand 1 of a single load/store is the base register.
822 unsigned Base = MI->getOperand(1).getReg();
823 bool BaseKill = MI->getOperand(1).isKill();
824 unsigned Bytes = getLSMultipleTransferSize(MI);
825 int Opcode = MI->getOpcode();
826 DebugLoc dl = MI->getDebugLoc();
827 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
828 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
829 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset forms can absorb a base update.
830 if (isi32Load(Opcode) || isi32Store(Opcode))
831 if (MI->getOperand(2).getImm() != 0)
833 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
836 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
837 // Can't do the merge if the destination register is the same as the would-be
838 // writeback register.
839 if (isLd && MI->getOperand(0).getReg() == Base)
842 unsigned PredReg = 0;
843 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
844 bool DoMerge = false;
845 ARM_AM::AddrOpc AddSub = ARM_AM::add;
847 // AM2 - 12 bits, thumb2 - 8 bits.
848 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
850 // Try merging with the previous instruction.
851 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
852 if (MBBI != BeginMBBI) {
853 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the merge.
854 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
856 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
858 AddSub = ARM_AM::sub;
860 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A preceding add/sub folds into the pre-indexed form.
864 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
869 // Try merging with the next instruction.
870 MachineBasicBlock::iterator EndMBBI = MBB.end();
871 if (!DoMerge && MBBI != EndMBBI) {
872 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
873 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
876 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
878 AddSub = ARM_AM::sub;
879 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A trailing add/sub folds into the post-indexed form.
883 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// Encode the immediate for the chosen addressing mode.
897 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
899 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
902 // VLDM[SD}_UPD, VSTM[SD]_UPD
903 // (There are no base-updating versions of VLDR/VSTR instructions, but the
904 // updating load/store-multiple instructions can be used with only one
// register.)
906 MachineOperand &MO = MI->getOperand(0);
907 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
908 .addReg(Base, getDefRegState(true)) // WB base register
909 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
910 .addImm(Pred).addReg(PredReg)
911 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
912 getKillRegState(MO.isKill())));
915 // LDR_PRE, LDR_POST,
916 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
917 .addReg(Base, RegState::Define)
918 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
920 // t2LDR_PRE, t2LDR_POST
921 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
922 .addReg(Base, RegState::Define)
923 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Store cases: the stored register is read, not defined.
925 MachineOperand &MO = MI->getOperand(0);
928 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
929 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
930 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
932 // t2STR_PRE, t2STR_POST
933 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
934 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
935 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
942 /// isMemoryOp - Returns true if instruction is a memory operations (that this
943 /// pass is capable of operating on).
// NOTE(review): the embedded line numbers jump; the 'return false'
// statements after each guard and the 'switch (Opcode)' case labels near
// the bottom are elided from this excerpt.
944 static bool isMemoryOp(const MachineInstr *MI) {
945 // When no memory operands are present, conservatively assume unaligned,
946 // volatile, unfoldable.
947 if (!MI->hasOneMemOperand())
950 const MachineMemOperand *MMO = *MI->memoperands_begin();
952 // Don't touch volatile memory accesses - we may be changing their order.
953 if (MMO->isVolatile())
956 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not, so require at least word alignment.
958 if (MMO->getAlignment() < 4)
961 // str <undef> could probably be eliminated entirely, but for now we just want
962 // to avoid making a mess of it.
963 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
964 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
965 MI->getOperand(0).isUndef())
968 // Likewise don't mess with references to undefined addresses.
969 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
970 MI->getOperand(1).isUndef())
973 int Opcode = MI->getOpcode();
// For the handled opcodes (cases elided), only register-base forms qualify.
978 return MI->getOperand(1).isReg();
981 return MI->getOperand(1).isReg();
988 return MI->getOperand(1).isReg();
993 /// AdvanceRS - Advance register scavenger to just before the earliest memory
994 /// op that is being merged.
995 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
996 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
997 unsigned Position = MemOps[0].Position;
998 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
999 if (MemOps[i].Position < Position) {
1000 Position = MemOps[i].Position;
1001 Loc = MemOps[i].MBBI;
1005 if (Loc != MBB.begin())
1006 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode the signed byte offset of a single load/store.
// NOTE(review): the embedded line numbers jump; the returns taken by the
// early 'i12'/'i8' path and by the two sign branches are elided from this
// excerpt.
1009 static int getMemoryOpOffset(const MachineInstr *MI) {
1010 int Opcode = MI->getOpcode();
1011 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
// The immediate is always the third-from-last operand for these forms.
1012 unsigned NumOperands = MI->getDesc().getNumOperands();
1013 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// The i12/i8 forms carry a plain (already signed) immediate.
1015 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1016 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1017 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1018 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
// AM3 encodes a byte offset; AM5 encodes a word count, hence the * 4.
1021 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1022 : ARM_AM::getAM5Offset(OffField) * 4;
// Apply the encoded add/sub direction (negation bodies elided).
1024 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1027 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit one single-register load or store with the given
// register/base/offset and flag states; helper for FixInvalidRegPairOp when
// an ldrd/strd is split into two instructions. NOTE(review): the embedded
// line numbers jump — the branch tokens between the two builder sequences
// are elided; the first sequence is presumably the isDef (load) arm and the
// second the store arm (confirm against the full source). Note the dl
// parameter is unused here; the insertion point's own debug loc is taken.
1033 static void InsertLDR_STR(MachineBasicBlock &MBB,
1034 MachineBasicBlock::iterator &MBBI,
1035 int Offset, bool isDef,
1036 DebugLoc dl, unsigned NewOpc,
1037 unsigned Reg, bool RegDeadKill, bool RegUndef,
1038 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1039 bool OffKill, bool OffUndef,
1040 ARMCC::CondCodes Pred, unsigned PredReg,
1041 const TargetInstrInfo *TII, bool isT2) {
// Load form: Reg is defined (and possibly dead).
1043 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1045 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1046 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1047 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is read (possibly killed or undef).
1049 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1051 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1052 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1053 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - Rewrite an ldrd/strd whose register pair is not a
// valid even/odd pair: either into an ldm/stm (ascending regs, zero offset)
// or into two single loads/stores. Returns true (via the elided epilogue)
// when MI was replaced. NOTE(review): the embedded line numbers jump;
// elided statements include several 'return false' bodies, statistics
// updates, the erase of MI, and the update of MBBI.
1057 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1058 MachineBasicBlock::iterator &MBBI) {
1059 MachineInstr *MI = &*MBBI;
1060 unsigned Opcode = MI->getOpcode();
1061 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1062 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1063 unsigned EvenReg = MI->getOperand(0).getReg();
1064 unsigned OddReg = MI->getOperand(1).getReg();
1065 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1066 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A proper even/odd pair needs no fixing (the 'return false' is elided).
1067 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1070 MachineBasicBlock::iterator NewBBI = MBBI;
1071 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1072 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the flags of interest are dead-defs; for stores, kills.
1073 bool EvenDeadKill = isLd ?
1074 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1075 bool EvenUndef = MI->getOperand(0).isUndef();
1076 bool OddDeadKill = isLd ?
1077 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1078 bool OddUndef = MI->getOperand(1).isUndef();
1079 const MachineOperand &BaseOp = MI->getOperand(2);
1080 unsigned BaseReg = BaseOp.getReg();
1081 bool BaseKill = BaseOp.isKill();
1082 bool BaseUndef = BaseOp.isUndef();
// Thumb2 ldrd/strd has no separate offset-register operand.
1083 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1084 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1085 int OffImm = getMemoryOpOffset(MI);
1086 unsigned PredReg = 0;
1087 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1089 if (OddRegNum > EvenRegNum && OffImm == 0) {
1090 // Ascending register numbers and no offset. It's safe to change it to a
// ldm/stm of the two registers.
1092 unsigned NewOpc = (isLd)
1093 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1094 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
// Load case: both registers are defined.
1096 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1097 .addReg(BaseReg, getKillRegState(BaseKill))
1098 .addImm(Pred).addReg(PredReg)
1099 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1100 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store case: both registers are read.
1103 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1104 .addReg(BaseReg, getKillRegState(BaseKill))
1105 .addImm(Pred).addReg(PredReg)
1107 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1109 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1112 NewBBI = llvm::prior(MBBI);
1114 // Split into two instructions.
1115 unsigned NewOpc = (isLd)
1116 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1117 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1118 DebugLoc dl = MBBI->getDebugLoc();
1119 // If this is a load and base register is killed, it may have been
1120 // re-defed by the load, make sure the first load does not clobber it.
1122 (BaseKill || OffKill) &&
1123 (TRI->regsOverlap(EvenReg, BaseReg))) {
1124 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd half first so the base survives for the second load.
1125 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1126 OddReg, OddDeadKill, false,
1127 BaseReg, false, BaseUndef, false, OffUndef,
1128 Pred, PredReg, TII, isT2);
1129 NewBBI = llvm::prior(MBBI);
1130 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1131 EvenReg, EvenDeadKill, false,
1132 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1133 Pred, PredReg, TII, isT2);
1135 if (OddReg == EvenReg && EvenDeadKill) {
1136 // If the two source operands are the same, the kill marker is
1137 // probably on the first one. e.g.
1138 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1139 EvenDeadKill = false;
// Normal order: even half first, odd half second carries the base kill.
1142 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1143 EvenReg, EvenDeadKill, EvenUndef,
1144 BaseReg, false, BaseUndef, false, OffUndef,
1145 Pred, PredReg, TII, isT2);
1146 NewBBI = llvm::prior(MBBI);
1147 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1148 OddReg, OddDeadKill, OddUndef,
1149 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1150 Pred, PredReg, TII, isT2);
1165 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1166 /// ops of the same base and incrementing offset into LDM / STM ops.
// Single forward scan over MBB: consecutive loads/stores sharing an opcode,
// base register and predicate are collected into the MemOps queue (kept
// sorted by offset), then handed to MergeLDR_STR once the chain ends.
// Returns true iff at least one merge was performed.
// NOTE(review): the embedded original line numbers in this listing are
// non-contiguous, so some statements (the main loop header, chain-reset
// code, closing braces) are elided here; comments cover only visible code.
1167 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1168 unsigned NumMerges = 0;
1169 unsigned NumMemOps = 0;
// State of the chain currently being built: every queued memory op must
// match CurrBase / CurrSize / CurrPred.
1171 unsigned CurrBase = 0;
1173 unsigned CurrSize = 0;
1174 ARMCC::CondCodes CurrPred = ARMCC::AL;
1175 unsigned CurrPredReg = 0;
1176 unsigned Position = 0;
// LDM/STM instructions produced for this chain; used below when folding
// base-register inc/dec into the merged instructions.
1177 SmallVector<MachineBasicBlock::iterator,4> Merges;
// The scavenger tracks liveness so we can later find a free scratch
// register to serve as a new base if merging needs one.
1179 RS->enterBasicBlock(&MBB);
1180 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// First give FixInvalidRegPairOp a chance to rewrite an illegal ldrd/strd.
1182 if (FixInvalidRegPairOp(MBB, MBBI))
1185 bool Advance = false;
1186 bool TryMerge = false;
1187 bool Clobber = false;
1189 bool isMemOp = isMemoryOp(MBBI);
1191 int Opcode = MBBI->getOpcode();
1192 unsigned Size = getLSMultipleTransferSize(MBBI);
1193 const MachineOperand &MO = MBBI->getOperand(0);
1194 unsigned Reg = MO.getReg();
// Operand 0 of a load is a def, so it can never carry a kill flag here.
1195 bool isKill = MO.isDef() ? false : MO.isKill();
1196 unsigned Base = MBBI->getOperand(1).getReg();
1197 unsigned PredReg = 0;
1198 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1199 int Offset = getMemoryOpOffset(MBBI);
1202 // r5 := ldr [r5, #4]
1203 // r6 := ldr [r5, #8]
1205 // The second ldr has effectively broken the chain even though it
1206 // looks like the later ldr(s) use the same base register. Try to
1207 // merge the ldr's so far, including this one. But don't try to
1208 // combine the following ldr(s).
1209 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1210 if (CurrBase == 0 && !Clobber) {
1211 // Start of a new chain.
1216 CurrPredReg = PredReg;
1217 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Chain continues only if opcode, base and condition code all match.
1226 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1227 // No need to match PredReg.
1228 // Continue adding to the queue.
// Common case: offsets arrive in increasing order, so just append.
1229 if (Offset > MemOps.back().Offset) {
1230 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise insert in sorted position; an equal offset means the same
// slot is touched twice, which cannot be merged into one LDM/STM.
1235 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1237 if (Offset < I->Offset) {
1238 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1243 } else if (Offset == I->Offset) {
1244 // Collision! This can't be merged!
// Debug values must not break or terminate a chain; skip over them.
1253 if (MBBI->isDebugValue()) {
1256 // Reach the end of the block, try merging the memory instructions.
1258 } else if (Advance) {
1262 // Reach the end of the block, try merging the memory instructions.
1268 if (NumMemOps > 1) {
1269 // Try to find a free register to use as a new base in case it's needed.
1270 // First advance to the instruction just before the start of the chain.
1271 AdvanceRS(MBB, MemOps);
1272 // Find a scratch register.
1273 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1274 // Process the load / store instructions.
1275 RS->forward(prior(MBBI));
1279 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1280 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1282 // Try folding preceeding/trailing base inc/dec into the generated
1284 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1285 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1287 NumMerges += Merges.size();
1289 // Try folding preceeding/trailing base inc/dec into those load/store
1290 // that were not merged to form LDM/STM ops.
1291 for (unsigned i = 0; i != NumMemOps; ++i)
1292 if (!MemOps[i].Merged)
1293 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1296 // RS may be pointing to an instruction that's deleted.
1297 RS->skipTo(prior(MBBI));
1298 } else if (NumMemOps == 1) {
1299 // Try folding preceeding/trailing base inc/dec into the single
1301 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
// Keep the scavenger in sync after instructions were replaced.
1303 RS->forward(prior(MBBI));
// Reset chain state so the next memory op starts a fresh chain.
1310 CurrPred = ARMCC::AL;
1317 // If iterator hasn't been advanced and this is not a memory op, skip it.
1318 // It can't start a new chain anyway.
1319 if (!Advance && !isMemOp && MBBI != E) {
1325 return NumMerges > 0;
// Strict-weak-ordering functor: orders memory instructions by DECREASING
// memory offset (RescheduleOps sorts with this and then walks the vector
// backwards to visit increasing offsets). The assert documents the
// invariant that two distinct instructions in the same candidate list
// never share an offset — duplicates are filtered out before sorting.
1329 struct OffsetCompare {
1330 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1331 int LOffset = getMemoryOpOffset(LHS);
1332 int ROffset = getMemoryOpOffset(RHS);
1333 assert(LHS == RHS || LOffset != ROffset);
1334 return LOffset > ROffset;
1339 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1340 /// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
1341 /// directly restore the value of LR into pc.
1342 /// ldmfd sp!, {..., lr}
1345 /// ldmfd sp!, {..., lr}
1348 /// ldmfd sp!, {..., pc}
// Returns true if the rewrite happened (return value falls in an elided
// line of this listing).
1349 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1350 if (MBB.empty()) return false;
// Look at the last instruction of the block: it must be a bare return,
// and there must be an instruction before it (the candidate LDM).
1352 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1353 if (MBBI != MBB.begin() &&
1354 (MBBI->getOpcode() == ARM::BX_RET ||
1355 MBBI->getOpcode() == ARM::tBX_RET ||
1356 MBBI->getOpcode() == ARM::MOVPCLR)) {
1357 MachineInstr *PrevMI = prior(MBBI);
1358 unsigned Opcode = PrevMI->getOpcode();
// Only writeback ("_UPD") load-multiple forms qualify.
1359 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1360 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1361 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The register list trails the operand list; LR must be the last
// register loaded for replacing it with PC to be valid.
1362 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1363 if (MO.getReg() != ARM::LR)
// Only the increment-after form has a *_RET pseudo, per the assert.
1365 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1366 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1367 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1368 PrevMI->setDesc(TII->get(NewOpc));
// Carry over the return's implicit operands onto the new terminator.
1370 PrevMI->copyImplicitOps(&*MBBI);
// Post-register-allocation entry point: caches the target interfaces,
// then runs LoadStoreMultipleOpti and MergeReturnIntoLDM over every
// basic block, OR-ing their "changed" results into Modified.
// NOTE(review): cleanup (e.g. deleting RS) and the final return fall in
// lines elided from this listing.
1378 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1379 const TargetMachine &TM = Fn.getTarget();
1380 AFI = Fn.getInfo<ARMFunctionInfo>();
1381 TII = TM.getInstrInfo();
1382 TRI = TM.getRegisterInfo();
// Fresh scavenger per function; LoadStoreMultipleOpti uses it to find
// scratch registers.
1383 RS = new RegScavenger();
1384 isThumb2 = AFI->isThumb2Function();
1386 bool Modified = false;
1387 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1389 MachineBasicBlock &MBB = *MFI;
1390 Modified |= LoadStoreMultipleOpti(MBB);
1391 Modified |= MergeReturnIntoLDM(MBB);
1399 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1400 /// load / stores from consecutive locations close to make it more
1401 /// likely they will be combined later.
1404 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1406 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target interfaces, cached once per function in runOnMachineFunction.
1408 const TargetData *TD;
1409 const TargetInstrInfo *TII;
1410 const TargetRegisterInfo *TRI;
1411 const ARMSubtarget *STI;
1412 MachineRegisterInfo *MRI;
1413 MachineFunction *MF;
1415 virtual bool runOnMachineFunction(MachineFunction &Fn);
1417 virtual const char *getPassName() const {
1418 return "ARM pre- register allocation load / store optimization pass";
// Checks whether Op0/Op1 (two word-sized loads or stores off the same
// base) can be combined into a single LDRD/STRD; on success fills in the
// new opcode, destination register pair, base, encoded offset, predicate
// and debug location through the reference out-params.
1422 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1423 unsigned &NewOpc, unsigned &EvenReg,
1424 unsigned &OddReg, unsigned &BaseReg,
1426 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves runs of same-base loads (or stores) next to each other; see the
// definitions below for details.
1428 bool RescheduleOps(MachineBasicBlock *MBB,
1429 SmallVector<MachineInstr*, 4> &Ops,
1430 unsigned Base, bool isLd,
1431 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1432 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification: the address of ID uniquely identifies this pass.
1434 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pre-register-allocation entry point: caches target interfaces, then
// reschedules the loads/stores of each basic block; Modified accumulates
// whether any block changed.
1437 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1438 TD = Fn.getTarget().getTargetData();
1439 TII = Fn.getTarget().getInstrInfo();
1440 TRI = Fn.getTarget().getRegisterInfo();
1441 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1442 MRI = &Fn.getRegInfo();
1445 bool Modified = false;
1446 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1448 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps
// (all loads if isLd, else all stores; all using base register Base) can
// be moved together past the instructions in [I, E) without changing
// behavior, and whether doing so keeps the estimated register-pressure
// increase acceptable. MemRegs holds the data registers of the moved ops.
1453 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1454 MachineBasicBlock::iterator I,
1455 MachineBasicBlock::iterator E,
1456 SmallPtrSet<MachineInstr*, 4> &MemOps,
1457 SmallSet<unsigned, 4> &MemRegs,
1458 const TargetRegisterInfo *TRI) {
1459 // Are there stores / loads / calls between them?
1460 // FIXME: This is overly conservative. We should make use of alias information
1462 SmallSet<unsigned, 4> AddedRegPressure;
// Skip the ops being moved themselves and any debug values.
1464 if (I->isDebugValue() || MemOps.count(&*I))
// Calls, terminators and side-effecting instructions act as barriers.
1466 const TargetInstrDesc &TID = I->getDesc();
1467 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
// Without alias info, a load may not be moved across any store.
1469 if (isLd && TID.mayStore())
1474 // It's not safe to move the first 'str' down.
1477 // str r4, [r0, #+4]
1481 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1482 MachineOperand &MO = I->getOperand(j);
1485 unsigned Reg = MO.getReg();
// Any redefinition of (a register overlapping) the base is fatal.
1486 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
// Other registers referenced in the gap extend live ranges across the
// moved ops, adding pressure.
1488 if (Reg != Base && !MemRegs.count(Reg))
1489 AddedRegPressure.insert(Reg);
1493 // Estimate register pressure increase due to the transformation.
1494 if (MemRegs.size() <= 4)
1495 // Ok if we are moving small number of instructions.
// Heuristic: for larger groups, tolerate at most 2x pressure growth.
1497 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Verify that the pair Op0/Op1 can be replaced by a
// single LDRD/STRD (opcode support, alignment, distinct registers, and an
// encodable immediate offset), computing the new instruction's operands
// through the reference out-params. See the declaration in the class.
1501 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1503 unsigned &NewOpc, unsigned &EvenReg,
1504 unsigned &OddReg, unsigned &BaseReg,
1505 int &Offset, unsigned &PredReg,
1506 ARMCC::CondCodes &Pred,
1508 // Make sure we're allowed to generate LDRD/STRD.
// LDRD/STRD require the v5TE architecture extensions.
1509 if (!STI->hasV5TEOps())
1512 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the single-word opcode (ARM or Thumb2) to its doubleword form.
1514 unsigned Opcode = Op0->getOpcode();
1515 if (Opcode == ARM::LDRi12)
1517 else if (Opcode == ARM::STRi12)
1519 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1520 NewOpc = ARM::t2LDRDi8;
1523 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1524 NewOpc = ARM::t2STRDi8;
1530 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Missing memory-operand info or a volatile access disqualifies the pair.
1531 if (!Op0->hasOneMemOperand() ||
1532 !(*Op0->memoperands_begin())->getValue() ||
1533 (*Op0->memoperands_begin())->isVolatile())
1536 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1537 const Function *Func = MF->getFunction();
1538 unsigned ReqAlign = STI->hasV6Ops()
1539 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1540 : 8; // Pre-v6 need 8-byte align
1541 if (Align < ReqAlign)
1544 // Then make sure the immediate offset fits.
1545 int OffImm = getMemoryOpOffset(Op0);
1549 // Can't fall back to t2LDRi8 / t2STRi8.
// Thumb2 doubleword form: 8-bit immediate, scaled; must be a multiple
// of the scale and below the limit.
1552 int Limit = (1 << 8) * Scale;
1553 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM-mode LDRD/STRD use addrmode3: add/sub flag plus 8-bit magnitude.
1558 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1560 AddSub = ARM_AM::sub;
1563 int Limit = (1 << 8) * Scale;
1564 if (OffImm >= Limit || (OffImm & (Scale-1)))
1566 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1568 EvenReg = Op0->getOperand(0).getReg();
1569 OddReg = Op1->getOperand(0).getReg();
// The two halves of the pair must be distinct registers.
1570 if (EvenReg == OddReg)
1572 BaseReg = Op0->getOperand(1).getReg();
1573 Pred = llvm::getInstrPredicate(Op0, PredReg);
1574 dl = Op0->getDebugLoc();
// RescheduleOps - Given all the loads (or all the stores, per isLd) in
// Ops that use base register Base, find runs with consecutive offsets and
// move them next to each other so later passes can merge them. When the
// run is exactly two ops, additionally try to fuse them into one
// LDRD/STRD via CanFormLdStDWord. MI2LocMap gives each instruction's
// position in the block. Returns true if anything moved.
1578 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1579 SmallVector<MachineInstr*, 4> &Ops,
1580 unsigned Base, bool isLd,
1581 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1582 bool RetVal = false;
1584 // Sort by offset (in reverse order).
1585 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1587 // The loads / stores of the same base are in order. Scan them from first to
1588 // last and check for the following:
1589 // 1. Any def of base.
1591 while (Ops.size() > 1) {
// Program-order extent (MI2LocMap locations) of the current candidate
// run, plus the opcode/size pattern required for consecutive offsets.
1592 unsigned FirstLoc = ~0U;
1593 unsigned LastLoc = 0;
1594 MachineInstr *FirstOp = 0;
1595 MachineInstr *LastOp = 0;
1597 unsigned LastOpcode = 0;
1598 unsigned LastBytes = 0;
1599 unsigned NumMove = 0;
// Ops is sorted by DECREASING offset, so iterate backwards to visit
// increasing offsets.
1600 for (int i = Ops.size() - 1; i >= 0; --i) {
1601 MachineInstr *Op = Ops[i];
1602 unsigned Loc = MI2LocMap[Op];
1603 if (Loc <= FirstLoc) {
1607 if (Loc >= LastLoc) {
// A run must be homogeneous: every op has the same opcode...
1612 unsigned Opcode = Op->getOpcode();
1613 if (LastOpcode && Opcode != LastOpcode)
// ...and strictly consecutive offsets, each exactly Bytes apart.
1616 int Offset = getMemoryOpOffset(Op);
1617 unsigned Bytes = getLSMultipleTransferSize(Op);
1619 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1622 LastOffset = Offset;
1624 LastOpcode = Opcode;
1625 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and their data registers for the
// safety/profitability check.
1632 SmallPtrSet<MachineInstr*, 4> MemOps;
1633 SmallSet<unsigned, 4> MemRegs;
1634 for (int i = NumMove-1; i >= 0; --i) {
1635 MemOps.insert(Ops[i]);
1636 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1639 // Be conservative, if the instructions are too far apart, don't
1640 // move them. We want to limit the increase of register pressure.
1641 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1643 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1644 MemOps, MemRegs, TRI);
1646 for (unsigned i = 0; i != NumMove; ++i)
1649 // This is the new location for the loads / stores.
// Loads gather at the first op of the run; stores sink to the last.
1650 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1651 while (InsertPos != MBB->end()
1652 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1655 // If we are moving a pair of loads / stores, see if it makes sense
1656 // to try to allocate a pair of registers that can form register pairs.
1657 MachineInstr *Op0 = Ops.back();
1658 MachineInstr *Op1 = Ops[Ops.size()-2];
1659 unsigned EvenReg = 0, OddReg = 0;
1660 unsigned BaseReg = 0, PredReg = 0;
1661 ARMCC::CondCodes Pred = ARMCC::AL;
1663 unsigned NewOpc = 0;
1666 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1667 EvenReg, OddReg, BaseReg,
1668 Offset, PredReg, Pred, isT2)) {
1672 // Form the pair instruction.
// Load pair: both destinations are defs on the new LDRD.
1674 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1675 dl, TII->get(NewOpc))
1676 .addReg(EvenReg, RegState::Define)
1677 .addReg(OddReg, RegState::Define)
1679 // FIXME: We're converting from LDRi12 to an insn that still
1680 // uses addrmode2, so we need an explicit offset reg. It should
1681 // always by reg0 since we're transforming LDRi12s.
1684 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store pair: same construction, but sources are uses.
1687 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1688 dl, TII->get(NewOpc))
1692 // FIXME: We're converting from LDRi12 to an insn that still
1693 // uses addrmode2, so we need an explicit offset reg. It should
1694 // always by reg0 since we're transforming STRi12s.
1697 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1703 // Add register allocation hints to form register pairs.
// Hint the allocator so the virtual regs land in an even/odd physical
// GPR pair, as LDRD/STRD require.
1704 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1705 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Not pairable: just splice each op to the common insertion point.
1707 for (unsigned i = 0; i != NumMove; ++i) {
1708 MachineInstr *Op = Ops.back();
1710 MBB->splice(InsertPos, MBB, Op);
1714 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Scan a basic block, numbering instructions
// (MI2LocMap) and bucketing unpredicated loads and stores by their base
// register (Base2LdsMap / Base2StsMap). At each barrier — a call, a
// terminator, or a repeated base+offset access — flush the buckets
// through RescheduleOps and restart. Returns true if anything changed.
1724 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1725 bool RetVal = false;
1727 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1728 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1729 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
// Base registers in first-seen order so rescheduling is deterministic.
1730 SmallVector<unsigned, 4> LdBases;
1731 SmallVector<unsigned, 4> StBases;
1734 MachineBasicBlock::iterator MBBI = MBB->begin();
1735 MachineBasicBlock::iterator E = MBB->end();
1737 for (; MBBI != E; ++MBBI) {
1738 MachineInstr *MI = MBBI;
1739 const TargetInstrDesc &TID = MI->getDesc();
1740 if (TID.isCall() || TID.isTerminator()) {
1741 // Stop at barriers.
// Debug values get no location number, so they don't affect the
// distance heuristics in RescheduleOps.
1746 if (!MI->isDebugValue())
1747 MI2LocMap[MI] = ++Loc;
1749 if (!isMemoryOp(MI))
// Only unpredicated memory ops are considered for rescheduling.
1751 unsigned PredReg = 0;
1752 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1755 int Opc = MI->getOpcode();
1756 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1757 unsigned Base = MI->getOperand(1).getReg();
1758 int Offset = getMemoryOpOffset(MI);
1760 bool StopHere = false;
1762 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1763 Base2LdsMap.find(Base);
1764 if (BI != Base2LdsMap.end()) {
// A second access to the same base+offset reuses the slot; treat it
// as a barrier (StopHere) instead of queueing a duplicate.
1765 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1766 if (Offset == getMemoryOpOffset(BI->second[i])) {
1772 BI->second.push_back(MI);
1774 SmallVector<MachineInstr*, 4> MIs;
1776 Base2LdsMap[Base] = MIs;
1777 LdBases.push_back(Base);
// Identical bookkeeping for stores.
1780 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1781 Base2StsMap.find(Base);
1782 if (BI != Base2StsMap.end()) {
1783 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1784 if (Offset == getMemoryOpOffset(BI->second[i])) {
1790 BI->second.push_back(MI);
1792 SmallVector<MachineInstr*, 4> MIs;
1794 Base2StsMap[Base] = MIs;
1795 StBases.push_back(Base);
1800 // Found a duplicate (a base+offset combination that's seen earlier).
1807 // Re-schedule loads.
1808 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1809 unsigned Base = LdBases[i];
1810 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1812 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1815 // Re-schedule stores.
1816 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1817 unsigned Base = StBases[i];
1818 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1820 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state before continuing past the barrier.
1824 Base2LdsMap.clear();
1825 Base2StsMap.clear();
1835 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1836 /// optimization pass.
// NOTE(review): the conditional between the two returns (original line
// 1838) is elided in this listing; presumably `if (PreAlloc)` selects the
// pre-register-allocation variant — confirm against the full source.
1837 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1839 return new ARMPreAllocLoadStoreOpt();
1840 return new ARMLoadStoreOpt();