1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMAllocLoadStoreOpt - Post-register-allocation pass that combines
55 /// load / store instructions to form ldm / stm instructions.
// NOTE(review): this listing is elided (gaps in the embedded line numbers);
// several members of this pass are not visible here.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target hooks; initialization is on elided lines (presumably in
// runOnMachineFunction — TODO confirm).
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - one candidate load/store: its immediate offset, the
// transferred register (and its kill flag), its position in the basic block,
// and an iterator to the instruction itself. Merged starts false.
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
// Worker routines; see the out-of-line definitions below.
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass ID; it is handed to the MachineFunctionPass base constructor above.
128 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
// addressing submode to the corresponding load/store-multiple opcode.
// Returns 0 where only a base-updating (_UPD) form exists (VLDM/VSTM db).
// NOTE(review): the switch scaffolding between case groups is on elided lines.
131 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133 default: llvm_unreachable("Unhandled opcode!");
137 default: llvm_unreachable("Unhandled submode!");
138 case ARM_AM::ia: return ARM::LDMIA;
139 case ARM_AM::da: return ARM::LDMDA;
140 case ARM_AM::db: return ARM::LDMDB;
141 case ARM_AM::ib: return ARM::LDMIB;
147 default: llvm_unreachable("Unhandled submode!");
148 case ARM_AM::ia: return ARM::STMIA;
149 case ARM_AM::da: return ARM::STMDA;
150 case ARM_AM::db: return ARM::STMDB;
151 case ARM_AM::ib: return ARM::STMIB;
// Thumb2 multiples: only IA and DB submodes are listed here.
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::t2LDMIA;
160 case ARM_AM::db: return ARM::t2LDMDB;
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2STMIA;
169 case ARM_AM::db: return ARM::t2STMDB;
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::VLDMSIA;
177 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::VSTMSIA;
185 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::VLDMDIA;
193 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
199 default: llvm_unreachable("Unhandled submode!");
200 case ARM_AM::ia: return ARM::VSTMDIA;
201 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// getLoadStoreMultipleSubMode - Recover the addressing submode (ia/ib/da/db)
// encoded in a load/store-multiple opcode. Only a sample of the handled
// opcodes is visible in this elided listing; the return statements for each
// case group are on elided lines.
212 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214 default: llvm_unreachable("Unhandled opcode!");
220 case ARM::t2LDMIA_RET:
222 case ARM::t2LDMIA_UPD:
224 case ARM::t2STMIA_UPD:
226 case ARM::VLDMSIA_UPD:
228 case ARM::VSTMSIA_UPD:
230 case ARM::VLDMDIA_UPD:
232 case ARM::VSTMDIA_UPD:
246 case ARM::t2LDMDB_UPD:
248 case ARM::t2STMDB_UPD:
249 case ARM::VLDMSDB_UPD:
250 case ARM::VSTMSDB_UPD:
251 case ARM::VLDMDDB_UPD:
252 case ARM::VSTMDDB_UPD:
// Fallback; the default case above already traps unhandled opcodes.
262 return ARM_AM::bad_am_submode;
265 } // end namespace ARM_AM
266 } // end namespace llvm
// isT2i32Load - True if Opc is a Thumb2 32-bit integer load (imm12 or imm8).
268 static bool isT2i32Load(unsigned Opc) {
269 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True if Opc is an ARM or Thumb2 32-bit integer load.
272 static bool isi32Load(unsigned Opc) {
273 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
// isT2i32Store - True if Opc is a Thumb2 32-bit integer store (imm12 or imm8).
276 static bool isT2i32Store(unsigned Opc) {
277 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True if Opc is an ARM or Thumb2 32-bit integer store.
280 static bool isi32Store(unsigned Opc) {
281 return Opc == ARM::STRi12 || isT2i32Store(Opc);
284 /// MergeOps - Create and insert a LDM or STM with Base as base register and
285 /// registers in Regs as the register operands that would be loaded / stored.
286 /// It returns true if the transformation is done.
288 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
289 MachineBasicBlock::iterator MBBI,
290 int Offset, unsigned Base, bool BaseKill,
291 int Opcode, ARMCC::CondCodes Pred,
292 unsigned PredReg, unsigned Scratch, DebugLoc dl,
293 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
294 // Only a single register to load / store. Don't bother.
295 unsigned NumRegs = Regs.size();
// Default submode; the branches below pick IB/DA/DB on elided lines when the
// starting offset matches those encodings.
299 ARM_AM::AMSubMode Mode = ARM_AM::ia;
300 // VFP and Thumb2 do not support IB or DA modes.
301 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
302 bool haveIBAndDA = isNotVFP && !isThumb2;
303 if (Offset == 4 && haveIBAndDA)
305 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307 else if (Offset == -4 * (int)NumRegs && isNotVFP)
308 // VLDM/VSTM do not support DB mode without also updating the base reg.
310 else if (Offset != 0) {
311 // If starting offset isn't zero, insert a MI to materialize a new base.
312 // But only do so if it is cost effective, i.e. merging more than two
318 if (isi32Load(Opcode))
319 // If it is a load, then just use one of the destination registers
320 // as the new base.
321 NewBase = Regs[NumRegs-1].first;
323 // Otherwise use the caller-provided scratch register as the new base.
// Choose ADD vs SUB (SP-relative forms for Thumb2 when Base is SP); the
// ARM-mode opcode selections are on elided lines.
328 int BaseOpc = !isThumb2
330 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
334 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// The offset must be encodable as a (T2) shifter-operand immediate.
337 int ImmedOffset = isThumb2
338 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
339 if (ImmedOffset == -1)
340 // FIXME: Try t2ADDri12 or t2SUBri12?
341 return false; // Probably not worth it then.
343 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
344 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
345 .addImm(Pred).addReg(PredReg).addReg(0);
347 BaseKill = true; // New base is always killed right after its use.
// Loads define their transfer registers; stores use (kill) them.
350 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
351 Opcode == ARM::VLDRD);
352 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
353 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
354 .addReg(Base, getKillRegState(BaseKill))
355 .addImm(Pred).addReg(PredReg);
356 for (unsigned i = 0; i != NumRegs; ++i)
357 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
358 | getKillRegState(Regs[i].second));
363 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
365 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
367 unsigned memOpsBegin, unsigned memOpsEnd,
368 unsigned insertAfter, int Offset,
369 unsigned Base, bool BaseKill,
371 ARMCC::CondCodes Pred, unsigned PredReg,
374 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
375 // First calculate which of the registers should be killed by the merged
377 const unsigned insertPos = memOps[insertAfter].Position;
// KilledRegs: registers killed by a memop that sits before the insertion
// point; Killer maps such a register to the index of the killing memop.
378 SmallSet<unsigned, 4> KilledRegs;
379 DenseMap<unsigned, unsigned> Killer;
380 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
381 if (i == memOpsBegin) {
386 if (memOps[i].Position < insertPos && memOps[i].isKill) {
387 unsigned Reg = memOps[i].Reg;
388 KilledRegs.insert(Reg);
// Build the (register, isKill) list for the merged instruction.
393 SmallVector<std::pair<unsigned, bool>, 8> Regs;
394 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
395 unsigned Reg = memOps[i].Reg;
396 // If we are inserting the merged operation after an operation that
397 // uses the same register, make sure to transfer any kill flag.
398 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
399 Regs.push_back(std::make_pair(Reg, isKill));
402 // Try to do the merge.
403 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
405 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
406 Pred, PredReg, Scratch, dl, Regs))
409 // Merge succeeded, update records.
410 Merges.push_back(prior(Loc));
411 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
412 // Remove kill flags from any memops that come before insertPos.
413 if (Regs[i-memOpsBegin].second) {
414 unsigned Reg = Regs[i-memOpsBegin].first;
415 if (KilledRegs.count(Reg)) {
416 unsigned j = Killer[Reg];
417 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
418 assert(Idx >= 0 && "Cannot find killing operand");
419 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
420 memOps[j].isKill = false;
422 memOps[i].isKill = true;
// Erase the now-merged single load/store.
424 MBB.erase(memOps[i].MBBI);
425 // Update this memop to refer to the merged instruction.
426 // We may need to move kill flags again.
427 memOps[i].Merged = true;
428 memOps[i].MBBI = Merges.back();
429 memOps[i].Position = insertPos;
433 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
434 /// load / store multiple instructions.
436 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
437 unsigned Base, int Opcode, unsigned Size,
438 ARMCC::CondCodes Pred, unsigned PredReg,
439 unsigned Scratch, MemOpQueue &MemOps,
440 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
441 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
442 int Offset = MemOps[SIndex].Offset;
443 int SOffset = Offset;
444 unsigned insertAfter = SIndex;
445 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
446 DebugLoc dl = Loc->getDebugLoc();
// Operand 0 is the transfer register of the first memop in the run.
447 const MachineOperand &PMO = Loc->getOperand(0);
448 unsigned PReg = PMO.getReg();
449 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
450 : getARMRegisterNumbering(PReg);
// Greedily grow the run while offsets are contiguous and register numbers
// ascend; otherwise flush what we have and recurse on the remainder.
453 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
454 int NewOffset = MemOps[i].Offset;
455 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
456 unsigned Reg = MO.getReg();
457 unsigned RegNum = MO.isUndef() ? UINT_MAX
458 : getARMRegisterNumbering(Reg);
459 // Register numbers must be in ascending order. For VFP, the registers
460 // must also be consecutive and there is a limit of 16 double-word
461 // registers per instruction.
462 if (Reg != ARM::SP &&
463 NewOffset == Offset + (int)Size &&
464 ((isNotVFP && RegNum > PRegNum)
465 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
470 // Can't merge this in. Try merge the earlier ones first.
471 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
472 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
473 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
478 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Final flush: merge everything from SIndex to the end of the queue.
482 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
483 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
484 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - True if MI is a SUB of Bytes from Base back into Base
// under the same predicate, i.e. a decrement that a base-updating load/store
// could fold. Limit == 0 means no upper bound on Bytes.
488 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
489 unsigned Bytes, unsigned Limit,
490 ARMCC::CondCodes Pred, unsigned PredReg){
491 unsigned MyPredReg = 0;
494 if (MI->getOpcode() != ARM::t2SUBri &&
495 MI->getOpcode() != ARM::t2SUBrSPi &&
496 MI->getOpcode() != ARM::t2SUBrSPi12 &&
497 MI->getOpcode() != ARM::tSUBspi &&
498 MI->getOpcode() != ARM::SUBri)
501 // Make sure the offset fits in 8 bits.
502 if (Bytes == 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, so scale by 4.
505 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
506 return (MI->getOperand(0).getReg() == Base &&
507 MI->getOperand(1).getReg() == Base &&
508 (MI->getOperand(2).getImm()*Scale) == Bytes &&
509 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
510 MyPredReg == PredReg);
// isMatchingIncrement - True if MI is an ADD of Bytes to Base back into Base
// under the same predicate. Mirror image of isMatchingDecrement above.
513 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
514 unsigned Bytes, unsigned Limit,
515 ARMCC::CondCodes Pred, unsigned PredReg){
516 unsigned MyPredReg = 0;
519 if (MI->getOpcode() != ARM::t2ADDri &&
520 MI->getOpcode() != ARM::t2ADDrSPi &&
521 MI->getOpcode() != ARM::t2ADDrSPi12 &&
522 MI->getOpcode() != ARM::tADDspi &&
523 MI->getOpcode() != ARM::ADDri)
526 if (Bytes == 0 || (Limit && Bytes >= Limit))
527 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate in words, so scale by 4.
530 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
531 return (MI->getOperand(0).getReg() == Base &&
532 MI->getOperand(1).getReg() == Base &&
533 (MI->getOperand(2).getImm()*Scale) == Bytes &&
534 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
535 MyPredReg == PredReg);
// getLSMultipleTransferSize - Bytes transferred by MI. For the multiple forms
// this counts the register-list operands (total operands minus the static
// operand count, plus one) times the element size: 4 bytes for core/S
// registers, 8 for D registers. The case labels are on elided lines.
538 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
539 switch (MI->getOpcode()) {
567 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
570 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// getUpdatingLSMultipleOpcode - Map a load/store-multiple opcode plus a
// submode to its base-updating (writeback, _UPD) counterpart. Thumb2 and VFP
// forms only exist for IA and DB. Switch scaffolding is on elided lines.
574 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
575 ARM_AM::AMSubMode Mode) {
577 default: llvm_unreachable("Unhandled opcode!");
583 default: llvm_unreachable("Unhandled submode!");
584 case ARM_AM::ia: return ARM::LDMIA_UPD;
585 case ARM_AM::ib: return ARM::LDMIB_UPD;
586 case ARM_AM::da: return ARM::LDMDA_UPD;
587 case ARM_AM::db: return ARM::LDMDB_UPD;
595 default: llvm_unreachable("Unhandled submode!");
596 case ARM_AM::ia: return ARM::STMIA_UPD;
597 case ARM_AM::ib: return ARM::STMIB_UPD;
598 case ARM_AM::da: return ARM::STMDA_UPD;
599 case ARM_AM::db: return ARM::STMDB_UPD;
605 default: llvm_unreachable("Unhandled submode!");
606 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
607 case ARM_AM::db: return ARM::t2LDMDB_UPD;
613 default: llvm_unreachable("Unhandled submode!");
614 case ARM_AM::ia: return ARM::t2STMIA_UPD;
615 case ARM_AM::db: return ARM::t2STMDB_UPD;
620 default: llvm_unreachable("Unhandled submode!");
621 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
622 case ARM_AM::db: return ARM::VLDMSDB_UPD;
627 default: llvm_unreachable("Unhandled submode!");
628 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
629 case ARM_AM::db: return ARM::VLDMDDB_UPD;
634 default: llvm_unreachable("Unhandled submode!");
635 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
636 case ARM_AM::db: return ARM::VSTMSDB_UPD;
641 default: llvm_unreachable("Unhandled submode!");
642 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
643 case ARM_AM::db: return ARM::VSTMDDB_UPD;
651 /// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
652 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
654 /// stmia rn, <ra, rb, rc>
655 /// rn := rn + 4 * 3;
657 /// stmia rn!, <ra, rb, rc>
659 /// rn := rn - 4 * 3;
660 /// ldmia rn, <ra, rb, rc>
662 /// ldmdb rn!, <ra, rb, rc>
663 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
664 MachineBasicBlock::iterator MBBI,
666 MachineBasicBlock::iterator &I) {
667 MachineInstr *MI = MBBI;
668 unsigned Base = MI->getOperand(0).getReg();
669 bool BaseKill = MI->getOperand(0).isKill();
670 unsigned Bytes = getLSMultipleTransferSize(MI);
671 unsigned PredReg = 0;
672 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
673 int Opcode = MI->getOpcode();
674 DebugLoc dl = MI->getDebugLoc();
676 // Can't use an updating ld/st if the base register is also a dest
677 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
678 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
679 if (MI->getOperand(i).getReg() == Base)
682 bool DoMerge = false;
683 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
685 // Try merging with the previous instruction.
686 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
687 if (MBBI != BeginMBBI) {
688 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the match.
689 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
691 if (Mode == ARM_AM::ia &&
692 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
695 } else if (Mode == ARM_AM::ib &&
696 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
704 // Try merging with the next instruction.
705 MachineBasicBlock::iterator EndMBBI = MBB.end();
706 if (!DoMerge && MBBI != EndMBBI) {
707 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
708 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
710 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
711 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
713 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
714 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Rebuild the instruction as its _UPD (writeback) form.
729 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
730 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
731 .addReg(Base, getDefRegState(true)) // WB base register
732 .addReg(Base, getKillRegState(BaseKill))
733 .addImm(Pred).addReg(PredReg);
735 // Transfer the rest of operands.
736 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
737 MIB.addOperand(MI->getOperand(OpNum));
739 // Transfer memoperands.
740 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
// pre-indexed (writeback-before) form. VFP loads/stores map to a one-register
// updating VLDM/VSTM since no VLDR/VSTR writeback form exists. Some case
// labels and ARM-mode returns are on elided lines.
746 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
747 ARM_AM::AddrOpc Mode) {
754 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
756 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
758 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
760 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
763 return ARM::t2LDR_PRE;
766 return ARM::t2STR_PRE;
767 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
// post-indexed (writeback-after) form. Mirror of the pre-indexed mapping
// above; some case labels are on elided lines.
772 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
773 ARM_AM::AddrOpc Mode) {
776 return ARM::LDR_POST;
778 return ARM::STR_POST;
780 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
782 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
784 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
786 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
789 return ARM::t2LDR_POST;
792 return ARM::t2STR_POST;
793 default: llvm_unreachable("Unhandled opcode!");
798 /// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
799 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
800 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
801 MachineBasicBlock::iterator MBBI,
802 const TargetInstrInfo *TII,
804 MachineBasicBlock::iterator &I) {
805 MachineInstr *MI = MBBI;
806 unsigned Base = MI->getOperand(1).getReg();
807 bool BaseKill = MI->getOperand(1).isKill();
808 unsigned Bytes = getLSMultipleTransferSize(MI);
809 int Opcode = MI->getOpcode();
810 DebugLoc dl = MI->getDebugLoc();
// Classify the addressing mode: AM5 = VFP load/store, AM2 = ARM-mode LDR/STR.
811 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
812 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
813 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only fold when the existing immediate offset is zero.
814 if (isi32Load(Opcode) || isi32Store(Opcode))
815 if (MI->getOperand(2).getImm() != 0)
817 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
820 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
821 // Can't do the merge if the destination register is the same as the would-be
822 // writeback register.
823 if (isLd && MI->getOperand(0).getReg() == Base)
826 unsigned PredReg = 0;
827 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
828 bool DoMerge = false;
829 ARM_AM::AddrOpc AddSub = ARM_AM::add;
831 // AM2 - 12 bits, thumb2 - 8 bits.
832 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
834 // Try merging with the previous instruction.
835 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
836 if (MBBI != BeginMBBI) {
837 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the match.
838 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
840 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
842 AddSub = ARM_AM::sub;
844 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A preceding add/sub folds into the pre-indexed form.
848 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
853 // Try merging with the next instruction.
854 MachineBasicBlock::iterator EndMBBI = MBB.end();
855 if (!DoMerge && MBBI != EndMBBI) {
856 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
857 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
860 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
862 AddSub = ARM_AM::sub;
863 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A trailing add/sub folds into the post-indexed form.
867 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
881 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
883 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
886 // VLDM[SD}_UPD, VSTM[SD]_UPD
887 // (There are no base-updating versions of VLDR/VSTR instructions, but the
888 // updating load/store-multiple instructions can be used with only one
890 MachineOperand &MO = MI->getOperand(0);
891 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
892 .addReg(Base, getDefRegState(true)) // WB base register
893 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
894 .addImm(Pred).addReg(PredReg)
895 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
896 getKillRegState(MO.isKill())));
899 // LDR_PRE, LDR_POST,
900 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
901 .addReg(Base, RegState::Define)
902 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
904 // t2LDR_PRE, t2LDR_POST
905 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
906 .addReg(Base, RegState::Define)
907 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
909 MachineOperand &MO = MI->getOperand(0);
// ARM-mode STR_PRE/STR_POST: writeback register is the explicit def (Base).
912 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
913 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
914 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
916 // t2STR_PRE, t2STR_POST
917 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
918 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
919 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
926 /// isMemoryOp - Returns true if instruction is a memory operation (that this
927 /// pass is capable of operating on).
928 static bool isMemoryOp(const MachineInstr *MI) {
929 // When no memory operands are present, conservatively assume unaligned,
930 // volatile, unfoldable.
931 if (!MI->hasOneMemOperand())
934 const MachineMemOperand *MMO = *MI->memoperands_begin();
936 // Don't touch volatile memory accesses - we may be changing their order.
937 if (MMO->isVolatile())
940 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
942 if (MMO->getAlignment() < 4)
945 // str <undef> could probably be eliminated entirely, but for now we just want
946 // to avoid making a mess of it.
947 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
948 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
949 MI->getOperand(0).isUndef())
952 // Likewise don't mess with references to undefined addresses.
953 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
954 MI->getOperand(1).isUndef())
957 int Opcode = MI->getOpcode();
// The opcode case labels are on elided lines; each group accepts the
// instruction only when its base-address operand is a plain register.
962 return MI->getOperand(1).isReg();
965 return MI->getOperand(1).isReg();
972 return MI->getOperand(1).isReg();
977 /// AdvanceRS - Advance register scavenger to just before the earliest memory
978 /// op that is being merged.
979 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the memop with the smallest Position (earliest in the block).
980 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
981 unsigned Position = MemOps[0].Position;
982 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
983 if (MemOps[i].Position < Position) {
984 Position = MemOps[i].Position;
985 Loc = MemOps[i].MBBI;
// Step the scavenger forward to the instruction just before Loc.
989 if (Loc != MBB.begin())
990 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode MI's immediate offset (in bytes, signed) from
// its addressing-mode-encoded offset field. The offset operand is always the
// third-from-last operand of these instructions.
993 static int getMemoryOpOffset(const MachineInstr *MI) {
994 int Opcode = MI->getOpcode();
995 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
996 unsigned NumOperands = MI->getDesc().getNumOperands();
997 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// i12/i8 forms store the plain byte offset; the (elided) return presumably
// hands OffField back directly.
999 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1000 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1001 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1002 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
// AM3 (LDRD/STRD) encodes a byte offset; AM5 (VFP) encodes in words, so
// multiply by 4. Negate when the encoded op is sub.
1005 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1006 : ARM_AM::getAM5Offset(OffField) * 4;
1008 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1011 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR (isDef true) or STR (isDef false) of Reg
// at [BaseReg + Offset] before MBBI, carrying the given kill/undef flags and
// predicate. Used by FixInvalidRegPairOp to split an LDRD/STRD.
1017 static void InsertLDR_STR(MachineBasicBlock &MBB,
1018 MachineBasicBlock::iterator &MBBI,
1019 int Offset, bool isDef,
1020 DebugLoc dl, unsigned NewOpc,
1021 unsigned Reg, bool RegDeadKill, bool RegUndef,
1022 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1023 bool OffKill, bool OffUndef,
1024 ARMCC::CondCodes Pred, unsigned PredReg,
1025 const TargetInstrInfo *TII, bool isT2) {
// Load form: Reg is a def (possibly dead).
1027 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1029 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1030 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1031 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is a use (possibly killed/undef).
1033 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1035 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1036 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1037 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair is not a
// valid (even, even+1) pair: either turn it into an LDM/STM (ascending regs,
// zero offset) or split it into two single loads/stores.
1041 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1042 MachineBasicBlock::iterator &MBBI) {
1043 MachineInstr *MI = &*MBBI;
1044 unsigned Opcode = MI->getOpcode();
1045 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1046 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1047 unsigned EvenReg = MI->getOperand(0).getReg();
1048 unsigned OddReg = MI->getOperand(1).getReg();
1049 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1050 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// Already a legal even/odd consecutive pair: nothing to fix.
1051 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1054 MachineBasicBlock::iterator NewBBI = MBBI;
1055 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1056 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the pair regs are defs (dead flag); for stores they are uses
// (kill flag).
1057 bool EvenDeadKill = isLd ?
1058 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1059 bool EvenUndef = MI->getOperand(0).isUndef();
1060 bool OddDeadKill = isLd ?
1061 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1062 bool OddUndef = MI->getOperand(1).isUndef();
1063 const MachineOperand &BaseOp = MI->getOperand(2);
1064 unsigned BaseReg = BaseOp.getReg();
1065 bool BaseKill = BaseOp.isKill();
1066 bool BaseUndef = BaseOp.isUndef();
// t2LDRDi8/t2STRDi8 have no separate offset-register operand.
1067 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1068 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1069 int OffImm = getMemoryOpOffset(MI);
1070 unsigned PredReg = 0;
1071 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1073 if (OddRegNum > EvenRegNum && OffImm == 0) {
1074 // Ascending register numbers and no offset. It's safe to change it to a
1076 unsigned NewOpc = (isLd)
1077 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1078 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1080 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1081 .addReg(BaseReg, getKillRegState(BaseKill))
1082 .addImm(Pred).addReg(PredReg)
1083 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1084 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store variant of the same LDM/STM rewrite (branch structure elided).
1087 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1088 .addReg(BaseReg, getKillRegState(BaseKill))
1089 .addImm(Pred).addReg(PredReg)
1091 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1093 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1096 NewBBI = llvm::prior(MBBI);
1098 // Split into two instructions.
1099 unsigned NewOpc = (isLd)
1100 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1101 ? (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1102 DebugLoc dl = MBBI->getDebugLoc();
1103 // If this is a load and base register is killed, it may have been
1104 // re-defed by the load, make sure the first load does not clobber it.
1106 (BaseKill || OffKill) &&
1107 (TRI->regsOverlap(EvenReg, BaseReg))) {
1108 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd half first so the even half (which overlaps the base)
// comes last and cannot clobber the base before it is used.
1109 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1110 OddReg, OddDeadKill, false,
1111 BaseReg, false, BaseUndef, false, OffUndef,
1112 Pred, PredReg, TII, isT2);
1113 NewBBI = llvm::prior(MBBI);
1114 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1115 EvenReg, EvenDeadKill, false,
1116 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1117 Pred, PredReg, TII, isT2);
1119 if (OddReg == EvenReg && EvenDeadKill) {
1120 // If the two source operands are the same, the kill marker is
1121 // probably on the first one. e.g.
1122 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1123 EvenDeadKill = false;
1126 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1127 EvenReg, EvenDeadKill, EvenUndef,
1128 BaseReg, false, BaseUndef, false, OffUndef,
1129 Pred, PredReg, TII, isT2);
1130 NewBBI = llvm::prior(MBBI);
1131 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1132 OddReg, OddDeadKill, OddUndef,
1133 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1134 Pred, PredReg, TII, isT2);
1149 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1150 /// ops of the same base and incrementing offset into LDM / STM ops.
1151 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// NOTE(review): this excerpt is missing several original lines (loop
// headers, continue/break statements, closing braces). Comments below
// describe only what the visible statements establish.
//
// Per-chain state: the base register, opcode, access size, and predicate
// shared by the memory ops currently queued in MemOps. A chain is flushed
// into LDM/STM form when it can no longer be extended.
1152 unsigned NumMerges = 0;
1153 unsigned NumMemOps = 0;
1155 unsigned CurrBase = 0;
1157 unsigned CurrSize = 0;
1158 ARMCC::CondCodes CurrPred = ARMCC::AL;
1159 unsigned CurrPredReg = 0;
1160 unsigned Position = 0;
1161 SmallVector<MachineBasicBlock::iterator,4> Merges;
// Prime the register scavenger at the top of the block; it is consulted
// below (FindUnusedReg) to obtain a scratch GPR for merged base updates.
1163 RS->enterBasicBlock(&MBB);
1164 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// Fix ldrd/strd with odd/illegal register pairs before considering merges.
1166 if (FixInvalidRegPairOp(MBB, MBBI))
1169 bool Advance = false;
1170 bool TryMerge = false;
1171 bool Clobber = false;
1173 bool isMemOp = isMemoryOp(MBBI);
1175 int Opcode = MBBI->getOpcode();
1176 unsigned Size = getLSMultipleTransferSize(MBBI);
1177 const MachineOperand &MO = MBBI->getOperand(0);
1178 unsigned Reg = MO.getReg();
// A def never carries a kill flag; only record kill on uses (stores).
1179 bool isKill = MO.isDef() ? false : MO.isKill();
1180 unsigned Base = MBBI->getOperand(1).getReg();
1181 unsigned PredReg = 0;
1182 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1183 int Offset = getMemoryOpOffset(MBBI);
// Watch for a load that redefines its own base, e.g.:
1186 // r5 := ldr [r5, #4]
1187 // r6 := ldr [r5, #8]
1189 // The second ldr has effectively broken the chain even though it
1190 // looks like the later ldr(s) use the same base register. Try to
1191 // merge the ldr's so far, including this one. But don't try to
1192 // combine the following ldr(s).
1193 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1194 if (CurrBase == 0 && !Clobber) {
1195 // Start of a new chain.
1200 CurrPredReg = PredReg;
1201 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same opcode, base, and predicate as the current chain: extend it.
// PredReg is deliberately not compared (see comment below).
1210 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1211 // No need to match PredReg.
1212 // Continue adding to the queue.
1213 if (Offset > MemOps.back().Offset) {
1214 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise insert in offset-sorted position, rejecting duplicates.
1219 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1221 if (Offset < I->Offset) {
1222 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1227 } else if (Offset == I->Offset) {
1228 // Collision! This can't be merged!
// Debug values are transparent to the chain; don't let them break it.
1237 if (MBBI->isDebugValue()) {
1240 // Reach the end of the block, try merging the memory instructions.
1242 } else if (Advance) {
1246 // Reach the end of the block, try merging the memory instructions.
// --- Flush the accumulated chain ---
1252 if (NumMemOps > 1) {
1253 // Try to find a free register to use as a new base in case it's needed.
1254 // First advance to the instruction just before the start of the chain.
1255 AdvanceRS(MBB, MemOps);
1256 // Find a scratch register.
1257 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1258 // Process the load / store instructions.
1259 RS->forward(prior(MBBI));
1263 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1264 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1266 // Try folding preceeding/trailing base inc/dec into the generated
1268 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1269 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1271 NumMerges += Merges.size();
1273 // Try folding preceeding/trailing base inc/dec into those load/store
1274 // that were not merged to form LDM/STM ops.
1275 for (unsigned i = 0; i != NumMemOps; ++i)
1276 if (!MemOps[i].Merged)
1277 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1280 // RS may be pointing to an instruction that's deleted.
1281 RS->skipTo(prior(MBBI));
1282 } else if (NumMemOps == 1) {
1283 // Try folding preceeding/trailing base inc/dec into the single
1285 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1287 RS->forward(prior(MBBI));
// Reset chain state for the next run of memory ops.
1294 CurrPred = ARMCC::AL;
1301 // If iterator hasn't been advanced and this is not a memory op, skip it.
1302 // It can't start a new chain anyway.
1303 if (!Advance && !isMemOp && MBBI != E) {
// Returns true iff at least one LDM/STM was formed.
1309 return NumMerges > 0;
1312 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1313 /// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
1314 /// directly restore the value of LR into pc.
1315 /// ldmfd sp!, {..., lr}
1318 /// ldmfd sp!, {..., lr}
1321 /// ldmfd sp!, {..., pc}
1322 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1323 if (MBB.empty()) return false;
// Look at the last real (non-debug) instruction; it must be a plain
// return (bx lr / thumb bx lr / mov pc, lr) to be foldable.
1325 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1326 if (MBBI != MBB.begin() &&
1327 (MBBI->getOpcode() == ARM::BX_RET ||
1328 MBBI->getOpcode() == ARM::tBX_RET ||
1329 MBBI->getOpcode() == ARM::MOVPCLR)) {
// The instruction immediately before the return must be a writeback
// load-multiple whose last transferred register is LR.
1330 MachineInstr *PrevMI = prior(MBBI);
1331 unsigned Opcode = PrevMI->getOpcode();
1332 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1333 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1334 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1335 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1336 if (MO.getReg() != ARM::LR)
// Retarget the LDM as a load-return: only the IA (ascending) forms
// have a *_RET variant, which the assert below enforces.
1338 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1339 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1340 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1341 PrevMI->setDesc(TII->get(NewOpc));
// Preserve implicit operands (e.g. live-in register uses) from the
// return instruction being folded away.
1343 PrevMI->copyImplicitOps(&*MBBI);
// Post-RA pass entry point: caches target hooks, then runs the LDM/STM
// merger (and, on v5T+, the return-into-LDM fold) over every block.
1351 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1352 const TargetMachine &TM = Fn.getTarget();
1353 AFI = Fn.getInfo<ARMFunctionInfo>();
1354 TII = TM.getInstrInfo();
1355 TRI = TM.getRegisterInfo();
// NOTE(review): RS is allocated with 'new' here; the matching 'delete'
// is not visible in this excerpt — confirm it exists after the loop.
1356 RS = new RegScavenger();
1357 isThumb2 = AFI->isThumb2Function();
1359 bool Modified = false;
1360 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1362 MachineBasicBlock &MBB = *MFI;
1363 Modified |= LoadStoreMultipleOpti(MBB);
// MergeReturnIntoLDM relies on LDM being able to load into pc, which
// requires v5T ops; gate it on the subtarget.
1364 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1365 Modified |= MergeReturnIntoLDM(MBB);
1373 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1374 /// load / stores from consecutive locations close to make it more
1375 /// likely they will be combined later.
1378 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1380 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target/function state, set up in runOnMachineFunction.
1382 const TargetData *TD;
1383 const TargetInstrInfo *TII;
1384 const TargetRegisterInfo *TRI;
1385 const ARMSubtarget *STI;
1386 MachineRegisterInfo *MRI;
1387 MachineFunction *MF;
1389 virtual bool runOnMachineFunction(MachineFunction &Fn);
1391 virtual const char *getPassName() const {
1392 return "ARM pre- register allocation load / store optimization pass";
// True if Op0/Op1 (two i32 loads or two i32 stores off the same base)
// can be combined into a single LDRD/STRD; on success fills in the
// new opcode, register pair, base, offset encoding, and predicate.
1396 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1397 unsigned &NewOpc, unsigned &EvenReg,
1398 unsigned &OddReg, unsigned &BaseReg,
1400 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Move the loads (or stores) in Ops, which share the same base register,
// next to each other when safe and profitable, to enable later merging.
1402 bool RescheduleOps(MachineBasicBlock *MBB,
1403 SmallVector<MachineInstr*, 4> &Ops,
1404 unsigned Base, bool isLd,
1405 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1406 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1408 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pre-RA pass entry point: caches target hooks, then reschedules
// same-base loads/stores in every basic block.
1411 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1412 TD = Fn.getTarget().getTargetData();
1413 TII = Fn.getTarget().getInstrInfo();
1414 TRI = Fn.getTarget().getRegisterInfo();
1415 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1416 MRI = &Fn.getRegInfo();
// NOTE(review): the MF member (used by CanFormLdStDWord) is not visibly
// initialized in this excerpt — confirm it is set to &Fn here.
1419 bool Modified = false;
1420 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1422 Modified |= RescheduleLoadStoreInstrs(MFI);
// Decide whether the memory ops in MemOps (all using Base, all loads if
// isLd, else all stores) can be moved together across the instructions
// in [I, E): rejects intervening calls/terminators/side effects and
// base-register redefinitions, then estimates the register-pressure
// cost of extending the live ranges of the transferred registers.
1427 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1428 MachineBasicBlock::iterator I,
1429 MachineBasicBlock::iterator E,
1430 SmallPtrSet<MachineInstr*, 4> &MemOps,
1431 SmallSet<unsigned, 4> &MemRegs,
1432 const TargetRegisterInfo *TRI) {
1433 // Are there stores / loads / calls between them?
1434 // FIXME: This is overly conservative. We should make use of alias information
1436 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the memory ops themselves.
1438 if (I->isDebugValue() || MemOps.count(&*I))
1440 const TargetInstrDesc &TID = I->getDesc();
1441 if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
// Moving loads across an intervening store could change the value read.
1443 if (isLd && TID.mayStore())
1448 // It's not safe to move the first 'str' down.
1451 // str r4, [r0, #+4]
1455 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1456 MachineOperand &MO = I->getOperand(j);
1459 unsigned Reg = MO.getReg();
// A def overlapping the base register invalidates the addressing.
1460 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
// Registers used between the memory ops have their live ranges
// stretched by the move; count them as added pressure.
1462 if (Reg != Base && !MemRegs.count(Reg))
1463 AddedRegPressure.insert(Reg);
1467 // Estimate register pressure increase due to the transformation.
1468 if (MemRegs.size() <= 4)
1469 // Ok if we are moving small number of instructions.
1471 return AddedRegPressure.size() <= MemRegs.size() * 2;
// Check whether Op0 and Op1 can be fused into one LDRD/STRD (or the
// Thumb2 t2LDRDi8/t2STRDi8 forms). Requires v5TE, a known non-volatile
// memory operand with sufficient alignment for an i64 access, an
// in-range immediate offset, and distinct transfer registers. All
// reference out-parameters are filled in on the success path.
1475 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1477 unsigned &NewOpc, unsigned &EvenReg,
1478 unsigned &OddReg, unsigned &BaseReg,
1479 int &Offset, unsigned &PredReg,
1480 ARMCC::CondCodes &Pred,
1482 // Make sure we're allowed to generate LDRD/STRD.
1483 if (!STI->hasV5TEOps())
1486 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the single-word opcode to its doubleword counterpart.
1488 unsigned Opcode = Op0->getOpcode();
1489 if (Opcode == ARM::LDRi12)
1491 else if (Opcode == ARM::STRi12)
1493 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1494 NewOpc = ARM::t2LDRDi8;
1497 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1498 NewOpc = ARM::t2STRDi8;
1504 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Bail if there is no single, value-backed, non-volatile memoperand to
// inspect for alignment.
1505 if (!Op0->hasOneMemOperand() ||
1506 !(*Op0->memoperands_begin())->getValue() ||
1507 (*Op0->memoperands_begin())->isVolatile())
1510 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1511 const Function *Func = MF->getFunction();
1512 unsigned ReqAlign = STI->hasV6Ops()
1513 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1514 : 8; // Pre-v6 need 8-byte align
1515 if (Align < ReqAlign)
1518 // Then make sure the immediate offset fits.
1519 int OffImm = getMemoryOpOffset(Op0);
// 8-bit immediate range, scaled; the offset must also be a multiple of
// the scale.
1521 int Limit = (1 << 8) * Scale;
1522 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// Non-Thumb2 path: encode sign and magnitude via addrmode3.
1526 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1528 AddSub = ARM_AM::sub;
1531 int Limit = (1 << 8) * Scale;
1532 if (OffImm >= Limit || (OffImm & (Scale-1)))
1534 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two destination/source registers must differ to form a pair.
1536 EvenReg = Op0->getOperand(0).getReg();
1537 OddReg = Op1->getOperand(0).getReg();
1538 if (EvenReg == OddReg)
1540 BaseReg = Op0->getOperand(1).getReg();
1541 Pred = llvm::getInstrPredicate(Op0, PredReg);
1542 dl = Op0->getDebugLoc();
// Strict-weak-ordering functor that sorts memory ops by DESCENDING
// memory offset (RescheduleOps then walks the array back-to-front to
// visit them in ascending offset order).
1547 struct OffsetCompare {
1548 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1549 int LOffset = getMemoryOpOffset(LHS);
1550 int ROffset = getMemoryOpOffset(RHS);
// Distinct instructions must have distinct offsets — duplicates were
// filtered out when the ops were collected.
1551 assert(LHS == RHS || LOffset != ROffset);
1552 return LOffset > ROffset;
// Move runs of same-base, consecutive-offset loads (or stores) in Ops
// next to each other so later passes can merge them, and — for pairs —
// try to rewrite them directly into an LDRD/STRD. MI2LocMap gives each
// instruction's original position in the block.
// NOTE(review): this excerpt omits some original lines (loop close,
// early continues); comments describe only the visible statements.
1557 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1558 SmallVector<MachineInstr*, 4> &Ops,
1559 unsigned Base, bool isLd,
1560 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1561 bool RetVal = false;
1563 // Sort by offset (in reverse order).
1564 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1566 // The loads / stores of the same base are in order. Scan them from first to
1567 // last and check for the following:
1568 // 1. Any def of base.
// Peel off one mergeable run of ops per outer iteration.
1570 while (Ops.size() > 1) {
1571 unsigned FirstLoc = ~0U;
1572 unsigned LastLoc = 0;
1573 MachineInstr *FirstOp = 0;
1574 MachineInstr *LastOp = 0;
1576 unsigned LastOpcode = 0;
1577 unsigned LastBytes = 0;
1578 unsigned NumMove = 0;
// Walk back-to-front (i.e. ascending offsets, per OffsetCompare) and
// grow the candidate run while opcode matches and offsets stay exactly
// consecutive; track the earliest/latest block positions of the run.
1579 for (int i = Ops.size() - 1; i >= 0; --i) {
1580 MachineInstr *Op = Ops[i];
1581 unsigned Loc = MI2LocMap[Op];
1582 if (Loc <= FirstLoc) {
1586 if (Loc >= LastLoc) {
1591 unsigned Opcode = Op->getOpcode();
1592 if (LastOpcode && Opcode != LastOpcode)
1595 int Offset = getMemoryOpOffset(Op);
1596 unsigned Bytes = getLSMultipleTransferSize(Op);
// Run is broken unless this op is exactly LastBytes past the previous.
1598 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1601 LastOffset = Offset;
1603 LastOpcode = Opcode;
1604 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and their transfer registers for the
// safety / pressure check.
1611 SmallPtrSet<MachineInstr*, 4> MemOps;
1612 SmallSet<unsigned, 4> MemRegs;
1613 for (int i = NumMove-1; i >= 0; --i) {
1614 MemOps.insert(Ops[i]);
1615 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1618 // Be conservative, if the instructions are too far apart, don't
1619 // move them. We want to limit the increase of register pressure.
1620 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1622 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1623 MemOps, MemRegs, TRI);
1625 for (unsigned i = 0; i != NumMove; ++i)
1628 // This is the new location for the loads / stores.
// Loads move up to the first op's position; stores sink to the last.
1629 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
// Step past the run's own ops and any debug values at the target spot.
1630 while (InsertPos != MBB->end()
1631 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1634 // If we are moving a pair of loads / stores, see if it makes sense
1635 // to try to allocate a pair of registers that can form register pairs.
1636 MachineInstr *Op0 = Ops.back();
1637 MachineInstr *Op1 = Ops[Ops.size()-2];
1638 unsigned EvenReg = 0, OddReg = 0;
1639 unsigned BaseReg = 0, PredReg = 0;
1640 ARMCC::CondCodes Pred = ARMCC::AL;
1642 unsigned NewOpc = 0;
1645 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1646 EvenReg, OddReg, BaseReg,
1647 Offset, PredReg, Pred, isT2)) {
1651 // Form the pair instruction.
// Load pair: both transfer registers are defs.
1653 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1654 dl, TII->get(NewOpc))
1655 .addReg(EvenReg, RegState::Define)
1656 .addReg(OddReg, RegState::Define)
1658 // FIXME: We're converting from LDRi12 to an insn that still
1659 // uses addrmode2, so we need an explicit offset reg. It should
1660 // always by reg0 since we're transforming LDRi12s.
1663 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store pair: transfer registers are uses (no RegState::Define).
1666 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1667 dl, TII->get(NewOpc))
1671 // FIXME: We're converting from LDRi12 to an insn that still
1672 // uses addrmode2, so we need an explicit offset reg. It should
1673 // always by reg0 since we're transforming STRi12s.
1676 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1682 // Add register allocation hints to form register pairs.
1683 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1684 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// No LDRD/STRD formed: just splice the ops adjacent to InsertPos.
1686 for (unsigned i = 0; i != NumMove; ++i) {
1687 MachineInstr *Op = Ops.back();
1689 MBB->splice(InsertPos, MBB, Op);
1693 NumLdStMoved += NumMove;
// Scan the block, bucket unpredicated i32/VFP loads and stores by base
// register (recording each instruction's position in MI2LocMap), then
// hand each bucket to RescheduleOps. A repeated base+offset pair stops
// the current region so the same slot is never scheduled twice.
// NOTE(review): this excerpt omits some original lines (region-flush
// control flow, closing braces); comments cover only visible code.
1703 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1704 bool RetVal = false;
1706 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1707 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1708 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
// Bases in first-seen order, so rescheduling is deterministic.
1709 SmallVector<unsigned, 4> LdBases;
1710 SmallVector<unsigned, 4> StBases;
1713 MachineBasicBlock::iterator MBBI = MBB->begin();
1714 MachineBasicBlock::iterator E = MBB->end();
1716 for (; MBBI != E; ++MBBI) {
1717 MachineInstr *MI = MBBI;
1718 const TargetInstrDesc &TID = MI->getDesc();
1719 if (TID.isCall() || TID.isTerminator()) {
1720 // Stop at barriers.
// Number real instructions only; debug values get no location.
1725 if (!MI->isDebugValue())
1726 MI2LocMap[MI] = ++Loc;
1728 if (!isMemoryOp(MI))
// Only unpredicated ops are candidates.
1730 unsigned PredReg = 0;
1731 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1734 int Opc = MI->getOpcode();
1735 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1736 unsigned Base = MI->getOperand(1).getReg();
1737 int Offset = getMemoryOpOffset(MI);
1739 bool StopHere = false;
// Loads: append to this base's bucket, checking for an offset already
// seen (a duplicate slot ends the region — see StopHere).
1741 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1742 Base2LdsMap.find(Base);
1743 if (BI != Base2LdsMap.end()) {
1744 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1745 if (Offset == getMemoryOpOffset(BI->second[i])) {
1751 BI->second.push_back(MI);
1753 SmallVector<MachineInstr*, 4> MIs;
1755 Base2LdsMap[Base] = MIs;
1756 LdBases.push_back(Base);
// Stores: same bucketing, in the parallel store-side maps.
1759 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1760 Base2StsMap.find(Base);
1761 if (BI != Base2StsMap.end()) {
1762 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1763 if (Offset == getMemoryOpOffset(BI->second[i])) {
1769 BI->second.push_back(MI);
1771 SmallVector<MachineInstr*, 4> MIs;
1773 Base2StsMap[Base] = MIs;
1774 StBases.push_back(Base);
1779 // Found a duplicate (a base+offset combination that's seen earlier).
1786 // Re-schedule loads.
1787 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1788 unsigned Base = LdBases[i];
1789 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1791 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1794 // Re-schedule stores.
1795 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1796 unsigned Base = StBases[i];
1797 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1799 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset bucket state (presumably per scanned region — the enclosing
// control flow is not fully visible here).
1803 Base2LdsMap.clear();
1804 Base2StsMap.clear();
1814 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1815 /// optimization pass.
1816 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// NOTE(review): the conditional guarding the first return (selecting the
// pre-RA pass when PreAlloc is true) is missing from this excerpt —
// confirm an 'if (PreAlloc)' precedes it; as written the second return
// would be unreachable.
1818 return new ARMPreAllocLoadStoreOpt();
1819 return new ARMLoadStoreOpt();