1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
55 /// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
128 char ARMLoadStoreOpt::ID = 0;
131 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133 default: llvm_unreachable("Unhandled opcode!");
137 default: llvm_unreachable("Unhandled submode!");
138 case ARM_AM::ia: return ARM::LDMIA;
139 case ARM_AM::da: return ARM::LDMDA;
140 case ARM_AM::db: return ARM::LDMDB;
141 case ARM_AM::ib: return ARM::LDMIB;
147 default: llvm_unreachable("Unhandled submode!");
148 case ARM_AM::ia: return ARM::STMIA;
149 case ARM_AM::da: return ARM::STMDA;
150 case ARM_AM::db: return ARM::STMDB;
151 case ARM_AM::ib: return ARM::STMIB;
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::t2LDMIA;
160 case ARM_AM::db: return ARM::t2LDMDB;
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2STMIA;
169 case ARM_AM::db: return ARM::t2STMDB;
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::VLDMSIA;
177 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::VSTMSIA;
185 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::VLDMDIA;
193 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
199 default: llvm_unreachable("Unhandled submode!");
200 case ARM_AM::ia: return ARM::VSTMDIA;
201 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
212 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214 default: llvm_unreachable("Unhandled opcode!");
220 case ARM::t2LDMIA_RET:
222 case ARM::t2LDMIA_UPD:
224 case ARM::t2STMIA_UPD:
226 case ARM::VLDMSIA_UPD:
228 case ARM::VSTMSIA_UPD:
230 case ARM::VLDMDIA_UPD:
232 case ARM::VSTMDIA_UPD:
246 case ARM::t2LDMDB_UPD:
248 case ARM::t2STMDB_UPD:
249 case ARM::VLDMSDB_UPD:
250 case ARM::VSTMSDB_UPD:
251 case ARM::VLDMDDB_UPD:
252 case ARM::VSTMDDB_UPD:
262 return ARM_AM::bad_am_submode;
265 } // end namespace ARM_AM
266 } // end namespace llvm
268 static bool isT2i32Load(unsigned Opc) {
269 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
272 static bool isi32Load(unsigned Opc) {
273 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
276 static bool isT2i32Store(unsigned Opc) {
277 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
280 static bool isi32Store(unsigned Opc) {
281 return Opc == ARM::STRi12 || isT2i32Store(Opc);
284 /// MergeOps - Create and insert a LDM or STM with Base as base register and
285 /// registers in Regs as the register operands that would be loaded / stored.
286 /// It returns true if the transformation is done.
288 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
289 MachineBasicBlock::iterator MBBI,
290 int Offset, unsigned Base, bool BaseKill,
291 int Opcode, ARMCC::CondCodes Pred,
292 unsigned PredReg, unsigned Scratch, DebugLoc dl,
293 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
294 // Only a single register to load / store. Don't bother.
295 unsigned NumRegs = Regs.size();
299 ARM_AM::AMSubMode Mode = ARM_AM::ia;
300 // VFP and Thumb2 do not support IB or DA modes.
301 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
302 bool haveIBAndDA = isNotVFP && !isThumb2;
303 if (Offset == 4 && haveIBAndDA)
305 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307 else if (Offset == -4 * (int)NumRegs && isNotVFP)
308 // VLDM/VSTM do not support DB mode without also updating the base reg.
310 else if (Offset != 0) {
311 // If starting offset isn't zero, insert a MI to materialize a new base.
312 // But only do so if it is cost effective, i.e. merging more than two
318 if (isi32Load(Opcode))
319 // If it is a load, then just use one of the destination register to
320 // use as the new base.
321 NewBase = Regs[NumRegs-1].first;
323 // Use the scratch register to use as a new base.
328 int BaseOpc = !isThumb2
330 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
334 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
337 int ImmedOffset = isThumb2
338 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
339 if (ImmedOffset == -1)
340 // FIXME: Try t2ADDri12 or t2SUBri12?
341 return false; // Probably not worth it then.
343 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
344 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
345 .addImm(Pred).addReg(PredReg).addReg(0);
347 BaseKill = true; // New base is always killed right its use.
350 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
351 Opcode == ARM::VLDRD);
352 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
353 if (!Opcode) return false;
354 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
355 .addReg(Base, getKillRegState(BaseKill))
356 .addImm(Pred).addReg(PredReg);
357 for (unsigned i = 0; i != NumRegs; ++i)
358 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
359 | getKillRegState(Regs[i].second));
364 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
366 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
368 unsigned memOpsBegin, unsigned memOpsEnd,
369 unsigned insertAfter, int Offset,
370 unsigned Base, bool BaseKill,
372 ARMCC::CondCodes Pred, unsigned PredReg,
375 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
376 // First calculate which of the registers should be killed by the merged
378 const unsigned insertPos = memOps[insertAfter].Position;
379 SmallSet<unsigned, 4> KilledRegs;
380 DenseMap<unsigned, unsigned> Killer;
381 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
382 if (i == memOpsBegin) {
387 if (memOps[i].Position < insertPos && memOps[i].isKill) {
388 unsigned Reg = memOps[i].Reg;
389 KilledRegs.insert(Reg);
394 SmallVector<std::pair<unsigned, bool>, 8> Regs;
395 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
396 unsigned Reg = memOps[i].Reg;
397 // If we are inserting the merged operation after an operation that
398 // uses the same register, make sure to transfer any kill flag.
399 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
400 Regs.push_back(std::make_pair(Reg, isKill));
403 // Try to do the merge.
404 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
406 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
407 Pred, PredReg, Scratch, dl, Regs))
410 // Merge succeeded, update records.
411 Merges.push_back(prior(Loc));
412 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
413 // Remove kill flags from any memops that come before insertPos.
414 if (Regs[i-memOpsBegin].second) {
415 unsigned Reg = Regs[i-memOpsBegin].first;
416 if (KilledRegs.count(Reg)) {
417 unsigned j = Killer[Reg];
418 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
419 assert(Idx >= 0 && "Cannot find killing operand");
420 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
421 memOps[j].isKill = false;
423 memOps[i].isKill = true;
425 MBB.erase(memOps[i].MBBI);
426 // Update this memop to refer to the merged instruction.
427 // We may need to move kill flags again.
428 memOps[i].Merged = true;
429 memOps[i].MBBI = Merges.back();
430 memOps[i].Position = insertPos;
434 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
435 /// load / store multiple instructions.
437 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
438 unsigned Base, int Opcode, unsigned Size,
439 ARMCC::CondCodes Pred, unsigned PredReg,
440 unsigned Scratch, MemOpQueue &MemOps,
441 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
442 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
443 int Offset = MemOps[SIndex].Offset;
444 int SOffset = Offset;
445 unsigned insertAfter = SIndex;
446 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
447 DebugLoc dl = Loc->getDebugLoc();
448 const MachineOperand &PMO = Loc->getOperand(0);
449 unsigned PReg = PMO.getReg();
450 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
451 : getARMRegisterNumbering(PReg);
454 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
455 int NewOffset = MemOps[i].Offset;
456 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
457 unsigned Reg = MO.getReg();
458 unsigned RegNum = MO.isUndef() ? UINT_MAX
459 : getARMRegisterNumbering(Reg);
460 // Register numbers must be in ascending order. For VFP, the registers
461 // must also be consecutive and there is a limit of 16 double-word
462 // registers per instruction.
463 if (Reg != ARM::SP &&
464 NewOffset == Offset + (int)Size &&
465 ((isNotVFP && RegNum > PRegNum)
466 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
471 // Can't merge this in. Try merge the earlier ones first.
472 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
473 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
474 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
479 if (MemOps[i].Position > MemOps[insertAfter].Position)
483 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
484 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
485 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
489 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
490 unsigned Bytes, unsigned Limit,
491 ARMCC::CondCodes Pred, unsigned PredReg){
492 unsigned MyPredReg = 0;
495 if (MI->getOpcode() != ARM::t2SUBri &&
496 MI->getOpcode() != ARM::t2SUBrSPi &&
497 MI->getOpcode() != ARM::t2SUBrSPi12 &&
498 MI->getOpcode() != ARM::tSUBspi &&
499 MI->getOpcode() != ARM::SUBri)
502 // Make sure the offset fits in 8 bits.
503 if (Bytes == 0 || (Limit && Bytes >= Limit))
506 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
507 return (MI->getOperand(0).getReg() == Base &&
508 MI->getOperand(1).getReg() == Base &&
509 (MI->getOperand(2).getImm()*Scale) == Bytes &&
510 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
511 MyPredReg == PredReg);
514 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
515 unsigned Bytes, unsigned Limit,
516 ARMCC::CondCodes Pred, unsigned PredReg){
517 unsigned MyPredReg = 0;
520 if (MI->getOpcode() != ARM::t2ADDri &&
521 MI->getOpcode() != ARM::t2ADDrSPi &&
522 MI->getOpcode() != ARM::t2ADDrSPi12 &&
523 MI->getOpcode() != ARM::tADDspi &&
524 MI->getOpcode() != ARM::ADDri)
527 if (Bytes == 0 || (Limit && Bytes >= Limit))
528 // Make sure the offset fits in 8 bits.
531 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
532 return (MI->getOperand(0).getReg() == Base &&
533 MI->getOperand(1).getReg() == Base &&
534 (MI->getOperand(2).getImm()*Scale) == Bytes &&
535 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
536 MyPredReg == PredReg);
539 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
540 switch (MI->getOpcode()) {
568 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
571 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
575 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
576 ARM_AM::AMSubMode Mode) {
578 default: llvm_unreachable("Unhandled opcode!");
584 default: llvm_unreachable("Unhandled submode!");
585 case ARM_AM::ia: return ARM::LDMIA_UPD;
586 case ARM_AM::ib: return ARM::LDMIB_UPD;
587 case ARM_AM::da: return ARM::LDMDA_UPD;
588 case ARM_AM::db: return ARM::LDMDB_UPD;
596 default: llvm_unreachable("Unhandled submode!");
597 case ARM_AM::ia: return ARM::STMIA_UPD;
598 case ARM_AM::ib: return ARM::STMIB_UPD;
599 case ARM_AM::da: return ARM::STMDA_UPD;
600 case ARM_AM::db: return ARM::STMDB_UPD;
606 default: llvm_unreachable("Unhandled submode!");
607 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
608 case ARM_AM::db: return ARM::t2LDMDB_UPD;
614 default: llvm_unreachable("Unhandled submode!");
615 case ARM_AM::ia: return ARM::t2STMIA_UPD;
616 case ARM_AM::db: return ARM::t2STMDB_UPD;
621 default: llvm_unreachable("Unhandled submode!");
622 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
623 case ARM_AM::db: return ARM::VLDMSDB_UPD;
628 default: llvm_unreachable("Unhandled submode!");
629 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
630 case ARM_AM::db: return ARM::VLDMDDB_UPD;
635 default: llvm_unreachable("Unhandled submode!");
636 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
637 case ARM_AM::db: return ARM::VSTMSDB_UPD;
642 default: llvm_unreachable("Unhandled submode!");
643 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
644 case ARM_AM::db: return ARM::VSTMDDB_UPD;
652 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
653 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
655 /// stmia rn, <ra, rb, rc>
656 /// rn := rn + 4 * 3;
658 /// stmia rn!, <ra, rb, rc>
660 /// rn := rn - 4 * 3;
661 /// ldmia rn, <ra, rb, rc>
663 /// ldmdb rn!, <ra, rb, rc>
664 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
665 MachineBasicBlock::iterator MBBI,
667 MachineBasicBlock::iterator &I) {
668 MachineInstr *MI = MBBI;
669 unsigned Base = MI->getOperand(0).getReg();
670 bool BaseKill = MI->getOperand(0).isKill();
671 unsigned Bytes = getLSMultipleTransferSize(MI);
672 unsigned PredReg = 0;
673 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
674 int Opcode = MI->getOpcode();
675 DebugLoc dl = MI->getDebugLoc();
677 // Can't use an updating ld/st if the base register is also a dest
678 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
679 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
680 if (MI->getOperand(i).getReg() == Base)
683 bool DoMerge = false;
684 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
686 // Try merging with the previous instruction.
687 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
688 if (MBBI != BeginMBBI) {
689 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
690 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
692 if (Mode == ARM_AM::ia &&
693 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
696 } else if (Mode == ARM_AM::ib &&
697 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
705 // Try merging with the next instruction.
706 MachineBasicBlock::iterator EndMBBI = MBB.end();
707 if (!DoMerge && MBBI != EndMBBI) {
708 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
709 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
711 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
712 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
714 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
715 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
730 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
731 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
732 .addReg(Base, getDefRegState(true)) // WB base register
733 .addReg(Base, getKillRegState(BaseKill))
734 .addImm(Pred).addReg(PredReg);
736 // Transfer the rest of operands.
737 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
738 MIB.addOperand(MI->getOperand(OpNum));
740 // Transfer memoperands.
741 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
747 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
748 ARM_AM::AddrOpc Mode) {
755 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
757 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
759 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
761 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
764 return ARM::t2LDR_PRE;
767 return ARM::t2STR_PRE;
768 default: llvm_unreachable("Unhandled opcode!");
773 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
774 ARM_AM::AddrOpc Mode) {
777 return ARM::LDR_POST;
779 return ARM::STR_POST;
781 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
783 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
785 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
787 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
790 return ARM::t2LDR_POST;
793 return ARM::t2STR_POST;
794 default: llvm_unreachable("Unhandled opcode!");
799 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
800 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
801 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
802 MachineBasicBlock::iterator MBBI,
803 const TargetInstrInfo *TII,
805 MachineBasicBlock::iterator &I) {
806 MachineInstr *MI = MBBI;
807 unsigned Base = MI->getOperand(1).getReg();
808 bool BaseKill = MI->getOperand(1).isKill();
809 unsigned Bytes = getLSMultipleTransferSize(MI);
810 int Opcode = MI->getOpcode();
811 DebugLoc dl = MI->getDebugLoc();
812 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
813 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
814 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
815 if (isi32Load(Opcode) || isi32Store(Opcode))
816 if (MI->getOperand(2).getImm() != 0)
818 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
821 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
822 // Can't do the merge if the destination register is the same as the would-be
823 // writeback register.
824 if (isLd && MI->getOperand(0).getReg() == Base)
827 unsigned PredReg = 0;
828 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
829 bool DoMerge = false;
830 ARM_AM::AddrOpc AddSub = ARM_AM::add;
832 // AM2 - 12 bits, thumb2 - 8 bits.
833 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
835 // Try merging with the previous instruction.
836 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
837 if (MBBI != BeginMBBI) {
838 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
839 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
841 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
843 AddSub = ARM_AM::sub;
845 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
849 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
854 // Try merging with the next instruction.
855 MachineBasicBlock::iterator EndMBBI = MBB.end();
856 if (!DoMerge && MBBI != EndMBBI) {
857 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
858 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
861 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
863 AddSub = ARM_AM::sub;
864 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
868 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
882 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
884 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
887 // VLDM[SD}_UPD, VSTM[SD]_UPD
888 // (There are no base-updating versions of VLDR/VSTR instructions, but the
889 // updating load/store-multiple instructions can be used with only one
891 MachineOperand &MO = MI->getOperand(0);
892 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
893 .addReg(Base, getDefRegState(true)) // WB base register
894 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
895 .addImm(Pred).addReg(PredReg)
896 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
897 getKillRegState(MO.isKill())));
900 // LDR_PRE, LDR_POST,
901 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
902 .addReg(Base, RegState::Define)
903 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
905 // t2LDR_PRE, t2LDR_POST
906 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
907 .addReg(Base, RegState::Define)
908 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
910 MachineOperand &MO = MI->getOperand(0);
913 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
914 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
915 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
917 // t2STR_PRE, t2STR_POST
918 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
919 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
920 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
927 /// isMemoryOp - Returns true if instruction is a memory operations (that this
928 /// pass is capable of operating on).
929 static bool isMemoryOp(const MachineInstr *MI) {
930 // When no memory operands are present, conservatively assume unaligned,
931 // volatile, unfoldable.
932 if (!MI->hasOneMemOperand())
935 const MachineMemOperand *MMO = *MI->memoperands_begin();
937 // Don't touch volatile memory accesses - we may be changing their order.
938 if (MMO->isVolatile())
941 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
943 if (MMO->getAlignment() < 4)
946 // str <undef> could probably be eliminated entirely, but for now we just want
947 // to avoid making a mess of it.
948 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
949 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
950 MI->getOperand(0).isUndef())
953 // Likewise don't mess with references to undefined addresses.
954 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
955 MI->getOperand(1).isUndef())
958 int Opcode = MI->getOpcode();
963 return MI->getOperand(1).isReg();
966 return MI->getOperand(1).isReg();
973 return MI->getOperand(1).isReg();
978 /// AdvanceRS - Advance register scavenger to just before the earliest memory
979 /// op that is being merged.
980 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
981 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
982 unsigned Position = MemOps[0].Position;
983 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
984 if (MemOps[i].Position < Position) {
985 Position = MemOps[i].Position;
986 Loc = MemOps[i].MBBI;
990 if (Loc != MBB.begin())
991 RS->forward(prior(Loc));
994 static int getMemoryOpOffset(const MachineInstr *MI) {
995 int Opcode = MI->getOpcode();
996 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
997 unsigned NumOperands = MI->getDesc().getNumOperands();
998 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1000 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1001 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1002 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1003 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1006 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1007 : ARM_AM::getAM5Offset(OffField) * 4;
1009 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1012 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1018 static void InsertLDR_STR(MachineBasicBlock &MBB,
1019 MachineBasicBlock::iterator &MBBI,
1020 int Offset, bool isDef,
1021 DebugLoc dl, unsigned NewOpc,
1022 unsigned Reg, bool RegDeadKill, bool RegUndef,
1023 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1024 bool OffKill, bool OffUndef,
1025 ARMCC::CondCodes Pred, unsigned PredReg,
1026 const TargetInstrInfo *TII, bool isT2) {
1028 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1030 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1031 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1032 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1034 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1036 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1037 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1038 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1042 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1043 MachineBasicBlock::iterator &MBBI) {
1044 MachineInstr *MI = &*MBBI;
1045 unsigned Opcode = MI->getOpcode();
1046 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1047 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1048 unsigned EvenReg = MI->getOperand(0).getReg();
1049 unsigned OddReg = MI->getOperand(1).getReg();
1050 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1051 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1052 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1055 MachineBasicBlock::iterator NewBBI = MBBI;
1056 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1057 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1058 bool EvenDeadKill = isLd ?
1059 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1060 bool EvenUndef = MI->getOperand(0).isUndef();
1061 bool OddDeadKill = isLd ?
1062 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1063 bool OddUndef = MI->getOperand(1).isUndef();
1064 const MachineOperand &BaseOp = MI->getOperand(2);
1065 unsigned BaseReg = BaseOp.getReg();
1066 bool BaseKill = BaseOp.isKill();
1067 bool BaseUndef = BaseOp.isUndef();
1068 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1069 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1070 int OffImm = getMemoryOpOffset(MI);
1071 unsigned PredReg = 0;
1072 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1074 if (OddRegNum > EvenRegNum && OffImm == 0) {
1075 // Ascending register numbers and no offset. It's safe to change it to a
1077 unsigned NewOpc = (isLd)
1078 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1079 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1081 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1082 .addReg(BaseReg, getKillRegState(BaseKill))
1083 .addImm(Pred).addReg(PredReg)
1084 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1085 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1088 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1089 .addReg(BaseReg, getKillRegState(BaseKill))
1090 .addImm(Pred).addReg(PredReg)
1092 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1094 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1097 NewBBI = llvm::prior(MBBI);
1099 // Split into two instructions.
1100 unsigned NewOpc = (isLd)
1101 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1102 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1103 DebugLoc dl = MBBI->getDebugLoc();
1104 // If this is a load and base register is killed, it may have been
1105 // re-defed by the load, make sure the first load does not clobber it.
1107 (BaseKill || OffKill) &&
1108 (TRI->regsOverlap(EvenReg, BaseReg))) {
1109 assert(!TRI->regsOverlap(OddReg, BaseReg));
1110 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1111 OddReg, OddDeadKill, false,
1112 BaseReg, false, BaseUndef, false, OffUndef,
1113 Pred, PredReg, TII, isT2);
1114 NewBBI = llvm::prior(MBBI);
1115 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1116 EvenReg, EvenDeadKill, false,
1117 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1118 Pred, PredReg, TII, isT2);
1120 if (OddReg == EvenReg && EvenDeadKill) {
1121 // If the two source operands are the same, the kill marker is
1122 // probably on the first one. e.g.
1123 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1124 EvenDeadKill = false;
1127 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1128 EvenReg, EvenDeadKill, EvenUndef,
1129 BaseReg, false, BaseUndef, false, OffUndef,
1130 Pred, PredReg, TII, isT2);
1131 NewBBI = llvm::prior(MBBI);
1132 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1133 OddReg, OddDeadKill, OddUndef,
1134 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1135 Pred, PredReg, TII, isT2);
1150 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1151 /// ops of the same base and incrementing offset into LDM / STM ops.
1152 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1153 unsigned NumMerges = 0;
1154 unsigned NumMemOps = 0;
1156 unsigned CurrBase = 0;
1158 unsigned CurrSize = 0;
1159 ARMCC::CondCodes CurrPred = ARMCC::AL;
1160 unsigned CurrPredReg = 0;
1161 unsigned Position = 0;
1162 SmallVector<MachineBasicBlock::iterator,4> Merges;
1164 RS->enterBasicBlock(&MBB);
1165 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1167 if (FixInvalidRegPairOp(MBB, MBBI))
1170 bool Advance = false;
1171 bool TryMerge = false;
1172 bool Clobber = false;
1174 bool isMemOp = isMemoryOp(MBBI);
1176 int Opcode = MBBI->getOpcode();
1177 unsigned Size = getLSMultipleTransferSize(MBBI);
1178 const MachineOperand &MO = MBBI->getOperand(0);
1179 unsigned Reg = MO.getReg();
1180 bool isKill = MO.isDef() ? false : MO.isKill();
1181 unsigned Base = MBBI->getOperand(1).getReg();
1182 unsigned PredReg = 0;
1183 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1184 int Offset = getMemoryOpOffset(MBBI);
1187 // r5 := ldr [r5, #4]
1188 // r6 := ldr [r5, #8]
1190 // The second ldr has effectively broken the chain even though it
1191 // looks like the later ldr(s) use the same base register. Try to
1192 // merge the ldr's so far, including this one. But don't try to
1193 // combine the following ldr(s).
1194 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1195 if (CurrBase == 0 && !Clobber) {
1196 // Start of a new chain.
1201 CurrPredReg = PredReg;
1202 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1211 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1212 // No need to match PredReg.
1213 // Continue adding to the queue.
1214 if (Offset > MemOps.back().Offset) {
1215 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1220 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1222 if (Offset < I->Offset) {
1223 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1228 } else if (Offset == I->Offset) {
1229 // Collision! This can't be merged!
1238 if (MBBI->isDebugValue()) {
1241 // Reach the end of the block, try merging the memory instructions.
1243 } else if (Advance) {
1247 // Reach the end of the block, try merging the memory instructions.
1253 if (NumMemOps > 1) {
1254 // Try to find a free register to use as a new base in case it's needed.
1255 // First advance to the instruction just before the start of the chain.
1256 AdvanceRS(MBB, MemOps);
1257 // Find a scratch register.
1258 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1259 // Process the load / store instructions.
1260 RS->forward(prior(MBBI));
1264 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1265 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1267 // Try folding preceeding/trailing base inc/dec into the generated
1269 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1270 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1272 NumMerges += Merges.size();
1274 // Try folding preceeding/trailing base inc/dec into those load/store
1275 // that were not merged to form LDM/STM ops.
1276 for (unsigned i = 0; i != NumMemOps; ++i)
1277 if (!MemOps[i].Merged)
1278 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1281 // RS may be pointing to an instruction that's deleted.
1282 RS->skipTo(prior(MBBI));
1283 } else if (NumMemOps == 1) {
1284 // Try folding preceeding/trailing base inc/dec into the single
1286 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1288 RS->forward(prior(MBBI));
1295 CurrPred = ARMCC::AL;
1302 // If iterator hasn't been advanced and this is not a memory op, skip it.
1303 // It can't start a new chain anyway.
1304 if (!Advance && !isMemOp && MBBI != E) {
1310 return NumMerges > 0;
1313 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1314 /// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
1315 /// directly restore the value of LR into pc.
1316 /// ldmfd sp!, {..., lr}
1319 /// ldmfd sp!, {..., lr}
1322 /// ldmfd sp!, {..., pc}
1323 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1324 if (MBB.empty()) return false;
1326 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1327 if (MBBI != MBB.begin() &&
1328 (MBBI->getOpcode() == ARM::BX_RET ||
1329 MBBI->getOpcode() == ARM::tBX_RET ||
1330 MBBI->getOpcode() == ARM::MOVPCLR)) {
1331 MachineInstr *PrevMI = prior(MBBI);
1332 unsigned Opcode = PrevMI->getOpcode();
1333 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1334 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1335 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1336 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1337 if (MO.getReg() != ARM::LR)
1339 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1340 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1341 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1342 PrevMI->setDesc(TII->get(NewOpc));
1344 PrevMI->copyImplicitOps(&*MBBI);
1352 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1353 const TargetMachine &TM = Fn.getTarget();
1354 AFI = Fn.getInfo<ARMFunctionInfo>();
1355 TII = TM.getInstrInfo();
1356 TRI = TM.getRegisterInfo();
1357 RS = new RegScavenger();
1358 isThumb2 = AFI->isThumb2Function();
1360 bool Modified = false;
1361 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1363 MachineBasicBlock &MBB = *MFI;
1364 Modified |= LoadStoreMultipleOpti(MBB);
1365 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1366 Modified |= MergeReturnIntoLDM(MBB);
1374 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1375 /// load / stores from consecutive locations close to make it more
1376 /// likely they will be combined later.
1379 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1381 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1383 const TargetData *TD;
1384 const TargetInstrInfo *TII;
1385 const TargetRegisterInfo *TRI;
1386 const ARMSubtarget *STI;
1387 MachineRegisterInfo *MRI;
1388 MachineFunction *MF;
1390 virtual bool runOnMachineFunction(MachineFunction &Fn);
1392 virtual const char *getPassName() const {
1393 return "ARM pre- register allocation load / store optimization pass";
1397 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1398 unsigned &NewOpc, unsigned &EvenReg,
1399 unsigned &OddReg, unsigned &BaseReg,
1401 unsigned &PredReg, ARMCC::CondCodes &Pred,
1403 bool RescheduleOps(MachineBasicBlock *MBB,
1404 SmallVector<MachineInstr*, 4> &Ops,
1405 unsigned Base, bool isLd,
1406 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1407 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1409 char ARMPreAllocLoadStoreOpt::ID = 0;
1412 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1413 TD = Fn.getTarget().getTargetData();
1414 TII = Fn.getTarget().getInstrInfo();
1415 TRI = Fn.getTarget().getRegisterInfo();
1416 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1417 MRI = &Fn.getRegInfo();
1420 bool Modified = false;
1421 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1423 Modified |= RescheduleLoadStoreInstrs(MFI);
1428 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1429 MachineBasicBlock::iterator I,
1430 MachineBasicBlock::iterator E,
1431 SmallPtrSet<MachineInstr*, 4> &MemOps,
1432 SmallSet<unsigned, 4> &MemRegs,
1433 const TargetRegisterInfo *TRI) {
1434 // Are there stores / loads / calls between them?
1435 // FIXME: This is overly conservative. We should make use of alias information
1437 SmallSet<unsigned, 4> AddedRegPressure;
1439 if (I->isDebugValue() || MemOps.count(&*I))
1441 const TargetInstrDesc &TID = I->getDesc();
1442 if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
1444 if (isLd && TID.mayStore())
1449 // It's not safe to move the first 'str' down.
1452 // str r4, [r0, #+4]
1456 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1457 MachineOperand &MO = I->getOperand(j);
1460 unsigned Reg = MO.getReg();
1461 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1463 if (Reg != Base && !MemRegs.count(Reg))
1464 AddedRegPressure.insert(Reg);
1468 // Estimate register pressure increase due to the transformation.
1469 if (MemRegs.size() <= 4)
1470 // Ok if we are moving small number of instructions.
1472 return AddedRegPressure.size() <= MemRegs.size() * 2;
1476 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1478 unsigned &NewOpc, unsigned &EvenReg,
1479 unsigned &OddReg, unsigned &BaseReg,
1480 int &Offset, unsigned &PredReg,
1481 ARMCC::CondCodes &Pred,
1483 // Make sure we're allowed to generate LDRD/STRD.
1484 if (!STI->hasV5TEOps())
1487 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1489 unsigned Opcode = Op0->getOpcode();
1490 if (Opcode == ARM::LDRi12)
1492 else if (Opcode == ARM::STRi12)
1494 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1495 NewOpc = ARM::t2LDRDi8;
1498 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1499 NewOpc = ARM::t2STRDi8;
1505 // Make sure the base address satisfies i64 ld / st alignment requirement.
1506 if (!Op0->hasOneMemOperand() ||
1507 !(*Op0->memoperands_begin())->getValue() ||
1508 (*Op0->memoperands_begin())->isVolatile())
1511 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1512 const Function *Func = MF->getFunction();
1513 unsigned ReqAlign = STI->hasV6Ops()
1514 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1515 : 8; // Pre-v6 need 8-byte align
1516 if (Align < ReqAlign)
1519 // Then make sure the immediate offset fits.
1520 int OffImm = getMemoryOpOffset(Op0);
1522 int Limit = (1 << 8) * Scale;
1523 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1527 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1529 AddSub = ARM_AM::sub;
1532 int Limit = (1 << 8) * Scale;
1533 if (OffImm >= Limit || (OffImm & (Scale-1)))
1535 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1537 EvenReg = Op0->getOperand(0).getReg();
1538 OddReg = Op1->getOperand(0).getReg();
1539 if (EvenReg == OddReg)
1541 BaseReg = Op0->getOperand(1).getReg();
1542 Pred = llvm::getInstrPredicate(Op0, PredReg);
1543 dl = Op0->getDebugLoc();
1548 struct OffsetCompare {
1549 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1550 int LOffset = getMemoryOpOffset(LHS);
1551 int ROffset = getMemoryOpOffset(RHS);
1552 assert(LHS == RHS || LOffset != ROffset);
1553 return LOffset > ROffset;
1558 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1559 SmallVector<MachineInstr*, 4> &Ops,
1560 unsigned Base, bool isLd,
1561 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1562 bool RetVal = false;
1564 // Sort by offset (in reverse order).
1565 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1567 // The loads / stores of the same base are in order. Scan them from first to
1568 // last and check for the following:
1569 // 1. Any def of base.
1571 while (Ops.size() > 1) {
1572 unsigned FirstLoc = ~0U;
1573 unsigned LastLoc = 0;
1574 MachineInstr *FirstOp = 0;
1575 MachineInstr *LastOp = 0;
1577 unsigned LastOpcode = 0;
1578 unsigned LastBytes = 0;
1579 unsigned NumMove = 0;
1580 for (int i = Ops.size() - 1; i >= 0; --i) {
1581 MachineInstr *Op = Ops[i];
1582 unsigned Loc = MI2LocMap[Op];
1583 if (Loc <= FirstLoc) {
1587 if (Loc >= LastLoc) {
1592 unsigned Opcode = Op->getOpcode();
1593 if (LastOpcode && Opcode != LastOpcode)
1596 int Offset = getMemoryOpOffset(Op);
1597 unsigned Bytes = getLSMultipleTransferSize(Op);
1599 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1602 LastOffset = Offset;
1604 LastOpcode = Opcode;
1605 if (++NumMove == 8) // FIXME: Tune this limit.
1612 SmallPtrSet<MachineInstr*, 4> MemOps;
1613 SmallSet<unsigned, 4> MemRegs;
1614 for (int i = NumMove-1; i >= 0; --i) {
1615 MemOps.insert(Ops[i]);
1616 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1619 // Be conservative, if the instructions are too far apart, don't
1620 // move them. We want to limit the increase of register pressure.
1621 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1623 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1624 MemOps, MemRegs, TRI);
1626 for (unsigned i = 0; i != NumMove; ++i)
1629 // This is the new location for the loads / stores.
1630 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1631 while (InsertPos != MBB->end()
1632 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1635 // If we are moving a pair of loads / stores, see if it makes sense
1636 // to try to allocate a pair of registers that can form register pairs.
1637 MachineInstr *Op0 = Ops.back();
1638 MachineInstr *Op1 = Ops[Ops.size()-2];
1639 unsigned EvenReg = 0, OddReg = 0;
1640 unsigned BaseReg = 0, PredReg = 0;
1641 ARMCC::CondCodes Pred = ARMCC::AL;
1643 unsigned NewOpc = 0;
1646 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1647 EvenReg, OddReg, BaseReg,
1648 Offset, PredReg, Pred, isT2)) {
1652 // Form the pair instruction.
1654 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1655 dl, TII->get(NewOpc))
1656 .addReg(EvenReg, RegState::Define)
1657 .addReg(OddReg, RegState::Define)
1659 // FIXME: We're converting from LDRi12 to an insn that still
1660 // uses addrmode2, so we need an explicit offset reg. It should
1661 // always by reg0 since we're transforming LDRi12s.
1664 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1667 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1668 dl, TII->get(NewOpc))
1672 // FIXME: We're converting from LDRi12 to an insn that still
1673 // uses addrmode2, so we need an explicit offset reg. It should
1674 // always by reg0 since we're transforming STRi12s.
1677 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1683 // Add register allocation hints to form register pairs.
1684 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1685 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
1687 for (unsigned i = 0; i != NumMove; ++i) {
1688 MachineInstr *Op = Ops.back();
1690 MBB->splice(InsertPos, MBB, Op);
1694 NumLdStMoved += NumMove;
1704 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1705 bool RetVal = false;
1707 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1708 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1709 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1710 SmallVector<unsigned, 4> LdBases;
1711 SmallVector<unsigned, 4> StBases;
1714 MachineBasicBlock::iterator MBBI = MBB->begin();
1715 MachineBasicBlock::iterator E = MBB->end();
1717 for (; MBBI != E; ++MBBI) {
1718 MachineInstr *MI = MBBI;
1719 const TargetInstrDesc &TID = MI->getDesc();
1720 if (TID.isCall() || TID.isTerminator()) {
1721 // Stop at barriers.
1726 if (!MI->isDebugValue())
1727 MI2LocMap[MI] = ++Loc;
1729 if (!isMemoryOp(MI))
1731 unsigned PredReg = 0;
1732 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1735 int Opc = MI->getOpcode();
1736 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1737 unsigned Base = MI->getOperand(1).getReg();
1738 int Offset = getMemoryOpOffset(MI);
1740 bool StopHere = false;
1742 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1743 Base2LdsMap.find(Base);
1744 if (BI != Base2LdsMap.end()) {
1745 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1746 if (Offset == getMemoryOpOffset(BI->second[i])) {
1752 BI->second.push_back(MI);
1754 SmallVector<MachineInstr*, 4> MIs;
1756 Base2LdsMap[Base] = MIs;
1757 LdBases.push_back(Base);
1760 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1761 Base2StsMap.find(Base);
1762 if (BI != Base2StsMap.end()) {
1763 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1764 if (Offset == getMemoryOpOffset(BI->second[i])) {
1770 BI->second.push_back(MI);
1772 SmallVector<MachineInstr*, 4> MIs;
1774 Base2StsMap[Base] = MIs;
1775 StBases.push_back(Base);
1780 // Found a duplicate (a base+offset combination that's seen earlier).
1787 // Re-schedule loads.
1788 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1789 unsigned Base = LdBases[i];
1790 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1792 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1795 // Re-schedule stores.
1796 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1797 unsigned Base = StBases[i];
1798 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1800 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1804 Base2LdsMap.clear();
1805 Base2StsMap.clear();
1815 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1816 /// optimization pass.
1817 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1819 return new ARMPreAllocLoadStoreOpt();
1820 return new ARMLoadStoreOpt();