1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass statistics (reported via -stats). Each counter records one class of
// transformation performed by this load/store optimizer.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines adjacent
/// load / store instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  // Target hooks, cached per-function by runOnMachineFunction.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";

  /// MemOpQueueEntry - One candidate load/store queued for merging into a
  /// load/store-multiple instruction.
  struct MemOpQueueEntry {
    MachineBasicBlock::iterator MBBI;   // The candidate instruction itself.
    // o: immediate offset from the common base register.
    // r: register loaded/stored; k: kill flag of that register.
    // p: ordinal position of the instruction within its basic block.
    // Merged starts false and is set once the op is folded into a multiple.
    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;

  // Build and insert a single LDM/STM covering Regs; returns true on success.
  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
  // Wrapper around MergeOps that also maintains kill flags, the memop queue,
  // and the list of merged instructions.
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      ARMCC::CondCodes Pred,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  // Split a run of queued memops into mergeable subsequences and merge them.
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  // Advance the register scavenger to just before the earliest queued memop.
  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  // Rewrite LDRD/STRD with an illegal (non even/odd) register pair.
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  // Fold a neighboring base-register add/sub into a single load/store.
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                MachineBasicBlock::iterator &I);
  // Fold a neighboring base-register add/sub into a load/store-multiple.
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification: the address of ID serves as LLVM's unique pass ID.
char ARMLoadStoreOpt::ID = 0;
/// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
/// addressing-mode submode to the corresponding load/store-multiple opcode.
/// Plain ARM LDM/STM support all four submodes (ia/ib/da/db); the Thumb2
/// (t2) and VFP (V...M) variants below only have ia and db forms.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    // ARM-mode integer load group.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    // ARM-mode integer store group.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    // Thumb2 loads: only IA and DB submodes exist.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    // Thumb2 stores.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    // VFP single-precision loads.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return ARM::VLDMSDB;
    // VFP single-precision stores.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return ARM::VSTMSDB;
    // VFP double-precision loads.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return ARM::VLDMDDB;
    // VFP double-precision stores.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return ARM::VSTMDDB;
/// getLoadStoreMultipleSubMode - Recover the addressing submode (ia/ib/da/db)
/// encoded in a load/store-multiple opcode, including the updating (_UPD)
/// and return (_RET) forms.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  // IA (increment-after) group.
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA_UPD:
  // DB (decrement-before) group.
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
  // NOTE(review): fallback return; the switch above looks exhaustive for
  // multiple opcodes (default is unreachable) — confirm.
  return ARM_AM::bad_am_submode;
} // end namespace ARM_AM
} // end namespace llvm
272 static bool isT2i32Load(unsigned Opc) {
273 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
276 static bool isi32Load(unsigned Opc) {
277 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
280 static bool isT2i32Store(unsigned Opc) {
281 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
284 static bool isi32Store(unsigned Opc) {
285 return Opc == ARM::STRi12 || isT2i32Store(Opc);
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();

  // Select the addressing submode from the starting offset.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
      // Use the scratch register to use as a new base.
    // ADD/SUB the offset into the new base; SP has dedicated Thumb2 opcodes.
    int BaseOpc = !isThumb2
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
      : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
    // The offset must be encodable as a shifter/modified immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    BaseKill = true;  // New base is always killed right after its use.

  // Registers are defs for loads, uses (possibly kills) for stores.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
    // Register killed by a memop that will end up before the insertion
    // point; its kill flag must migrate to the merged instruction.
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);

  // Build the (register, isKill) list for the ops being merged.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        // Clear the kill flag on the earlier killing instruction: the merged
        // instruction now carries the kill.
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      memOps[i].isKill = true;
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;            // Starting offset of the current run.
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // UINT_MAX for undef regs so they never satisfy the ascending-order check.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);

  // Grow the run while offsets stay consecutive (step == Size) and register
  // numbers keep ascending; otherwise merge what we have and recurse on the
  // remainder.
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,

    // Insert the merged instruction after the latest op in the run.
    if (MemOps[i].Position > MemOps[insertAfter].Position)

  // Merge the remaining run; base is killed if Loc reads it with a kill flag.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
/// isMatchingDecrement - Return true if MI is a SUB that subtracts exactly
/// Bytes from Base (writing the result back to Base) under a predicate
/// matching Pred/PredReg, making it foldable into a base-updating load/store.
/// A non-zero Limit bounds the acceptable immediate.
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  // Only plain register-plus-immediate SUB forms qualify.
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))

  // tSUBspi immediates are scaled by 4.
  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
/// isMatchingIncrement - Return true if MI is an ADD that adds exactly Bytes
/// to Base (writing the result back to Base) under a predicate matching
/// Pred/PredReg, making it foldable into a base-updating load/store.
/// A non-zero Limit bounds the acceptable immediate.
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  // Only plain register-plus-immediate ADD forms qualify.
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.

  // tADDspi immediates are scaled by 4.
  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
/// getLSMultipleTransferSize - Number of bytes transferred by MI. For the
/// multiple forms this is derived from the operand count: 4 bytes per
/// GPR/S-register transferred, 8 per D-register.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// getUpdatingLSMultipleOpcode - Map a load/store-multiple opcode plus a
/// submode to the corresponding base-updating (writeback, "_UPD") opcode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    // ARM-mode LDM: all four submodes have updating forms.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    // ARM-mode STM.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    // Thumb2 and VFP multiples: only IA and DB submodes exist.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    // A preceding decrement of Base by Bytes can be absorbed into the
    // writeback form of an IA/IB multiple.
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {

  // Rebuild the instruction as its base-updating (_UPD) variant: writeback
  // base def first, then base use, predicate, and the original reg list.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
/// getPreIndexedLoadStoreOpcode - Return the pre-indexed (writeback before
/// access) opcode for a single load/store. VLDR/VSTR have no pre-indexed
/// forms, so the single-register VLDM/VSTM updating variants are used, with
/// Mode selecting the add (IA) or subtract (DB) flavor.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  return ARM::t2LDR_PRE;
  return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
/// getPostIndexedLoadStoreOpcode - Return the post-indexed (writeback after
/// access) opcode for a single load/store. As with the pre-indexed case,
/// VLDR/VSTR are handled via single-register VLDM/VSTM updating variants.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  return ARM::LDR_POST;
  return ARM::STR_POST;
  return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  return ARM::t2LDR_POST;
  return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only zero-offset loads/stores can absorb a neighboring base update.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  // Immediate range of the update: AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction (gives the pre-indexed form).
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
      isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);

  // Try merging with the next instruction (gives the post-indexed form).
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);

    // AM2 packs add/sub + amount + shift into one immediate operand.
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
    // VLDM[SD}_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
    // LDR_PRE, LDR_POST,
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
      .addReg(Base, RegState::Define)
      .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    // t2LDR_PRE, t2LDR_POST
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
      .addReg(Base, RegState::Define)
      .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    MachineOperand &MO = MI->getOperand(0);
    // ARM-mode STR_PRE/STR_POST: extra reg-0 operand for the (unused) shift.
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
      .addReg(MO.getReg(), getKillRegState(MO.isKill()))
      .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    // t2STR_PRE, t2STR_POST
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
      .addReg(MO.getReg(), getKillRegState(MO.isKill()))
      .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if instruction is a memory operations (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  if (MMO->getAlignment() < 4)

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())

  int Opcode = MI->getOpcode();
  // Candidate opcodes must also use a plain register base (operand 1).
  return MI->getOperand(1).isReg();
  return MI->getOperand(1).isReg();
  return MI->getOperand(1).isReg();
989 /// AdvanceRS - Advance register scavenger to just before the earliest memory
990 /// op that is being merged.
991 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
992 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
993 unsigned Position = MemOps[0].Position;
994 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
995 if (MemOps[i].Position < Position) {
996 Position = MemOps[i].Position;
997 Loc = MemOps[i].MBBI;
1001 if (Loc != MBB.begin())
1002 RS->forward(prior(Loc));
/// getMemoryOpOffset - Extract the immediate offset of a candidate
/// load/store, normalized to a signed byte count.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // The i12/i8 forms already hold a plain byte immediate.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)

  // AM3/AM5 pack magnitude plus an add/sub bit; AM5 offsets are scaled by 4.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  // Negate when the encoded operation is a subtract.
  if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
  if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
/// InsertLDR_STR - Insert a single load or store instruction before MBBI,
/// propagating the given kill/dead/undef flags onto the new operands.
/// NOTE(review): the branch selecting load vs. store form presumably tests
/// isDef — confirm against surrounding context.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  // Load form: Reg is a definition (possibly dead).
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
    .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
    .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  // Store form: Reg is a use (possibly killed/undef).
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
    .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
    .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
/// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair is not a
/// legal even/odd pair: either into a two-register LDM/STM (when register
/// numbers ascend and the offset is zero) or into two single loads/stores.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // An even register followed by the next odd one is already legal.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the pair regs are defs (dead flag); for stores uses (kill).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // t2LDRDi8/t2STRDi8 have no separate offset-register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      // Load form: pair registers become defs of the LDM.
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      // Store form: pair registers are uses of the STM.
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      NewBBI = llvm::prior(MBBI);
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      (BaseKill || OffKill) &&
      (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd (higher-address) half first so the even half, which
        // overlaps the base, is loaded last.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
        // Normal order: even half at OffImm, odd half at OffImm+4.
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
1161 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1162 /// ops of the same base and incrementing offset into LDM / STM ops.
1163 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// NOTE(review): this chunk is missing source lines (the fused leading numbers
// are original file line numbers); comments describe only the visible code.
1164 unsigned NumMerges = 0;
1165 unsigned NumMemOps = 0;
// State describing the chain of mergeable memory ops being accumulated:
// base register, access size, and predicate must all match to extend it.
1167 unsigned CurrBase = 0;
1169 unsigned CurrSize = 0;
1170 ARMCC::CondCodes CurrPred = ARMCC::AL;
1171 unsigned CurrPredReg = 0;
1172 unsigned Position = 0;
1173 SmallVector<MachineBasicBlock::iterator,4> Merges;
// Track liveness with the register scavenger so a scratch base register can
// be found when the merged LDM/STM needs one.
1175 RS->enterBasicBlock(&MBB);
1176 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1178 if (FixInvalidRegPairOp(MBB, MBBI))
1181 bool Advance = false;
1182 bool TryMerge = false;
1183 bool Clobber = false;
1185 bool isMemOp = isMemoryOp(MBBI);
1187 int Opcode = MBBI->getOpcode();
1188 unsigned Size = getLSMultipleTransferSize(MBBI);
1189 const MachineOperand &MO = MBBI->getOperand(0);
1190 unsigned Reg = MO.getReg();
// A def (a load's destination) can never carry a kill of the transfer reg.
1191 bool isKill = MO.isDef() ? false : MO.isKill();
1192 unsigned Base = MBBI->getOperand(1).getReg();
1193 unsigned PredReg = 0;
1194 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1195 int Offset = getMemoryOpOffset(MBBI);
// Watch for a load that redefines its own base register:
1198 // r5 := ldr [r5, #4]
1199 // r6 := ldr [r5, #8]
1201 // The second ldr has effectively broken the chain even though it
1202 // looks like the later ldr(s) use the same base register. Try to
1203 // merge the ldr's so far, including this one. But don't try to
1204 // combine the following ldr(s).
1205 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1206 if (CurrBase == 0 && !Clobber) {
1207 // Start of a new chain.
1212 CurrPredReg = PredReg;
1213 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same opcode / base / predicate: this op may extend the current chain.
1222 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1223 // No need to match PredReg.
1224 // Continue adding to the queue.
1225 if (Offset > MemOps.back().Offset) {
1226 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise insert in ascending offset order, rejecting duplicate offsets.
1231 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1233 if (Offset < I->Offset) {
1234 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1239 } else if (Offset == I->Offset) {
1240 // Collision! This can't be merged!
1249 if (MBBI->isDebugValue()) {
1252 // Reached the end of the block; try merging the memory instructions.
1254 } else if (Advance) {
1258 // Reached the end of the block; try merging the memory instructions.
1264 if (NumMemOps > 1) {
1265 // Try to find a free register to use as a new base in case it's needed.
1266 // First advance to the instruction just before the start of the chain.
1267 AdvanceRS(MBB, MemOps);
1268 // Find a scratch register.
1269 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1270 // Process the load / store instructions.
1271 RS->forward(prior(MBBI));
// Merge the queued ops into multiple-transfer instructions; resulting
// instructions are collected in Merges.
1275 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1276 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1278 // Try folding preceding/trailing base inc/dec into the generated
1280 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1281 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1283 NumMerges += Merges.size();
1285 // Try folding preceding/trailing base inc/dec into those load/store
1286 // that were not merged to form LDM/STM ops.
1287 for (unsigned i = 0; i != NumMemOps; ++i)
1288 if (!MemOps[i].Merged)
1289 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1292 // RS may be pointing to an instruction that's deleted.
1293 RS->skipTo(prior(MBBI));
1294 } else if (NumMemOps == 1) {
1295 // Try folding preceding/trailing base inc/dec into the single
1297 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1299 RS->forward(prior(MBBI));
// Reset the chain state before scanning for the next chain.
1306 CurrPred = ARMCC::AL;
1313 // If iterator hasn't been advanced and this is not a memory op, skip it.
1314 // It can't start a new chain anyway.
1315 if (!Advance && !isMemOp && MBBI != E) {
1321 return NumMerges > 0;
1324 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1325 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1326 /// directly restore the value of LR into pc.
1327 /// ldmfd sp!, {..., lr}
1330 /// ldmfd sp!, {..., lr}
1333 /// ldmfd sp!, {..., pc}
1334 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
// Nothing to merge in an empty block.
1335 if (MBB.empty()) return false;
1337 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
// The block must end in one of the recognized return instructions and have
// at least one instruction before it.
1338 if (MBBI != MBB.begin() &&
1339 (MBBI->getOpcode() == ARM::BX_RET ||
1340 MBBI->getOpcode() == ARM::tBX_RET ||
1341 MBBI->getOpcode() == ARM::MOVPCLR)) {
1342 MachineInstr *PrevMI = prior(MBBI);
1343 unsigned Opcode = PrevMI->getOpcode();
// Only the writeback (base-updating) multiple-load forms can absorb the
// return by loading straight into PC.
1344 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1345 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1346 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last register operand of the LDM must be LR for the rewrite to apply.
1347 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1348 if (MO.getReg() != ARM::LR)
1350 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
// Only the increment-after forms are expected here; anything else is a bug.
1351 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1352 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1353 PrevMI->setDesc(TII->get(NewOpc));
// Carry the return instruction's implicit operands over to the new
// load-return so liveness information is preserved.
1355 PrevMI->copyImplicitOps(&*MBBI);
// Entry point for the post-register-allocation pass: caches target info,
// then runs the LDM/STM-forming and return-merging optimizations over
// every basic block in the function.
1363 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1364 const TargetMachine &TM = Fn.getTarget();
1365 AFI = Fn.getInfo<ARMFunctionInfo>();
1366 TII = TM.getInstrInfo();
1367 TRI = TM.getRegisterInfo();
// Scavenger used to find scratch registers during merging; its deallocation
// is not visible in this chunk — presumably freed at the end of this
// function (TODO confirm).
1368 RS = new RegScavenger();
1369 isThumb2 = AFI->isThumb2Function();
1371 bool Modified = false;
1372 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1374 MachineBasicBlock &MBB = *MFI;
1375 Modified |= LoadStoreMultipleOpti(MBB);
// Folding returns into LDM requires v5T or later (load-to-PC semantics).
1376 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1377 Modified |= MergeReturnIntoLDM(MBB);
1385 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that moves
1386 /// load / stores from consecutive locations close to make it more
1387 /// likely they will be combined later.
1390 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1392 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target / function state, initialized in runOnMachineFunction.
1394 const TargetData *TD;
1395 const TargetInstrInfo *TII;
1396 const TargetRegisterInfo *TRI;
1397 const ARMSubtarget *STI;
1398 MachineRegisterInfo *MRI;
1399 MachineFunction *MF;
1401 virtual bool runOnMachineFunction(MachineFunction &Fn);
1403 virtual const char *getPassName() const {
1404 return "ARM pre- register allocation load / store optimization pass";
// Returns true (filling the out-parameters) if Op0 / Op1 can be combined
// into a single LDRD / STRD instruction.
1408 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1409 unsigned &NewOpc, unsigned &EvenReg,
1410 unsigned &OddReg, unsigned &BaseReg,
1412 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves same-base loads (or stores) in Ops next to each other; MI2LocMap
// gives each instruction's position in the block.
1414 bool RescheduleOps(MachineBasicBlock *MBB,
1415 SmallVector<MachineInstr*, 4> &Ops,
1416 unsigned Base, bool isLd,
1417 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1418 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification, replacement for typeid.
1420 char ARMPreAllocLoadStoreOpt::ID = 0;
// Entry point for the pre-register-allocation pass: caches target info,
// then reschedules loads / stores in every basic block of the function.
1423 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1424 TD = Fn.getTarget().getTargetData();
1425 TII = Fn.getTarget().getInstrInfo();
1426 TRI = Fn.getTarget().getRegisterInfo();
1427 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1428 MRI = &Fn.getRegInfo();
1431 bool Modified = false;
1432 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1434 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps (which
// transfer the registers in MemRegs, all using base register Base) can be
// moved together across the instructions in [I, E) without changing
// behavior, and whether the resulting register-pressure increase is
// acceptable. isLd distinguishes a group of loads from a group of stores.
1439 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1440 MachineBasicBlock::iterator I,
1441 MachineBasicBlock::iterator E,
1442 SmallPtrSet<MachineInstr*, 4> &MemOps,
1443 SmallSet<unsigned, 4> &MemRegs,
1444 const TargetRegisterInfo *TRI) {
1445 // Are there stores / loads / calls between them?
1446 // FIXME: This is overly conservative. We should make use of alias information
// Registers whose live ranges would be stretched by the motion.
1448 SmallSet<unsigned, 4> AddedRegPressure;
// Ignore debug values and the memory ops that are themselves being moved.
1450 if (I->isDebugValue() || MemOps.count(&*I))
1452 const TargetInstrDesc &TID = I->getDesc();
// Calls, terminators and instructions with unmodeled side effects are
// barriers the group cannot be moved across.
1453 if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
// Loads may not be moved across an intervening store.
1455 if (isLd && TID.mayStore())
1460 // It's not safe to move the first 'str' down.
1463 // str r4, [r0, #+4]
// Scan operands: a def of the base register blocks the motion; any other
// register extends the pressure estimate unless it's one of the group's own.
1467 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1468 MachineOperand &MO = I->getOperand(j);
1471 unsigned Reg = MO.getReg();
1472 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1474 if (Reg != Base && !MemRegs.count(Reg))
1475 AddedRegPressure.insert(Reg);
1479 // Estimate register pressure increase due to the transformation.
1480 if (MemRegs.size() <= 4)
1481 // Ok if we are moving small number of instructions.
1483 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Check whether Op0 and Op1 (two i32 loads or two i32
// stores sharing a base register) can be replaced by one LDRD / STRD.
// On success fills in the new opcode, the register pair, the base register,
// the encoded offset, the predicate, and the debug location.
1487 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1489 unsigned &NewOpc, unsigned &EvenReg,
1490 unsigned &OddReg, unsigned &BaseReg,
1491 int &Offset, unsigned &PredReg,
1492 ARMCC::CondCodes &Pred,
1494 // Make sure we're allowed to generate LDRD/STRD.
1495 if (!STI->hasV5TEOps())
1498 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the i32 load / store opcode to the corresponding doubleword opcode.
1500 unsigned Opcode = Op0->getOpcode();
1501 if (Opcode == ARM::LDRi12)
1503 else if (Opcode == ARM::STRi12)
1505 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1506 NewOpc = ARM::t2LDRDi8;
1509 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1510 NewOpc = ARM::t2STRDi8;
1516 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Reject accesses without a single known memory operand and volatile ones.
1517 if (!Op0->hasOneMemOperand() ||
1518 !(*Op0->memoperands_begin())->getValue() ||
1519 (*Op0->memoperands_begin())->isVolatile())
1522 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1523 const Function *Func = MF->getFunction();
1524 unsigned ReqAlign = STI->hasV6Ops()
1525 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1526 : 8; // Pre-v6 need 8-byte align
1527 if (Align < ReqAlign)
1530 // Then make sure the immediate offset fits.
1531 int OffImm = getMemoryOpOffset(Op0);
1533 int Limit = (1 << 8) * Scale;
1534 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// Non-Thumb2 LDRD/STRD encodes the offset via addrmode3: a sign (AddSub)
// plus a magnitude, packed by getAM3Opc below.
1538 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1540 AddSub = ARM_AM::sub;
1543 int Limit = (1 << 8) * Scale;
1544 if (OffImm >= Limit || (OffImm & (Scale-1)))
1546 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two transfer registers must be distinct to form a pair.
1548 EvenReg = Op0->getOperand(0).getReg();
1549 OddReg = Op1->getOperand(0).getReg();
1550 if (EvenReg == OddReg)
1552 BaseReg = Op0->getOperand(1).getReg();
1553 Pred = llvm::getInstrPredicate(Op0, PredReg);
1554 dl = Op0->getDebugLoc();
// Comparator ordering memory instructions by decreasing memory offset; used
// to sort a same-base op list before rescheduling.
1559 struct OffsetCompare {
1560 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1561 int LOffset = getMemoryOpOffset(LHS);
1562 int ROffset = getMemoryOpOffset(RHS);
// Duplicate base+offset combinations are filtered out before sorting, so
// distinct instructions must have distinct offsets (strict weak ordering).
1563 assert(LHS == RHS || LOffset != ROffset);
1564 return LOffset > ROffset;
// RescheduleOps - Move the loads (or stores, per isLd) in Ops, which all use
// base register Base, next to each other so later passes can merge them into
// LDM / STM; runs of exactly two may be rewritten directly into LDRD / STRD.
// MI2LocMap maps each instruction to its position within the block.
1569 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1570 SmallVector<MachineInstr*, 4> &Ops,
1571 unsigned Base, bool isLd,
1572 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1573 bool RetVal = false;
1575 // Sort by offset (in reverse order).
1576 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1578 // The loads / stores of the same base are in order. Scan them from first to
1579 // last and check for the following:
1580 // 1. Any def of base.
1582 while (Ops.size() > 1) {
1583 unsigned FirstLoc = ~0U;
1584 unsigned LastLoc = 0;
1585 MachineInstr *FirstOp = 0;
1586 MachineInstr *LastOp = 0;
1588 unsigned LastOpcode = 0;
1589 unsigned LastBytes = 0;
1590 unsigned NumMove = 0;
// Walk from the end of the sorted vector (lowest offset) upwards, growing a
// run of same-opcode ops at exactly consecutive offsets.
1591 for (int i = Ops.size() - 1; i >= 0; --i) {
1592 MachineInstr *Op = Ops[i];
1593 unsigned Loc = MI2LocMap[Op];
1594 if (Loc <= FirstLoc) {
1598 if (Loc >= LastLoc) {
1603 unsigned Opcode = Op->getOpcode();
1604 if (LastOpcode && Opcode != LastOpcode)
// The run ends unless this op's offset follows the previous one exactly.
1607 int Offset = getMemoryOpOffset(Op);
1608 unsigned Bytes = getLSMultipleTransferSize(Op);
1610 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1613 LastOffset = Offset;
1615 LastOpcode = Opcode;
1616 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and their transfer registers for the
// safety / profitability check below.
1623 SmallPtrSet<MachineInstr*, 4> MemOps;
1624 SmallSet<unsigned, 4> MemRegs;
1625 for (int i = NumMove-1; i >= 0; --i) {
1626 MemOps.insert(Ops[i]);
1627 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1630 // Be conservative, if the instructions are too far apart, don't
1631 // move them. We want to limit the increase of register pressure.
1632 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1634 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1635 MemOps, MemRegs, TRI);
1637 for (unsigned i = 0; i != NumMove; ++i)
1640 // This is the new location for the loads / stores.
// Loads move up to the first op, stores down to the last; skip past any
// group members and debug values already at the insertion point.
1641 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1642 while (InsertPos != MBB->end()
1643 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1646 // If we are moving a pair of loads / stores, see if it makes sense
1647 // to try to allocate a pair of registers that can form register pairs.
1648 MachineInstr *Op0 = Ops.back();
1649 MachineInstr *Op1 = Ops[Ops.size()-2];
1650 unsigned EvenReg = 0, OddReg = 0;
1651 unsigned BaseReg = 0, PredReg = 0;
1652 ARMCC::CondCodes Pred = ARMCC::AL;
1654 unsigned NewOpc = 0;
1657 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1658 EvenReg, OddReg, BaseReg,
1659 Offset, PredReg, Pred, isT2)) {
1663 // Form the pair instruction.
1665 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1666 dl, TII->get(NewOpc))
1667 .addReg(EvenReg, RegState::Define)
1668 .addReg(OddReg, RegState::Define)
1670 // FIXME: We're converting from LDRi12 to an insn that still
1671 // uses addrmode2, so we need an explicit offset reg. It should
1672 // always be reg0 since we're transforming LDRi12s.
1675 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1678 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1679 dl, TII->get(NewOpc))
1683 // FIXME: We're converting from LDRi12 to an insn that still
1684 // uses addrmode2, so we need an explicit offset reg. It should
1685 // always be reg0 since we're transforming STRi12s.
1688 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1694 // Add register allocation hints to form register pairs.
1695 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1696 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Not pairable: just splice each op of the run up to the insertion point.
1698 for (unsigned i = 0; i != NumMove; ++i) {
1699 MachineInstr *Op = Ops.back();
1701 MBB->splice(InsertPos, MBB, Op);
1705 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Bucket the block's unpredicated loads and
// stores by base register, then call RescheduleOps on each bucket so
// same-base ops end up adjacent. Calls and terminators delimit the regions
// within which rescheduling is allowed.
1715 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1716 bool RetVal = false;
1718 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1719 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1720 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1721 SmallVector<unsigned, 4> LdBases;
1722 SmallVector<unsigned, 4> StBases;
1725 MachineBasicBlock::iterator MBBI = MBB->begin();
1726 MachineBasicBlock::iterator E = MBB->end();
1728 for (; MBBI != E; ++MBBI) {
1729 MachineInstr *MI = MBBI;
1730 const TargetInstrDesc &TID = MI->getDesc();
1731 if (TID.isCall() || TID.isTerminator()) {
1732 // Stop at barriers.
// Number each real instruction so RescheduleOps can measure distances.
1737 if (!MI->isDebugValue())
1738 MI2LocMap[MI] = ++Loc;
1740 if (!isMemoryOp(MI))
// Only unpredicated memory ops are considered for rescheduling.
1742 unsigned PredReg = 0;
1743 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1746 int Opc = MI->getOpcode();
1747 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1748 unsigned Base = MI->getOperand(1).getReg();
1749 int Offset = getMemoryOpOffset(MI);
1751 bool StopHere = false;
// Loads: append to the bucket for this base, unless the same base+offset
// was already seen (a duplicate access — see StopHere handling below).
1753 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1754 Base2LdsMap.find(Base);
1755 if (BI != Base2LdsMap.end()) {
1756 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1757 if (Offset == getMemoryOpOffset(BI->second[i])) {
1763 BI->second.push_back(MI);
1765 SmallVector<MachineInstr*, 4> MIs;
1767 Base2LdsMap[Base] = MIs;
1768 LdBases.push_back(Base);
// Stores: identical bookkeeping against the store buckets.
1771 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1772 Base2StsMap.find(Base);
1773 if (BI != Base2StsMap.end()) {
1774 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1775 if (Offset == getMemoryOpOffset(BI->second[i])) {
1781 BI->second.push_back(MI);
1783 SmallVector<MachineInstr*, 4> MIs;
1785 Base2StsMap[Base] = MIs;
1786 StBases.push_back(Base);
1791 // Found a duplicate (a base+offset combination that's seen earlier).
1798 // Re-schedule loads.
1799 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1800 unsigned Base = LdBases[i];
1801 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1803 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1806 // Re-schedule stores.
1807 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1808 unsigned Base = StBases[i];
1809 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1811 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the per-region buckets and continue after the barrier.
1815 Base2LdsMap.clear();
1816 Base2StsMap.clear();
1826 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1827 /// optimization pass.
// PreAlloc selects the pre-register-allocation rescheduling pass; otherwise
// the post-RA LDM/STM-forming pass is returned. NOTE(review): the branch on
// PreAlloc (original line 1829) is missing from this chunk of the file.
1828 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1830 return new ARMPreAllocLoadStoreOpt();
1831 return new ARMLoadStoreOpt();