//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines load / store
/// instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  ARMFunctionInfo *AFI;
  RegScavenger *RS;
  bool isThumb2;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";
  }
private:
  struct MemOpQueueEntry {
    int Offset;
    unsigned Reg;
    bool isKill;
    unsigned Position;
    MachineBasicBlock::iterator MBBI;
    bool Merged;
    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
  };
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;
  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      MemOpQueue &memOps,
                      unsigned memOpsBegin,
                      unsigned memOpsEnd,
                      unsigned insertAfter,
                      int Offset,
                      unsigned Base,
                      bool BaseKill,
                      int Opcode,
                      ARMCC::CondCodes Pred,
                      unsigned PredReg,
                      unsigned Scratch,
                      DebugLoc dl,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVector<MachineBasicBlock::iterator, 4> &Merges);

  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                bool &Advance,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 bool &Advance,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  // Cases mapping each load / store opcode handled by this pass to its
  // load / store multiple counterpart (e.g. ARM::LDR -> ARM::LDM) elided.
  default: llvm_unreachable("Unhandled opcode!");
  }
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
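/// Illustrative example (hypothetical registers and offsets, not taken from a
/// particular test case): with Base = r4, Offset = 0 and Regs = {r0, r1, r2}
/// in ascending order, the three loads
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// are covered by the single instruction this function emits:
///   ldmia r4, {r0, r1, r2}
/// Note that MergeOps only builds the LDM / STM; erasing the original loads /
/// stores is done by the caller (MergeOpsUpdate).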
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - Call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;
    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
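/// A sketch of the constraint this routine enforces (register names are
/// hypothetical): a queue of {r1 @ +0, r2 @ +4, r5 @ +8} can be merged in one
/// go because both the offsets and the register numbers increase, whereas
/// {r2 @ +0, r1 @ +4} has to be split at the second entry since r1 < r2.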
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  unsigned Count = 1;
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
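// For example (hypothetical operands): with Base = r4, Bytes = 16, Limit = 0
// and an AL predicate, isMatchingIncrement accepts "add r4, r4, #16" (or its
// Thumb2 t2ADDri form), while "add r5, r4, #16" or a different immediate is
// rejected. isMatchingDecrement behaves the same way for "sub".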
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  // Per-opcode cases elided: the individual load / store opcodes return their
  // fixed transfer size (4 or 8 bytes); for LDM / STM style opcodes the size
  // is one word per register operand:
    return (MI->getNumOperands() - 4) * 4;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).getReg() == Base)
      return false;
  }
  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of the operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
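/// Illustrative examples (hypothetical registers):
///   ldr r0, [r1]
///   add r1, r1, #4
/// =>
///   ldr r0, [r1], #4        (post-indexed form)
///
///   sub r1, r1, #4
///   ldr r0, [r1]
/// =>
///   ldr r0, [r1, #-4]!      (pre-indexed form)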
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }
  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }
  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
                                   ARM_AM::db : ARM_AM::ia);
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
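/// getMemoryOpOffset - Return the signed byte offset encoded in the
/// addressing-mode immediate of a load / store handled by this pass; e.g. a
/// hypothetical "ldr r0, [r1, #-8]" yields -8. The Thumb2 i8/i12 forms already
/// carry a plain immediate and are returned unchanged.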
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
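/// FixInvalidRegPairOp - An LDRD / STRD needs an even/odd register pair. If
/// register allocation did not produce such a pair, rewrite the instruction
/// either as a single LDM / STM (ascending registers, no offset) or as two
/// separate LDR / STR instructions. Hypothetical example of the split case:
///   ldrd r2, r1, [r0, #8]
/// =>
///   ldr r2, [r0, #8]
///   ldr r1, [r0, #12]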
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;
    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
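/// Sketch of the overall effect on a hypothetical input block:
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
///   ldr r12, [r0, #12]
/// =>
///   ldmib r0, {r2, r3, r12}
/// The current chain is restarted whenever the base, opcode, or predicate
/// changes, or when an offset collides with one already in the queue.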
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Clobber = false;
        }
        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }
    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;
    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }
      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
struct OffsetCompare {
  bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  }
};
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves loads /
/// stores from consecutive locations closer together to make it more likely
/// they will be combined later.
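/// Hypothetical example: with two loads off the same base separated by an
/// unrelated instruction,
///   ldr r2, [r0]
///   add r5, r5, r6
///   ldr r3, [r0, #4]
/// the pass moves the second load next to the first (when it is safe and
/// profitable to do so), and may then turn the pair into an ldrd:
///   ldrd r2, r3, [r0]
///   add r5, r5, r6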
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetData *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";
  }

private:
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &OffReg, int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
                        bool &isT2);
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVector<MachineInstr*, 4> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if available.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
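// The heuristic above, in numbers (hypothetical): if the group being moved
// uses at most 4 distinct data registers the move is always considered
// profitable; otherwise it is only done when the count of other registers
// referenced by the instructions being hoisted over stays within twice the
// number of data registers in the group.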
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }
        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }
    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}