1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass-wide statistics counters (emitted with -stats). They count, per kind,
// how many multiple-transfer instructions this pass creates and how many
// ldrd/strd instructions it rewrites.
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
55 /// load / store instructions to form ldm / stm instructions.
// NOTE(review): several struct members and braces are elided from this
// listing (original line numbers jump); the declarations below are the
// visible subset of the pass's interface.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
// Cached target hooks, set up per-function (initialization not visible here).
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - one candidate load/store: its immediate Offset, its
// Position (ordinal within the block) and an iterator to the instruction.
// Merged is set once the entry has been folded into an LDM/STM.
75 struct MemOpQueueEntry {
78 MachineBasicBlock::iterator MBBI;
80 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
81 : Offset(o), Position(p), MBBI(i), Merged(false) {}
83 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
84 typedef MemOpQueue::iterator MemOpQueueIter;
// Core merge primitive: emit one LDM/STM covering Regs (see definition below).
86 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
87 int Offset, unsigned Base, bool BaseKill, int Opcode,
88 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
89 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
// Wrapper around MergeOps that also fixes kill flags and erases merged ops.
90 void MergeOpsUpdate(MachineBasicBlock &MBB,
99 ARMCC::CondCodes Pred,
103 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
// Greedy scan from SIndex that groups consecutive-offset ops into merges.
104 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
105 int Opcode, unsigned Size,
106 ARMCC::CondCodes Pred, unsigned PredReg,
107 unsigned Scratch, MemOpQueue &MemOps,
108 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
110 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
111 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
112 MachineBasicBlock::iterator &MBBI);
113 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
114 MachineBasicBlock::iterator MBBI,
115 const TargetInstrInfo *TII,
117 MachineBasicBlock::iterator &I);
118 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
119 MachineBasicBlock::iterator MBBI,
121 MachineBasicBlock::iterator &I);
122 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
123 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Static pass identifier used by MachineFunctionPass registration.
125 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode to the
// corresponding multiple-transfer opcode (e.g. LDR -> LDM). The switch body
// is elided from this listing; only the default case is visible.
128 static int getLoadStoreMultipleOpcode(int Opcode) {
156 default: llvm_unreachable("Unhandled opcode!");
// isT2i32Load - True for the Thumb2 32-bit integer load forms (12-bit or
// 8-bit immediate offset encodings).
161 static bool isT2i32Load(unsigned Opc) {
162 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True for any 32-bit integer load this pass handles:
// ARM-mode LDR or either Thumb2 immediate form.
165 static bool isi32Load(unsigned Opc) {
166 return Opc == ARM::LDR || isT2i32Load(Opc);
// isT2i32Store - True for the Thumb2 32-bit integer store forms (12-bit or
// 8-bit immediate offset encodings).
169 static bool isT2i32Store(unsigned Opc) {
170 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True for any 32-bit integer store this pass handles:
// ARM-mode STR or either Thumb2 immediate form.
173 static bool isi32Store(unsigned Opc) {
174 return Opc == ARM::STR || isT2i32Store(Opc);
177 /// MergeOps - Create and insert a LDM or STM with Base as base register and
178 /// registers in Regs as the register operands that would be loaded / stored.
179 /// It returns true if the transformation is done.
// NOTE(review): this listing elides lines inside the function (original line
// numbers jump), so several branch bodies and the final return are missing.
181 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
182 MachineBasicBlock::iterator MBBI,
183 int Offset, unsigned Base, bool BaseKill,
184 int Opcode, ARMCC::CondCodes Pred,
185 unsigned PredReg, unsigned Scratch, DebugLoc dl,
186 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
187 // Only a single register to load / store. Don't bother.
188 unsigned NumRegs = Regs.size();
// Pick the addressing submode from the starting offset; default is ia
// (increment-after). isAM4 distinguishes integer LDM/STM (addr mode 4)
// from VFP VLDM/VSTM (addr mode 5).
192 ARM_AM::AMSubMode Mode = ARM_AM::ia;
193 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
194 if (isAM4 && Offset == 4) {
196 // Thumb2 does not support ldmib / stmib.
199 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
201 // Thumb2 does not support ldmda / stmda.
204 } else if (isAM4 && Offset == -4 * (int)NumRegs) {
206 } else if (Offset != 0) {
207 // If starting offset isn't zero, insert a MI to materialize a new base.
208 // But only do so if it is cost effective, i.e. merging more than two
214 if (isi32Load(Opcode))
215 // If it is a load, then just use one of the destination register to
216 // use as the new base.
217 NewBase = Regs[NumRegs-1].first;
219 // Use the scratch register to use as a new base.
// Select ADD vs SUB (elided branch) and the SP-relative Thumb2 variants.
224 int BaseOpc = !isThumb2
226 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
230 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// Bail out if the offset is not encodable as a (T2) shifter-operand imm.
233 int ImmedOffset = isThumb2
234 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
235 if (ImmedOffset == -1)
236 // FIXME: Try t2ADDri12 or t2SUBri12?
237 return false; // Probably not worth it then.
// Materialize NewBase = Base +/- Offset before the merged instruction.
239 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
240 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
241 .addImm(Pred).addReg(PredReg).addReg(0);
243 BaseKill = true; // New base is always killed right its use.
// Build the LDM/STM (AM4) or VLDM/VSTM (AM5) and append the register list.
246 bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
247 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
248 Opcode == ARM::VLDRD);
249 Opcode = getLoadStoreMultipleOpcode(Opcode);
250 MachineInstrBuilder MIB = (isAM4)
251 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
252 .addReg(Base, getKillRegState(BaseKill))
253 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
254 : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
255 .addReg(Base, getKillRegState(BaseKill))
// AM5 counts in S-register units, so D registers count double.
256 .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
257 .addImm(Pred).addReg(PredReg);
258 for (unsigned i = 0; i != NumRegs; ++i)
259 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
260 | getKillRegState(Regs[i].second));
265 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success: fixes up kill flags, records the merged instruction, and erases
// the individual memops in [memOpsBegin, memOpsEnd).
267 void ARMLoadStoreOpt::
268 MergeOpsUpdate(MachineBasicBlock &MBB,
270 unsigned memOpsBegin,
272 unsigned insertAfter,
277 ARMCC::CondCodes Pred,
281 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
282 // First calculate which of the registers should be killed by the merged
284 SmallVector<std::pair<unsigned, bool>, 8> Regs;
285 const unsigned insertPos = memOps[insertAfter].Position;
286 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
287 const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
288 unsigned Reg = MO.getReg();
289 bool isKill = MO.isKill();
291 // If we are inserting the merged operation after an unmerged operation that
292 // uses the same register, make sure to transfer any kill flag.
293 for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
294 if (memOps[j].Position<insertPos) {
295 const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
296 if (MOJ.getReg() == Reg && MOJ.isKill())
// (kill transfer assignment elided from this listing)
300 Regs.push_back(std::make_pair(Reg, isKill));
303 // Try to do the merge.
304 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
// If MergeOps fails, nothing was emitted; leave MemOps untouched.
306 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
307 Pred, PredReg, Scratch, dl, Regs))
310 // Merge succeeded, update records.
// prior(Loc) is the just-inserted LDM/STM (it was built before Loc).
311 Merges.push_back(prior(Loc));
312 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
313 // Remove kill flags from any unmerged memops that come before insertPos.
314 if (Regs[i-memOpsBegin].second)
315 for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
316 if (memOps[j].Position<insertPos) {
317 MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
318 if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
319 MOJ.setIsKill(false);
// Erase the now-redundant single load/store and mark its queue entry.
321 MBB.erase(memOps[i].MBBI);
322 memOps[i].Merged = true;
326 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
327 /// load / store multiple instructions.
// Scans MemOps from SIndex, accumulating entries whose offsets ascend by
// Size and whose register numbers satisfy the AM4/AM5 ordering rules; on a
// mismatch it flushes the run via MergeOpsUpdate and recurses on the rest.
329 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
330 unsigned Base, int Opcode, unsigned Size,
331 ARMCC::CondCodes Pred, unsigned PredReg,
332 unsigned Scratch, MemOpQueue &MemOps,
333 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
334 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
335 int Offset = MemOps[SIndex].Offset;
336 int SOffset = Offset;
337 unsigned insertAfter = SIndex;
338 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
339 DebugLoc dl = Loc->getDebugLoc();
// PRegNum tracks the previous entry's register number to enforce ordering;
// undef operands sort last (UINT_MAX).
340 const MachineOperand &PMO = Loc->getOperand(0);
341 unsigned PReg = PMO.getReg();
342 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
343 : ARMRegisterInfo::getRegisterNumbering(PReg);
346 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
347 int NewOffset = MemOps[i].Offset;
348 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
349 unsigned Reg = MO.getReg();
350 unsigned RegNum = MO.isUndef() ? UINT_MAX
351 : ARMRegisterInfo::getRegisterNumbering(Reg);
352 // AM4 - register numbers in ascending order.
353 // AM5 - consecutive register numbers in ascending order.
354 // Can only do up to 16 double-word registers per insn.
355 if (Reg != ARM::SP &&
356 NewOffset == Offset + (int)Size &&
357 ((isAM4 && RegNum > PRegNum)
358 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
// (accumulation of Offset/PRegNum/Count elided from this listing)
363 // Can't merge this in. Try merge the earlier ones first.
364 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
365 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to handle the remaining entries starting at the mismatch.
366 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// Keep insertAfter pointing at the latest-positioned entry in the run.
371 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Flush the final run covering [SIndex, MemOps.size()).
375 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
376 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
377 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - True if MI is a "Base = Base - Bytes" with the given
// predicate. Limit, when non-zero, is an exclusive upper bound on Bytes
// (offset-encoding range of the combined instruction).
381 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
382 unsigned Bytes, unsigned Limit,
383 ARMCC::CondCodes Pred, unsigned PredReg){
384 unsigned MyPredReg = 0;
// Only plain immediate subtracts (ARM, Thumb1 SP, Thumb2 incl. SP forms).
387 if (MI->getOpcode() != ARM::t2SUBri &&
388 MI->getOpcode() != ARM::t2SUBrSPi &&
389 MI->getOpcode() != ARM::t2SUBrSPi12 &&
390 MI->getOpcode() != ARM::tSUBspi &&
391 MI->getOpcode() != ARM::SUBri)
394 // Make sure the offset fits in 8 bits.
395 if (Bytes <= 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, hence the scale of 4.
398 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
399 return (MI->getOperand(0).getReg() == Base &&
400 MI->getOperand(1).getReg() == Base &&
401 (MI->getOperand(2).getImm()*Scale) == Bytes &&
402 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
403 MyPredReg == PredReg);
// isMatchingIncrement - True if MI is a "Base = Base + Bytes" with the given
// predicate; mirror image of isMatchingDecrement above.
406 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
407 unsigned Bytes, unsigned Limit,
408 ARMCC::CondCodes Pred, unsigned PredReg){
409 unsigned MyPredReg = 0;
// Only plain immediate adds (ARM, Thumb1 SP, Thumb2 incl. SP forms).
412 if (MI->getOpcode() != ARM::t2ADDri &&
413 MI->getOpcode() != ARM::t2ADDrSPi &&
414 MI->getOpcode() != ARM::t2ADDrSPi12 &&
415 MI->getOpcode() != ARM::tADDspi &&
416 MI->getOpcode() != ARM::ADDri)
419 if (Bytes <= 0 || (Limit && Bytes >= Limit))
420 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate in words, hence the scale of 4.
423 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
424 return (MI->getOperand(0).getReg() == Base &&
425 MI->getOperand(1).getReg() == Base &&
426 (MI->getOperand(2).getImm()*Scale) == Bytes &&
427 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
428 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes a load/store (multiple)
// transfers. The per-opcode cases are elided from this listing; the visible
// fallbacks count 4 bytes per register operand (AM4) or use the AM5 offset
// field scaled to bytes.
431 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
432 switch (MI->getOpcode()) {
450 return (MI->getNumOperands() - 4) * 4;
455 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
// getUpdatingLSMultipleOpcode - Map a load/store-multiple opcode to its
// base-writeback (_UPD) variant.
459 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
461 case ARM::LDM: return ARM::LDM_UPD;
462 case ARM::STM: return ARM::STM_UPD;
463 case ARM::t2LDM: return ARM::t2LDM_UPD;
464 case ARM::t2STM: return ARM::t2STM_UPD;
465 case ARM::VLDMS: return ARM::VLDMS_UPD;
466 case ARM::VLDMD: return ARM::VLDMD_UPD;
467 case ARM::VSTMS: return ARM::VSTMS_UPD;
468 case ARM::VSTMD: return ARM::VSTMD_UPD;
469 default: llvm_unreachable("Unhandled opcode!");
474 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
475 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
477 /// stmia rn, <ra, rb, rc>
478 /// rn := rn + 4 * 3;
480 /// stmia rn!, <ra, rb, rc>
482 /// rn := rn - 4 * 3;
483 /// ldmia rn, <ra, rb, rc>
485 /// ldmdb rn!, <ra, rb, rc>
// NOTE(review): parts of this function are elided (several merge cases and
// the erase of the folded add/sub are not visible in this listing).
486 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
487 MachineBasicBlock::iterator MBBI,
489 MachineBasicBlock::iterator &I) {
490 MachineInstr *MI = MBBI;
491 unsigned Base = MI->getOperand(0).getReg();
492 bool BaseKill = MI->getOperand(0).isKill();
493 unsigned Bytes = getLSMultipleTransferSize(MI);
494 unsigned PredReg = 0;
495 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
496 int Opcode = MI->getOpcode();
497 DebugLoc dl = MI->getDebugLoc();
498 bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
499 Opcode == ARM::STM || Opcode == ARM::t2STM);
501 bool DoMerge = false;
502 ARM_AM::AMSubMode Mode = ARM_AM::ia;
506 // Can't use an updating ld/st if the base register is also a dest
507 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
508 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
509 if (MI->getOperand(i).getReg() == Base)
// Decode the current submode (AM4 vs AM5 operand encodings differ).
512 Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
514 // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
515 Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
516 Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
519 // Try merging with the previous instruction.
520 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
521 if (MBBI != BeginMBBI) {
522 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the match.
523 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
526 if (Mode == ARM_AM::ia &&
527 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
530 } else if (isAM4 && Mode == ARM_AM::ib &&
531 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
536 if (Mode == ARM_AM::ia &&
537 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
546 // Try merging with the next instruction.
547 MachineBasicBlock::iterator EndMBBI = MBB.end();
548 if (!DoMerge && MBBI != EndMBBI) {
549 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
550 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
553 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
554 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
556 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
557 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
561 if (Mode == ARM_AM::ia &&
562 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Build the _UPD replacement: first operand is the written-back base.
578 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
579 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
580 .addReg(Base, getDefRegState(true)) // WB base register
581 .addReg(Base, getKillRegState(BaseKill));
583 // [t2]LDM_UPD, [t2]STM_UPD
584 MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
585 .addImm(Pred).addReg(PredReg);
587 // VLDM[SD}_UPD, VSTM[SD]_UPD
588 MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
589 .addImm(Pred).addReg(PredReg);
591 // Transfer the rest of operands.
592 for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
593 MIB.addOperand(MI->getOperand(OpNum));
594 // Transfer memoperands.
595 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
// pre-indexed (writeback before access) form. VFP loads/stores have no
// dedicated pre-indexed form, so they map to the _UPD multiple variants.
601 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
603 case ARM::LDR: return ARM::LDR_PRE;
604 case ARM::STR: return ARM::STR_PRE;
605 case ARM::VLDRS: return ARM::VLDMS_UPD;
606 case ARM::VLDRD: return ARM::VLDMD_UPD;
607 case ARM::VSTRS: return ARM::VSTMS_UPD;
608 case ARM::VSTRD: return ARM::VSTMD_UPD;
611 return ARM::t2LDR_PRE;
614 return ARM::t2STR_PRE;
615 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
// post-indexed (writeback after access) form; VFP ops again fall back to
// the _UPD multiple variants.
620 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
622 case ARM::LDR: return ARM::LDR_POST;
623 case ARM::STR: return ARM::STR_POST;
624 case ARM::VLDRS: return ARM::VLDMS_UPD;
625 case ARM::VLDRD: return ARM::VLDMD_UPD;
626 case ARM::VSTRS: return ARM::VSTMS_UPD;
627 case ARM::VSTRD: return ARM::VSTMD_UPD;
630 return ARM::t2LDR_POST;
633 return ARM::t2STR_POST;
634 default: llvm_unreachable("Unhandled opcode!");
639 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
640 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// NOTE(review): lines are elided inside this function (early returns, the
// DoMerge assignments, and the erase of the folded add/sub are not visible).
641 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
642 MachineBasicBlock::iterator MBBI,
643 const TargetInstrInfo *TII,
645 MachineBasicBlock::iterator &I) {
646 MachineInstr *MI = MBBI;
647 unsigned Base = MI->getOperand(1).getReg();
648 bool BaseKill = MI->getOperand(1).isKill();
649 unsigned Bytes = getLSMultipleTransferSize(MI);
650 int Opcode = MI->getOpcode();
651 DebugLoc dl = MI->getDebugLoc();
652 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
653 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
654 bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
// Only zero-offset forms can absorb a base update.
655 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
657 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
659 if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
660 if (MI->getOperand(2).getImm() != 0)
663 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
664 // Can't do the merge if the destination register is the same as the would-be
665 // writeback register.
666 if (isLd && MI->getOperand(0).getReg() == Base)
669 unsigned PredReg = 0;
670 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
671 bool DoMerge = false;
672 ARM_AM::AddrOpc AddSub = ARM_AM::add;
674 // AM2 - 12 bits, thumb2 - 8 bits.
675 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
677 // Try merging with the previous instruction.
678 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
679 if (MBBI != BeginMBBI) {
680 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip debug values so they don't block the match.
681 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
683 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
685 AddSub = ARM_AM::sub;
687 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A preceding add/sub folds into the pre-indexed form.
691 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
696 // Try merging with the next instruction.
697 MachineBasicBlock::iterator EndMBBI = MBB.end();
698 if (!DoMerge && MBBI != EndMBBI) {
699 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
700 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
703 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
705 AddSub = ARM_AM::sub;
706 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A trailing add/sub folds into the post-indexed form.
710 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
// Encode the offset for the chosen addressing mode (AM5 / AM2 / Thumb2).
722 bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
725 Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
728 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
730 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
733 // VLDM[SD}_UPD, VSTM[SD]_UPD
734 MachineOperand &MO = MI->getOperand(0);
735 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
736 .addReg(Base, getDefRegState(true)) // WB base register
737 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
739 .addImm(Pred).addReg(PredReg)
740 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
741 getKillRegState(MO.isKill())));
744 // LDR_PRE, LDR_POST,
745 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
746 .addReg(Base, RegState::Define)
747 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
749 // t2LDR_PRE, t2LDR_POST
750 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
751 .addReg(Base, RegState::Define)
752 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
754 MachineOperand &MO = MI->getOperand(0);
// STR_PRE, STR_POST (ARM mode: extra zero offset-register operand).
757 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
758 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
759 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
761 // t2STR_PRE, t2STR_POST
762 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
763 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
764 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
771 /// isMemoryOp - Returns true if instruction is a memory operations (that this
772 /// pass is capable of operating on).
// Filters out volatile and under-aligned accesses, undef operands, and
// (in the elided switch at the end) any opcode the pass doesn't handle.
773 static bool isMemoryOp(const MachineInstr *MI) {
774 if (MI->hasOneMemOperand()) {
775 const MachineMemOperand *MMO = *MI->memoperands_begin();
777 // Don't touch volatile memory accesses - we may be changing their order.
778 if (MMO->isVolatile())
781 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
783 if (MMO->getAlignment() < 4)
787 // str <undef> could probably be eliminated entirely, but for now we just want
788 // to avoid making a mess of it.
789 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
790 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
791 MI->getOperand(0).isUndef())
794 // Likewise don't mess with references to undefined addresses.
795 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
796 MI->getOperand(1).isUndef())
799 int Opcode = MI->getOpcode();
// Per-opcode checks (case labels elided): ARM LDR/STR additionally require
// no offset register (operand 2 must be reg 0).
804 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
807 return MI->getOperand(1).isReg();
810 return MI->getOperand(1).isReg();
815 return MI->getOperand(1).isReg();
820 /// AdvanceRS - Advance register scavenger to just before the earliest memory
821 /// op that is being merged.
822 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the queue entry with the smallest Position (earliest in the block).
823 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
824 unsigned Position = MemOps[0].Position;
825 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
826 if (MemOps[i].Position < Position) {
827 Position = MemOps[i].Position;
828 Loc = MemOps[i].MBBI;
// Step the scavenger forward to the instruction just before Loc.
832 if (Loc != MBB.begin())
833 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode the signed immediate byte offset of a single
// load/store, handling the AM2/AM3/AM5 encodings (sign carried in a
// sub-opcode bit) and the plain Thumb2 immediates.
836 static int getMemoryOpOffset(const MachineInstr *MI) {
837 int Opcode = MI->getOpcode();
838 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
839 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
// The offset field is always the third operand from the end.
840 unsigned NumOperands = MI->getDesc().getNumOperands();
841 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// Thumb2 immediates are already plain signed byte offsets.
843 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
844 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
845 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
// AM5 offsets are in words, hence the scale by 4.
849 ? ARM_AM::getAM2Offset(OffField)
850 : (isAM3 ? ARM_AM::getAM3Offset(OffField)
851 : ARM_AM::getAM5Offset(OffField) * 4);
// Negate when the encoding's add/sub bit says "sub" (negation elided).
853 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
856 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
859 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR (isDef) or STR with the given register,
// base, optional offset register and predicate. Used by FixInvalidRegPairOp
// to split an LDRD/STRD into two word accesses. isT2 selects the Thumb2
// operand layout (no offset register, plain immediate).
865 static void InsertLDR_STR(MachineBasicBlock &MBB,
866 MachineBasicBlock::iterator &MBBI,
867 int OffImm, bool isDef,
868 DebugLoc dl, unsigned NewOpc,
869 unsigned Reg, bool RegDeadKill, bool RegUndef,
870 unsigned BaseReg, bool BaseKill, bool BaseUndef,
871 unsigned OffReg, bool OffKill, bool OffUndef,
872 ARMCC::CondCodes Pred, unsigned PredReg,
873 const TargetInstrInfo *TII, bool isT2) {
// Re-encode the signed immediate into the AM2 add/sub form (ARM mode).
877 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
879 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
// Load form: Reg is a def (possibly dead).
882 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
884 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
885 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
// ARM mode carries an offset-register operand; Thumb2 does not (elided if).
887 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
888 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is a use (possibly killed/undef).
890 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
892 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
893 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
895 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
896 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair is not a
// legal even/odd pair: either into an LDM/STM (ascending regs, zero offset)
// or into two single LDR/STR instructions, taking care of kill/undef flags
// and of ordering when the first load would clobber the base/offset register.
900 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
901 MachineBasicBlock::iterator &MBBI) {
902 MachineInstr *MI = &*MBBI;
903 unsigned Opcode = MI->getOpcode();
904 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
905 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
906 unsigned EvenReg = MI->getOperand(0).getReg();
907 unsigned OddReg = MI->getOperand(1).getReg();
908 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
909 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// Already a legal even/odd pair: nothing to fix (early return elided).
910 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
// Gather operand flags before erasing/rewriting the instruction.
913 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
914 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
915 bool EvenDeadKill = isLd ?
916 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
917 bool EvenUndef = MI->getOperand(0).isUndef();
918 bool OddDeadKill = isLd ?
919 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
920 bool OddUndef = MI->getOperand(1).isUndef();
921 const MachineOperand &BaseOp = MI->getOperand(2);
922 unsigned BaseReg = BaseOp.getReg();
923 bool BaseKill = BaseOp.isKill();
924 bool BaseUndef = BaseOp.isUndef();
// Thumb2 LDRD/STRD has no offset register operand.
925 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
926 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
927 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
928 int OffImm = getMemoryOpOffset(MI);
929 unsigned PredReg = 0;
930 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
932 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
933 // Ascending register numbers and no offset. It's safe to change it to a
935 unsigned NewOpc = (isLd)
936 ? (isT2 ? ARM::t2LDM : ARM::LDM)
937 : (isT2 ? ARM::t2STM : ARM::STM);
// Load case: both regs become defs.
939 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
940 .addReg(BaseReg, getKillRegState(BaseKill))
941 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
942 .addImm(Pred).addReg(PredReg)
943 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
944 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store case: both regs are uses (register operands elided on 952/954).
947 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
948 .addReg(BaseReg, getKillRegState(BaseKill))
949 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
950 .addImm(Pred).addReg(PredReg)
952 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
954 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
958 // Split into two instructions.
959 assert((!isT2 || !OffReg) &&
960 "Thumb2 ldrd / strd does not encode offset register!");
961 unsigned NewOpc = (isLd)
962 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
963 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR)
964 DebugLoc dl = MBBI->getDebugLoc();
965 // If this is a load and base register is killed, it may have been
966 // re-defed by the load, make sure the first load does not clobber it.
968 (BaseKill || OffKill) &&
969 (TRI->regsOverlap(EvenReg, BaseReg) ||
970 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
971 assert(!TRI->regsOverlap(OddReg, BaseReg) &&
972 (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
// Emit the odd half first so the even half (which overlaps the base)
// performs its address computation last.
973 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
974 OddReg, OddDeadKill, false,
975 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
976 Pred, PredReg, TII, isT2);
977 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
978 EvenReg, EvenDeadKill, false,
979 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
980 Pred, PredReg, TII, isT2);
982 if (OddReg == EvenReg && EvenDeadKill) {
983 // If the two source operands are the same, the kill marker is
984 // probably on the first one. e.g.
985 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
986 EvenDeadKill = false;
// Normal order: even half at OffImm, odd half at OffImm+4.
989 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
990 EvenReg, EvenDeadKill, EvenUndef,
991 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
992 Pred, PredReg, TII, isT2);
993 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
994 OddReg, OddDeadKill, OddUndef,
995 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
996 Pred, PredReg, TII, isT2);
1010 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1011 /// ops of the same base and incrementing offset into LDM / STM ops.
1012 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1013 unsigned NumMerges = 0;
1014 unsigned NumMemOps = 0;
1016 unsigned CurrBase = 0;
1018 unsigned CurrSize = 0;
1019 ARMCC::CondCodes CurrPred = ARMCC::AL;
1020 unsigned CurrPredReg = 0;
1021 unsigned Position = 0;
1022 SmallVector<MachineBasicBlock::iterator,4> Merges;
1024 RS->enterBasicBlock(&MBB);
1025 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1027 if (MBBI->isDebugValue()) {
1031 if (FixInvalidRegPairOp(MBB, MBBI))
1034 bool Advance = false;
1035 bool TryMerge = false;
1036 bool Clobber = false;
1038 bool isMemOp = isMemoryOp(MBBI);
1040 int Opcode = MBBI->getOpcode();
1041 unsigned Size = getLSMultipleTransferSize(MBBI);
1042 unsigned Base = MBBI->getOperand(1).getReg();
1043 unsigned PredReg = 0;
1044 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1045 int Offset = getMemoryOpOffset(MBBI);
1048 // r5 := ldr [r5, #4]
1049 // r6 := ldr [r5, #8]
1051 // The second ldr has effectively broken the chain even though it
1052 // looks like the later ldr(s) use the same base register. Try to
1053 // merge the ldr's so far, including this one. But don't try to
1054 // combine the following ldr(s).
1055 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1056 if (CurrBase == 0 && !Clobber) {
1057 // Start of a new chain.
1062 CurrPredReg = PredReg;
1063 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
1072 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1073 // No need to match PredReg.
1074 // Continue adding to the queue.
1075 if (Offset > MemOps.back().Offset) {
1076 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
1080 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1082 if (Offset < I->Offset) {
1083 MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
1087 } else if (Offset == I->Offset) {
1088 // Collision! This can't be merged!
1101 // Reach the end of the block, try merging the memory instructions.
1107 if (NumMemOps > 1) {
1108 // Try to find a free register to use as a new base in case it's needed.
1109 // First advance to the instruction just before the start of the chain.
1110 AdvanceRS(MBB, MemOps);
1111 // Find a scratch register.
1112 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1113 // Process the load / store instructions.
1114 RS->forward(prior(MBBI));
1118 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1119 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1121 // Try folding preceding/trailing base inc/dec into the generated
1123 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1124 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1126 NumMerges += Merges.size();
1128 // Try folding preceding/trailing base inc/dec into those load/store
1129 // that were not merged to form LDM/STM ops.
1130 for (unsigned i = 0; i != NumMemOps; ++i)
1131 if (!MemOps[i].Merged)
1132 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1135 // RS may be pointing to an instruction that's deleted.
1136 RS->skipTo(prior(MBBI));
1137 } else if (NumMemOps == 1) {
1138 // Try folding preceding/trailing base inc/dec into the single
1140 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1142 RS->forward(prior(MBBI));
1149 CurrPred = ARMCC::AL;
1156 // If iterator hasn't been advanced and this is not a memory op, skip it.
1157 // It can't start a new chain anyway.
1158 if (!Advance && !isMemOp && MBBI != E) {
1164 return NumMerges > 0;
// OffsetCompare - Strict-weak-ordering functor that sorts memory
// instructions by their immediate offset in *descending* order
// (largest offset first); used by RescheduleOps via std::sort.
1168 struct OffsetCompare {
1169 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1170 int LOffset = getMemoryOpOffset(LHS);
1171 int ROffset = getMemoryOpOffset(RHS);
// Two distinct instructions being sorted must never share an offset;
// duplicate base+offset pairs are presumably filtered out before the
// sort (see the StopHere handling in RescheduleLoadStoreInstrs).
1172 assert(LHS == RHS || LOffset != ROffset);
1173 return LOffset > ROffset;
1178 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1179 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1180 /// directly restores the value of LR into pc.
1181 ///   ldmfd sp!, {..., lr}
1184 ///   ldmfd sp!, {..., lr}
1187 ///   ldmfd sp!, {..., pc}
1188 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1189 if (MBB.empty()) return false;
// Look at the last instruction of the block only: it must be one of
// the recognized return forms, and must not be the sole instruction
// (there has to be a preceding instruction to merge into).
1191 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1192 if (MBBI != MBB.begin() &&
1193 (MBBI->getOpcode() == ARM::BX_RET ||
1194 MBBI->getOpcode() == ARM::tBX_RET ||
1195 MBBI->getOpcode() == ARM::MOVPCLR)) {
1196 MachineInstr *PrevMI = prior(MBBI);
// The instruction before the return must be a writeback LDM (ARM or
// Thumb2 form)...
1197 if (PrevMI->getOpcode() == ARM::LDM_UPD ||
1198 PrevMI->getOpcode() == ARM::t2LDM_UPD) {
// ...and its last register operand must be LR, so that retargeting the
// final reload to pc is equivalent to the return.
1199 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1200 if (MO.getReg() != ARM::LR)
// Rewrite the LDM in place as the return-forming variant.
1202 unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1203 PrevMI->setDesc(TII->get(NewOpc));
// runOnMachineFunction - Entry point for the post-register-allocation
// load / store optimization pass.  Caches target hooks, then runs the
// LDM/STM-forming and return-merging transformations over every basic
// block, accumulating whether anything changed.
1212 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1213 const TargetMachine &TM = Fn.getTarget();
1214 AFI = Fn.getInfo<ARMFunctionInfo>();
1215 TII = TM.getInstrInfo();
1216 TRI = TM.getRegisterInfo();
// The scavenger supplies scratch base registers during merging.
// NOTE(review): allocated with new here; the matching delete is in an
// elided portion of this function — confirm it is freed before return.
1217 RS = new RegScavenger();
1218 isThumb2 = AFI->isThumb2Function();
1220 bool Modified = false;
1221 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1223 MachineBasicBlock &MBB = *MFI;
1224 Modified |= LoadStoreMultipleOpti(MBB);
1225 Modified |= MergeReturnIntoLDM(MBB);
1233 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that moves
1234 /// load / stores from consecutive locations close to make it more
1235 /// likely they will be combined later.
1238 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1240 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
// Cached target hooks and per-function state, set up in
// runOnMachineFunction before any rescheduling is attempted.
1242 const TargetData *TD;
1243 const TargetInstrInfo *TII;
1244 const TargetRegisterInfo *TRI;
1245 const ARMSubtarget *STI;
1246 MachineRegisterInfo *MRI;
1247 MachineFunction *MF;
1249 virtual bool runOnMachineFunction(MachineFunction &Fn);
1251 virtual const char *getPassName() const {
1252 return "ARM pre- register allocation load / store optimization pass";
// CanFormLdStDWord - Decide whether the pair (Op0, Op1) can be
// combined into a single LDRD/STRD, filling in the out-parameters
// (new opcode, register operands, encoded offset, predicate, ...).
1256 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1257 unsigned &NewOpc, unsigned &EvenReg,
1258 unsigned &OddReg, unsigned &BaseReg,
1259 unsigned &OffReg, int &Offset,
1260 unsigned &PredReg, ARMCC::CondCodes &Pred,
// RescheduleOps - Move the loads (or stores, per isLd) in Ops, which
// all use the same Base register, next to each other.
1262 bool RescheduleOps(MachineBasicBlock *MBB,
1263 SmallVector<MachineInstr*, 4> &Ops,
1264 unsigned Base, bool isLd,
1265 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
// RescheduleLoadStoreInstrs - Per-block driver: bucket memory ops by
// base register and invoke RescheduleOps on each bucket.
1266 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1268 char ARMPreAllocLoadStoreOpt::ID = 0;
// runOnMachineFunction - Entry point for the pre-register-allocation
// pass.  Caches target/register info, then reschedules load / store
// instructions one basic block at a time.
1271 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1272 TD = Fn.getTarget().getTargetData();
1273 TII = Fn.getTarget().getInstrInfo();
1274 TRI = Fn.getTarget().getRegisterInfo();
1275 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1276 MRI = &Fn.getRegInfo();
1279 bool Modified = false;
1280 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1282 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Scan the instructions in [I, E) between
// the memory ops being considered (MemOps, whose data registers are in
// MemRegs) and decide whether moving the ops next to each other is
// both safe (no intervening hazard, no redefinition of Base) and
// profitable (bounded register-pressure increase).
1287 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1288 MachineBasicBlock::iterator I,
1289 MachineBasicBlock::iterator E,
1290 SmallPtrSet<MachineInstr*, 4> &MemOps,
1291 SmallSet<unsigned, 4> &MemRegs,
1292 const TargetRegisterInfo *TRI) {
1293 // Are there stores / loads / calls between them?
1294 // FIXME: This is overly conservative. We should make use of alias information
1296 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the memory ops themselves.
1298 if (I->isDebugValue() || MemOps.count(&*I))
// Calls, terminators and instructions with unmodeled side effects are
// hard barriers.
1300 const TargetInstrDesc &TID = I->getDesc();
1301 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
// When moving loads, an intervening store may alias them.
1303 if (isLd && TID.mayStore())
1308 // It's not safe to move the first 'str' down.
1311 // str r4, [r0, #+4]
// Inspect every register operand of the intervening instruction.
1315 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1316 MachineOperand &MO = I->getOperand(j);
1319 unsigned Reg = MO.getReg();
// Redefining (any overlap of) the base register breaks the chain.
1320 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
// Any other register used here would have its live range stretched by
// the move; count it toward the pressure estimate.
1322 if (Reg != Base && !MemRegs.count(Reg))
1323 AddedRegPressure.insert(Reg);
1327 // Estimate register pressure increase due to the transformation.
1328 if (MemRegs.size() <= 4)
1329 // Ok if we are moving small number of instructions.
// Otherwise only allow a bounded increase relative to the number of
// memory-op registers involved.
1331 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Return true if the adjacent pair (Op0, Op1) can
// be replaced by a single LDRD/STRD (or t2LDRDi8/t2STRDi8).  On
// success fills in the new opcode, destination/source registers, base
// and offset-register operands, the encoded offset, and the predicate.
1335 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1337 unsigned &NewOpc, unsigned &EvenReg,
1338 unsigned &OddReg, unsigned &BaseReg,
1339 unsigned &OffReg, int &Offset,
1341 ARMCC::CondCodes &Pred,
1343 // Make sure we're allowed to generate LDRD/STRD.
1344 if (!STI->hasV5TEOps())
1347 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the pair's opcode to the corresponding doubleword opcode.
1349 unsigned Opcode = Op0->getOpcode();
1350 if (Opcode == ARM::LDR)
1352 else if (Opcode == ARM::STR)
1354 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1355 NewOpc = ARM::t2LDRDi8;
1358 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1359 NewOpc = ARM::t2STRDi8;
1365 // Make sure the offset registers match.
1367 (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
1370 // Make sure the base address satisfies i64 ld / st alignment requirement.
// A known, non-volatile memory operand is required so the alignment
// can be checked against the target's 64-bit requirement.
1371 if (!Op0->hasOneMemOperand() ||
1372 !(*Op0->memoperands_begin())->getValue() ||
1373 (*Op0->memoperands_begin())->isVolatile())
1376 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1377 const Function *Func = MF->getFunction();
1378 unsigned ReqAlign = STI->hasV6Ops()
1379 ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
1380 : 8;  // Pre-v6 need 8-byte align
1381 if (Align < ReqAlign)
1384 // Then make sure the immediate offset fits.
1385 int OffImm = getMemoryOpOffset(Op0);
1389 // Can't fall back to t2LDRi8 / t2STRi8.
// Range check against the 8-bit scaled immediate field.
1392 int Limit = (1 << 8) * Scale;
1393 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM addressing-mode-3 path: negative offsets are encoded via the
// sub flag, then range-checked the same way.
1398 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1400 AddSub = ARM_AM::sub;
1403 int Limit = (1 << 8) * Scale;
1404 if (OffImm >= Limit || (OffImm & (Scale-1)))
1406 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD/STRD need two distinct transfer registers.
1408 EvenReg = Op0->getOperand(0).getReg();
1409 OddReg = Op1->getOperand(0).getReg();
1410 if (EvenReg == OddReg)
1412 BaseReg = Op0->getOperand(1).getReg();
1414 OffReg = Op0->getOperand(2).getReg();
1415 Pred = llvm::getInstrPredicate(Op0, PredReg);
1416 dl = Op0->getDebugLoc();
// RescheduleOps - Given all the loads (or stores, per isLd) in Ops
// that share the same Base register, find runs with consecutive
// offsets and move them next to each other so they can later be
// combined (e.g. into LDRD/STRD or LDM/STM).  MI2LocMap gives each
// instruction's original position for distance/ordering checks.
// Returns true if anything was moved or rewritten.
1420 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1421 SmallVector<MachineInstr*, 4> &Ops,
1422 unsigned Base, bool isLd,
1423 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1424 bool RetVal = false;
1426 // Sort by offset (in reverse order).
1427 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1429 // The loads / stores of the same base are in order. Scan them from first to
1430 // last and check for the following:
1431 // 1. Any def of base.
1433 while (Ops.size() > 1) {
// Track the earliest/latest original locations of the candidate run.
1434 unsigned FirstLoc = ~0U;
1435 unsigned LastLoc = 0;
1436 MachineInstr *FirstOp = 0;
1437 MachineInstr *LastOp = 0;
1439 unsigned LastOpcode = 0;
1440 unsigned LastBytes = 0;
1441 unsigned NumMove = 0;
// Ops is sorted by descending offset, so walking backwards visits
// instructions in increasing-offset order.
1442 for (int i = Ops.size() - 1; i >= 0; --i) {
1443 MachineInstr *Op = Ops[i];
1444 unsigned Loc = MI2LocMap[Op];
1445 if (Loc <= FirstLoc) {
1449 if (Loc >= LastLoc) {
// Only extend the run while the opcode matches and the offsets are
// exactly consecutive (previous offset + access size).
1454 unsigned Opcode = Op->getOpcode();
1455 if (LastOpcode && Opcode != LastOpcode)
1458 int Offset = getMemoryOpOffset(Op);
1459 unsigned Bytes = getLSMultipleTransferSize(Op);
1461 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1464 LastOffset = Offset;
1466 LastOpcode = Opcode;
1467 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and their transfer registers for the
// safety/profitability analysis.
1474 SmallPtrSet<MachineInstr*, 4> MemOps;
1475 SmallSet<unsigned, 4> MemRegs;
1476 for (int i = NumMove-1; i >= 0; --i) {
1477 MemOps.insert(Ops[i]);
1478 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1481 // Be conservative, if the instructions are too far apart, don't
1482 // move them. We want to limit the increase of register pressure.
1483 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1485 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1486 MemOps, MemRegs, TRI);
1488 for (unsigned i = 0; i != NumMove; ++i)
1491 // This is the new location for the loads / stores.
// Loads are hoisted up to the first op; stores sunk to the last op.
1492 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1493 while (InsertPos != MBB->end() && MemOps.count(InsertPos))
1496 // If we are moving a pair of loads / stores, see if it makes sense
1497 // to try to allocate a pair of registers that can form register pairs.
1498 MachineInstr *Op0 = Ops.back();
1499 MachineInstr *Op1 = Ops[Ops.size()-2];
1500 unsigned EvenReg = 0, OddReg = 0;
1501 unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
1502 ARMCC::CondCodes Pred = ARMCC::AL;
1504 unsigned NewOpc = 0;
// Exactly-two-op runs may be rewritten directly as LDRD/STRD.
1507 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1508 EvenReg, OddReg, BaseReg, OffReg,
1509 Offset, PredReg, Pred, isT2)) {
1513 // Form the pair instruction.
// Load form: two register defs, then base/offset/predicate operands.
1515 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1516 dl, TII->get(NewOpc))
1517 .addReg(EvenReg, RegState::Define)
1518 .addReg(OddReg, RegState::Define)
1522 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form (built in the elided else branch).
1525 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1526 dl, TII->get(NewOpc))
1532 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1538 // Add register allocation hints to form register pairs.
1539 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1540 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Otherwise just splice the run's instructions to the insert point.
1542 for (unsigned i = 0; i != NumMove; ++i) {
1543 MachineInstr *Op = Ops.back();
1545 MBB->splice(InsertPos, MBB, Op);
1549 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Walk one basic block, numbering each
// instruction and bucketing unpredicated loads and stores by their
// base register; then invoke RescheduleOps on each bucket so ops with
// the same base can be brought together.  Calls and terminators act
// as scheduling barriers.  Returns true if anything changed.
1559 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1560 bool RetVal = false;
// MI2LocMap records each instruction's sequence number; the Base2*
// maps collect loads/stores keyed by base register, and Ld/StBases
// remember insertion order of the bases.
1562 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1563 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1564 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1565 SmallVector<unsigned, 4> LdBases;
1566 SmallVector<unsigned, 4> StBases;
1569 MachineBasicBlock::iterator MBBI = MBB->begin();
1570 MachineBasicBlock::iterator E = MBB->end();
1572 for (; MBBI != E; ++MBBI) {
1573 MachineInstr *MI = MBBI;
1574 const TargetInstrDesc &TID = MI->getDesc();
1575 if (TID.isCall() || TID.isTerminator()) {
1576 // Stop at barriers.
// Debug values don't consume a sequence number.
1581 if (!MI->isDebugValue())
1582 MI2LocMap[MI] = ++Loc;
1584 if (!isMemoryOp(MI))
// Only unpredicated memory ops are considered for rescheduling.
1586 unsigned PredReg = 0;
1587 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1590 int Opc = MI->getOpcode();
1591 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1592 unsigned Base = MI->getOperand(1).getReg();
1593 int Offset = getMemoryOpOffset(MI);
1595 bool StopHere = false;
// Load path: append to this base's bucket unless the same base+offset
// was already seen (a duplicate stops the scan — see below).
1597 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1598 Base2LdsMap.find(Base);
1599 if (BI != Base2LdsMap.end()) {
1600 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1601 if (Offset == getMemoryOpOffset(BI->second[i])) {
1607 BI->second.push_back(MI);
// First load seen for this base: start a new bucket.
1609 SmallVector<MachineInstr*, 4> MIs;
1611 Base2LdsMap[Base] = MIs;
1612 LdBases.push_back(Base);
// Store path: mirrors the load path using Base2StsMap / StBases.
1615 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1616 Base2StsMap.find(Base);
1617 if (BI != Base2StsMap.end()) {
1618 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1619 if (Offset == getMemoryOpOffset(BI->second[i])) {
1625 BI->second.push_back(MI);
1627 SmallVector<MachineInstr*, 4> MIs;
1629 Base2StsMap[Base] = MIs;
1630 StBases.push_back(Base);
1635 // Found a duplicate (a base+offset combination that's seen earlier).
1642 // Re-schedule loads.
1643 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1644 unsigned Base = LdBases[i];
1645 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
// Only buckets with more than one op are worth rescheduling
// (presumably guarded by an elided size check here).
1647 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1650 // Re-schedule stores.
1651 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1652 unsigned Base = StBases[i];
1653 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1655 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state before continuing past a barrier.
1659 Base2LdsMap.clear();
1660 Base2StsMap.clear();
1670 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1671 /// optimization pass.
1672 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1674 return new ARMPreAllocLoadStoreOpt();
1675 return new ARMLoadStoreOpt();