1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/Compiler.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/SmallPtrSet.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
43 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
44 STATISTIC(NumSTMGened , "Number of stm instructions generated");
45 STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
46 STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
47 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
48 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
49 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
50 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
51 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
52 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
53 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
55 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
56 /// load / store instructions to form ldm / stm instructions.
// NOTE(review): garbled extraction -- every line keeps a stray source line
// number, and gaps in that numbering mark elided lines (the enclosing
// anonymous namespace, the static ID declaration, the STI/AFI/RS/isThumb2
// members, and MemOpQueueEntry's Offset/Position/Merged fields are not
// visible here). Restore from the pristine file before editing logic.
59 struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
61 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
// Target hooks cached by runOnMachineFunction for the duration of the pass.
63 const TargetInstrInfo *TII;
64 const TargetRegisterInfo *TRI;
69 virtual bool runOnMachineFunction(MachineFunction &Fn);
71 virtual const char *getPassName() const {
72 return "ARM load / store optimization pass";
// MemOpQueueEntry - one merge candidate: its immediate offset, its ordinal
// position within the basic block, and an iterator to the instruction.
// (The ctor initializer list shows the elided fields: Offset, Position,
// Merged.)
76 struct MemOpQueueEntry {
79 MachineBasicBlock::iterator MBBI;
81 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
82 : Offset(o), Position(p), MBBI(i), Merged(false) {};
84 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
85 typedef MemOpQueue::iterator MemOpQueueIter;
// MergeOps - emit a single LDM/STM covering Regs; true if it succeeded.
87 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
88 int Offset, unsigned Base, bool BaseKill, int Opcode,
89 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
90 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
// MergeLDR_STR - greedily merge the queued ops starting at index SIndex;
// iterators for the merged instructions are appended to Merges.
91 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
92 int Opcode, unsigned Size,
93 ARMCC::CondCodes Pred, unsigned PredReg,
94 unsigned Scratch, MemOpQueue &MemOps,
95 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
97 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
98 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
99 MachineBasicBlock::iterator &MBBI);
100 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
101 MachineBasicBlock::iterator MBBI,
102 const TargetInstrInfo *TII,
104 MachineBasicBlock::iterator &I);
105 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
106 MachineBasicBlock::iterator MBBI,
108 MachineBasicBlock::iterator &I);
109 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
110 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Out-of-line definition of the pass's static ID member.
112 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode to the
// corresponding load/store-multiple opcode (presumably LDR/STR ->
// LDM/STM, t2LDR*/t2STR* -> t2LDM/t2STM, FLDS/FLDD/FSTS/FSTD ->
// FLDMS/FLDMD/FSTMS/FSTMD, judging from the callers -- TODO confirm).
// NOTE(review): the switch cases (original lines 116-142) are elided in
// this garbled extraction; only the default case survives.
115 static int getLoadStoreMultipleOpcode(int Opcode) {
143   default: llvm_unreachable("Unhandled opcode!");
148 static bool isT2i32Load(unsigned Opc) {
149 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
152 static bool isi32Load(unsigned Opc) {
153 return Opc == ARM::LDR || isT2i32Load(Opc);
156 static bool isT2i32Store(unsigned Opc) {
157 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
160 static bool isi32Store(unsigned Opc) {
161 return Opc == ARM::STR || isT2i32Store(Opc);
164 /// MergeOps - Create and insert a LDM or STM with Base as base register and
165 /// registers in Regs as the register operands that would be loaded / stored.
166 /// It returns true if the transformation is done.
// NOTE(review): garbled extraction -- gaps in the stray line-number prefixes
// mark elided statements (the NumRegs<=1 early-out, the Mode assignments in
// each sub-mode branch, several braces, and the trailing 'return true').
// Do not edit logic here without the pristine file.
168 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
169 MachineBasicBlock::iterator MBBI,
170 int Offset, unsigned Base, bool BaseKill,
171 int Opcode, ARMCC::CondCodes Pred,
172 unsigned PredReg, unsigned Scratch, DebugLoc dl,
173 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
174 // Only a single register to load / store. Don't bother.
175 unsigned NumRegs = Regs.size();
// Pick the addressing sub-mode from the starting offset: ia is the default;
// the branches below handle ib (Offset==4), da, and db layouts for the
// integer (AM4) forms. The assignments themselves are elided in this view.
179 ARM_AM::AMSubMode Mode = ARM_AM::ia;
180 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
181 if (isAM4 && Offset == 4) {
183 // Thumb2 does not support ldmib / stmib.
186 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
188 // Thumb2 does not support ldmda / stmda.
191 } else if (isAM4 && Offset == -4 * (int)NumRegs) {
193 } else if (Offset != 0) {
194 // If starting offset isn't zero, insert a MI to materialize a new base.
195 // But only do so if it is cost effective, i.e. merging more than two
201 if (isi32Load(Opcode))
202 // If it is a load, then just use one of the destination register to
203 // use as the new base.
204 NewBase = Regs[NumRegs-1].first;
206 // Use the scratch register to use as a new base.
// Select an ADD or SUB (SP-relative variants when Base is SP) to
// materialize Base+Offset into NewBase; the opcode choice for the
// non-Thumb2 / positive-offset arms is elided in this view.
211 int BaseOpc = !isThumb2
213 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
217 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
220 int ImmedOffset = isThumb2
221 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
222 if (ImmedOffset == -1)
223 // FIXME: Try t2ADDri12 or t2SUBri12?
224 return false; // Probably not worth it then.
226 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
227 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
228 .addImm(Pred).addReg(PredReg).addReg(0);
230 BaseKill = true; // New base is always killed right its use.
// Build the multiple op: AM4 (integer LDM/STM) encodes only the sub-mode;
// AM5 (FP FLDM/FSTM) also encodes the register count (doubled for
// double-precision registers).
233 bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
234 bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
235 Opcode = getLoadStoreMultipleOpcode(Opcode);
236 MachineInstrBuilder MIB = (isAM4)
237 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
238 .addReg(Base, getKillRegState(BaseKill))
239 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
240 : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
241 .addReg(Base, getKillRegState(BaseKill))
242 .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
243 .addImm(Pred).addReg(PredReg);
// Append every transfer register, def for loads, with its kill state.
244 for (unsigned i = 0; i != NumRegs; ++i)
245 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
246 | getKillRegState(Regs[i].second));
251 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
252 /// load / store multiple instructions.
// NOTE(review): garbled extraction -- gaps in the stray line-number prefixes
// mark elided statements (the PRegNum/Offset updates inside the scan loop,
// the recursive-call argument tail, braces, and the function epilogue).
// Do not edit logic here without the pristine file.
254 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
255 unsigned Base, int Opcode, unsigned Size,
256 ARMCC::CondCodes Pred, unsigned PredReg,
257 unsigned Scratch, MemOpQueue &MemOps,
258 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
259 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
// Seed the candidate run with the op at SIndex.
260 int Offset = MemOps[SIndex].Offset;
261 int SOffset = Offset;
262 unsigned Pos = MemOps[SIndex].Position;
263 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
264 DebugLoc dl = Loc->getDebugLoc();
265 unsigned PReg = Loc->getOperand(0).getReg();
266 unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
267 bool isKill = Loc->getOperand(0).isKill();
269 SmallVector<std::pair<unsigned,bool>, 8> Regs;
270 Regs.push_back(std::make_pair(PReg, isKill));
// Extend the run while offsets are contiguous (step == Size) and register
// numbers satisfy the addressing-mode ordering constraint.
271 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
272 int NewOffset = MemOps[i].Offset;
273 unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
274 unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
275 isKill = MemOps[i].MBBI->getOperand(0).isKill();
276 // AM4 - register numbers in ascending order.
277 // AM5 - consecutive register numbers in ascending order.
278 if (NewOffset == Offset + (int)Size &&
279 ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
281 Regs.push_back(std::make_pair(Reg, isKill));
284 // Can't merge this in. Try merge the earlier ones first.
285 if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
286 Scratch, dl, Regs)) {
287 Merges.push_back(prior(Loc));
288 for (unsigned j = SIndex; j < i; ++j) {
289 MBB.erase(MemOps[j].MBBI);
290 MemOps[j].Merged = true;
// Recurse to try merging the remainder of the queue from index i on.
293 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// Track the latest position so the merged op is inserted after every
// instruction it replaces.
298 if (MemOps[i].Position > Pos) {
299 Pos = MemOps[i].Position;
300 Loc = MemOps[i].MBBI;
// Whole queue tail formed one run: merge it all in one LDM/STM.
304 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
305 if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
306 Scratch, dl, Regs)) {
307 Merges.push_back(prior(Loc));
308 for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
309 MBB.erase(MemOps[i].MBBI);
310 MemOps[i].Merged = true;
// isMatchingDecrement - Return true if MI is a SUB that decrements Base by
// exactly Bytes (0 < Bytes, and < Limit when Limit != 0) in place, under the
// same predicate. NOTE(review): garbled extraction -- the 'return false'
// statements following the two guard conditions below are elided.
317 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
318 unsigned Bytes, unsigned Limit,
319 ARMCC::CondCodes Pred, unsigned PredReg){
320 unsigned MyPredReg = 0;
// Only plain/SP-relative immediate subtracts qualify.
323 if (MI->getOpcode() != ARM::t2SUBri &&
324 MI->getOpcode() != ARM::t2SUBrSPi &&
325 MI->getOpcode() != ARM::t2SUBrSPi12 &&
326 MI->getOpcode() != ARM::tSUBspi &&
327 MI->getOpcode() != ARM::SUBri)
330 // Make sure the offset fits in 8 bits.
331 if (Bytes <= 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, hence the x4 scale.
334 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
// Must write Base from Base, by the exact byte amount, with matching
// predicate and predicate register.
335 return (MI->getOperand(0).getReg() == Base &&
336 MI->getOperand(1).getReg() == Base &&
337 (MI->getOperand(2).getImm()*Scale) == Bytes &&
338 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
339 MyPredReg == PredReg);
// isMatchingIncrement - Mirror of isMatchingDecrement for ADDs: true if MI
// increments Base in place by exactly Bytes under the same predicate.
// NOTE(review): garbled extraction -- the 'return false' statements after
// the two guard conditions below are elided.
342 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
343 unsigned Bytes, unsigned Limit,
344 ARMCC::CondCodes Pred, unsigned PredReg){
345 unsigned MyPredReg = 0;
// Only plain/SP-relative immediate adds qualify.
348 if (MI->getOpcode() != ARM::t2ADDri &&
349 MI->getOpcode() != ARM::t2ADDrSPi &&
350 MI->getOpcode() != ARM::t2ADDrSPi12 &&
351 MI->getOpcode() != ARM::tADDspi &&
352 MI->getOpcode() != ARM::ADDri)
355 if (Bytes <= 0 || (Limit && Bytes >= Limit))
356 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate in words, hence the x4 scale.
359 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
360 return (MI->getOperand(0).getReg() == Base &&
361 MI->getOperand(1).getReg() == Base &&
362 (MI->getOperand(2).getImm()*Scale) == Bytes &&
363 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
364 MyPredReg == PredReg);
// getLSMultipleTransferSize - Return the number of bytes an instruction
// transfers. NOTE(review): garbled extraction -- the case labels (original
// lines 369-390) are elided; only two return arms survive. The first arm
// (operand count minus the 4 non-register operands, times 4 bytes) looks
// like the LDM/STM case, the second like the FLDM/FSTM (AM5) case -- TODO
// confirm against the pristine file.
367 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
368 switch (MI->getOpcode()) {
386 return (MI->getNumOperands() - 4) * 4;
391 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
395 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
396 /// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
398 /// stmia rn, <ra, rb, rc>
399 /// rn := rn + 4 * 3;
401 /// stmia rn!, <ra, rb, rc>
403 /// rn := rn - 4 * 3;
404 /// ldmia rn, <ra, rb, rc>
406 /// ldmdb rn!, <ra, rb, rc>
// NOTE(review): garbled extraction -- gaps in the stray line-number prefixes
// mark elided statements ('return false' after guards, instruction erasure
// of the matched add/sub, iterator fix-ups, braces). Do not edit logic here
// without the pristine file.
407 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
408 MachineBasicBlock::iterator MBBI,
410 MachineBasicBlock::iterator &I) {
411 MachineInstr *MI = MBBI;
412 unsigned Base = MI->getOperand(0).getReg();
413 unsigned Bytes = getLSMultipleTransferSize(MI);
414 unsigned PredReg = 0;
415 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
416 int Opcode = MI->getOpcode();
// AM4 = integer LDM/STM; otherwise this is an FP (AM5) multiple op.
417 bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
418 Opcode == ARM::STM || Opcode == ARM::t2STM;
// Already has writeback -- nothing to fold (the bail-out is elided here).
421 if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
424 // Can't use the updating AM4 sub-mode if the base register is also a dest
425 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
426 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
427 if (MI->getOperand(i).getReg() == Base)
431 ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
// Try folding a preceding SUB of the transfer size into a writeback form
// with the complementary sub-mode.
432 if (MBBI != MBB.begin()) {
433 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
434 if (Mode == ARM_AM::ia &&
435 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
436 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
439 } else if (Mode == ARM_AM::ib &&
440 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
441 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
// Or fold a following ADD/SUB, keeping the same sub-mode with writeback.
447 if (MBBI != MBB.end()) {
448 MachineBasicBlock::iterator NextMBBI = next(MBBI);
449 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
450 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
451 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
458 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
459 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
460 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
470 // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
471 if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
474 ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
475 unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
// Same idea for the FP forms: preceding decrement -> db with writeback...
476 if (MBBI != MBB.begin()) {
477 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
478 if (Mode == ARM_AM::ia &&
479 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
480 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
// ...or following increment -> ia with writeback.
486 if (MBBI != MBB.end()) {
487 MachineBasicBlock::iterator NextMBBI = next(MBBI);
488 if (Mode == ARM_AM::ia &&
489 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
490 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
504 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
506 case ARM::LDR: return ARM::LDR_PRE;
507 case ARM::STR: return ARM::STR_PRE;
508 case ARM::FLDS: return ARM::FLDMS;
509 case ARM::FLDD: return ARM::FLDMD;
510 case ARM::FSTS: return ARM::FSTMS;
511 case ARM::FSTD: return ARM::FSTMD;
514 return ARM::t2LDR_PRE;
517 return ARM::t2STR_PRE;
518 default: llvm_unreachable("Unhandled opcode!");
523 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
525 case ARM::LDR: return ARM::LDR_POST;
526 case ARM::STR: return ARM::STR_POST;
527 case ARM::FLDS: return ARM::FLDMS;
528 case ARM::FLDD: return ARM::FLDMD;
529 case ARM::FSTS: return ARM::FSTMS;
530 case ARM::FSTD: return ARM::FSTMD;
533 return ARM::t2LDR_POST;
536 return ARM::t2STR_POST;
537 default: llvm_unreachable("Unhandled opcode!");
542 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
543 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// NOTE(review): garbled extraction -- gaps in the stray line-number prefixes
// mark elided statements ('return false' bail-outs, DoMerge assignments,
// erasure of the matched add/sub, the if/else dispatch between the emission
// arms below, and braces). Do not edit logic here without the pristine file.
544 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
545 MachineBasicBlock::iterator MBBI,
546 const TargetInstrInfo *TII,
548 MachineBasicBlock::iterator &I) {
549 MachineInstr *MI = MBBI;
550 unsigned Base = MI->getOperand(1).getReg();
551 bool BaseKill = MI->getOperand(1).isKill();
552 unsigned Bytes = getLSMultipleTransferSize(MI);
553 int Opcode = MI->getOpcode();
554 DebugLoc dl = MI->getDebugLoc();
555 bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
556 Opcode == ARM::FSTD || Opcode == ARM::FSTS;
557 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
// Only zero-offset forms can absorb a base update (bail-outs elided here).
558 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
560 else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
562 else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
563 if (MI->getOperand(2).getImm() != 0)
566 bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
567 // Can't do the merge if the destination register is the same as the would-be
568 // writeback register.
569 if (isLd && MI->getOperand(0).getReg() == Base)
572 unsigned PredReg = 0;
573 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
574 bool DoMerge = false;
575 ARM_AM::AddrOpc AddSub = ARM_AM::add;
577 // AM2 - 12 bits, thumb2 - 8 bits.
578 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// A preceding matching add/sub becomes a pre-indexed form...
579 if (MBBI != MBB.begin()) {
580 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
581 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
583 AddSub = ARM_AM::sub;
584 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
586 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
588 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
// ...and a following one becomes a post-indexed form.
594 if (!DoMerge && MBBI != MBB.end()) {
595 MachineBasicBlock::iterator NextMBBI = next(MBBI);
597 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
599 AddSub = ARM_AM::sub;
600 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
601 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
603 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
// Encode the writeback amount in the form each addressing mode expects.
617 bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
620 Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
622 : ARM_AM::ia, true, (isDPR ? 2 : 1));
624 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
626 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// Loads: FLDM with writeback...
630 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
631 .addReg(Base, getKillRegState(BaseKill))
632 .addImm(Offset).addImm(Pred).addReg(PredReg)
633 .addReg(MI->getOperand(0).getReg(), RegState::Define);
635 // LDR_PRE, LDR_POST,
636 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
637 .addReg(Base, RegState::Define)
638 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
640 // t2LDR_PRE, t2LDR_POST
641 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
642 .addReg(Base, RegState::Define)
643 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Stores: FSTM with writeback, STR_PRE/POST, t2STR_PRE/POST.
645 MachineOperand &MO = MI->getOperand(0);
648 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
649 .addImm(Pred).addReg(PredReg)
650 .addReg(MO.getReg(), getKillRegState(MO.isKill()));
653 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
654 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
655 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
657 // t2STR_PRE, t2STR_POST
658 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
659 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
660 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
667 /// isMemoryOp - Returns true if instruction is a memory operations (that this
668 /// pass is capable of operating on).
// NOTE(review): garbled extraction -- the switch header and case labels
// (original lines 671-685) are elided; only the per-group return expressions
// survive. The first arm additionally requires a zero offset register
// (operand 2), presumably the ARM-mode LDR/STR group -- TODO confirm which
// opcodes the remaining arms cover against the pristine file.
669 static bool isMemoryOp(const MachineInstr *MI) {
670 int Opcode = MI->getOpcode();
675 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
678 return MI->getOperand(1).isReg();
681 return MI->getOperand(1).isReg();
686 return MI->getOperand(1).isReg();
691 /// AdvanceRS - Advance register scavenger to just before the earliest memory
692 /// op that is being merged.
693 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
694 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
695 unsigned Position = MemOps[0].Position;
696 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
697 if (MemOps[i].Position < Position) {
698 Position = MemOps[i].Position;
699 Loc = MemOps[i].MBBI;
703 if (Loc != MBB.begin())
704 RS->forward(prior(Loc));
// getMemoryOpOffset - Return the signed immediate offset of a memory op.
// Thumb2 immediate-offset forms store the offset directly; ARM AM2/AM3/AM5
// forms encode magnitude and direction separately, so the magnitude is
// decoded first and then negated when the sub bit is set.
// NOTE(review): garbled extraction -- the 'return OffField' for the t2
// case, the declaration head of the decoded Offset, the isAM2/isAM3/else
// dispatch around the three sign checks, the negations, and the final
// return are elided. Do not edit logic here without the pristine file.
707 static int getMemoryOpOffset(const MachineInstr *MI) {
708 int Opcode = MI->getOpcode();
709 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
710 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
711 unsigned NumOperands = MI->getDesc().getNumOperands();
// The offset immediate is always the third-from-last operand here.
712 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
714 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
715 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
716 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
// AM5 offsets are in words, hence the x4 scale.
720 ? ARM_AM::getAM2Offset(OffField)
721 : (isAM3 ? ARM_AM::getAM3Offset(OffField)
722 : ARM_AM::getAM5Offset(OffField) * 4);
724 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
727 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
730 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR or STR (ARM AM2 or Thumb2 form) with the
// given register/base/offset-register operands and their dead/kill/undef
// flags; used by FixInvalidRegPairOp to split an illegal LDRD/STRD into two
// single transfers. NOTE(review): garbled extraction -- the Offset
// declaration, the isDef/else and AM2-vs-t2 dispatch lines, and several
// closing braces are elided. Do not edit logic here without the pristine
// file.
736 static void InsertLDR_STR(MachineBasicBlock &MBB,
737 MachineBasicBlock::iterator &MBBI,
738 int OffImm, bool isDef,
739 DebugLoc dl, unsigned NewOpc,
740 unsigned Reg, bool RegDeadKill, bool RegUndef,
741 unsigned BaseReg, bool BaseKill, bool BaseUndef,
742 unsigned OffReg, bool OffKill, bool OffUndef,
743 ARMCC::CondCodes Pred, unsigned PredReg,
744 const TargetInstrInfo *TII, bool isT2) {
// Re-encode the signed immediate as an AM2 opc (sub for negative offsets).
748 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
750 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
// Load arm: Reg is a def (dead flag as requested).
753 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
755 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
756 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
// The offset-register operand is only added for the non-t2 (AM2) form.
758 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
759 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store arm: Reg is a use (kill flag as requested).
761 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
763 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
764 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
766 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
767 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair does not
// satisfy the even/odd pairing constraint: either into a two-register
// LDM/STM (ascending registers, zero offset) or into two single LDR/STR
// instructions. NOTE(review): garbled extraction -- gaps in the stray
// line-number prefixes mark elided statements (the 'return false' for
// already-valid pairs, instruction erasure, statistic updates, iterator
// fix-ups, and braces). Do not edit logic here without the pristine file.
771 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
772 MachineBasicBlock::iterator &MBBI) {
773 MachineInstr *MI = &*MBBI;
774 unsigned Opcode = MI->getOpcode();
775 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
776 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
777 unsigned EvenReg = MI->getOperand(0).getReg();
778 unsigned OddReg = MI->getOperand(1).getReg();
779 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
780 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A properly paired even/odd LDRD/STRD is already legal; the early
// 'return false' is elided in this view.
781 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
784 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
785 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
786 bool EvenDeadKill = isLd ?
787 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
788 bool EvenUndef = MI->getOperand(0).isUndef();
789 bool OddDeadKill = isLd ?
790 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
791 bool OddUndef = MI->getOperand(1).isUndef();
792 const MachineOperand &BaseOp = MI->getOperand(2);
793 unsigned BaseReg = BaseOp.getReg();
794 bool BaseKill = BaseOp.isKill();
795 bool BaseUndef = BaseOp.isUndef();
// t2 LDRD/STRD has no offset register operand (see assert below).
796 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
797 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
798 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
799 int OffImm = getMemoryOpOffset(MI);
800 unsigned PredReg = 0;
801 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
803 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
804 // Ascending register numbers and no offset. It's safe to change it to a
806 unsigned NewOpc = (isLd)
807 ? (isT2 ? ARM::t2LDM : ARM::LDM)
808 : (isT2 ? ARM::t2STM : ARM::STM);
// Load flavor: both transfer registers are defs...
810 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
811 .addReg(BaseReg, getKillRegState(BaseKill))
812 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
813 .addImm(Pred).addReg(PredReg)
814 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
815 .addReg(OddReg, getDefRegState(isLd)| getDeadRegState(OddDeadKill));
// ...store flavor: both are uses (the addReg heads are elided here).
818 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
819 .addReg(BaseReg, getKillRegState(BaseKill))
820 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
821 .addImm(Pred).addReg(PredReg)
823 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
825 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
829 // Split into two instructions.
830 assert((!isT2 || !OffReg) &&
831 "Thumb2 ldrd / strd does not encode offset register!");
832 unsigned NewOpc = (isLd)
833 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
834 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR)  ;
835 DebugLoc dl = MBBI->getDebugLoc();
836 // If this is a load and base register is killed, it may have been
837 // re-defed by the load, make sure the first load does not clobber it.
839 (BaseKill || OffKill) &&
840 (TRI->regsOverlap(EvenReg, BaseReg) ||
841 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
842 assert(!TRI->regsOverlap(OddReg, BaseReg) &&
843 (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
// Emit the high half first so the base/offset kills stay on the second
// (EvenReg-destined) transfer, which may clobber them.
844 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
845 OddReg, OddDeadKill, false,
846 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
847 Pred, PredReg, TII, isT2);
848 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
849 EvenReg, EvenDeadKill, false,
850 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
851 Pred, PredReg, TII, isT2);
// Normal order: even half at OffImm, odd half at OffImm+4.
853 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
854 EvenReg, EvenDeadKill, EvenUndef,
855 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
856 Pred, PredReg, TII, isT2);
857 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
858 OddReg, OddDeadKill, OddUndef,
859 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
860 Pred, PredReg, TII, isT2);
874 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
875 /// ops of the same base and incrementing offset into LDM / STM ops.
// NOTE(review): garbled extraction -- gaps in the stray line-number prefixes
// mark elided statements (the main 'while (MBBI != E)' loop head, chain
// state assignments, queue-reset logic, iterator advances, and braces).
// Do not edit logic here without the pristine file.
876 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
877 unsigned NumMerges = 0;
878 unsigned NumMemOps = 0;
// State of the chain of mergeable ops currently being collected.
880 unsigned CurrBase = 0;
882 unsigned CurrSize = 0;
883 ARMCC::CondCodes CurrPred = ARMCC::AL;
884 unsigned CurrPredReg = 0;
885 unsigned Position = 0;
886 SmallVector<MachineBasicBlock::iterator,4> Merges;
888 RS->enterBasicBlock(&MBB);
889 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// First give invalid LDRD/STRD register pairs a chance to be rewritten.
891 if (FixInvalidRegPairOp(MBB, MBBI))
894 bool Advance = false;
895 bool TryMerge = false;
896 bool Clobber = false;
898 bool isMemOp = isMemoryOp(MBBI);
900 int Opcode = MBBI->getOpcode();
901 unsigned Size = getLSMultipleTransferSize(MBBI);
902 unsigned Base = MBBI->getOperand(1).getReg();
903 unsigned PredReg = 0;
904 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
905 int Offset = getMemoryOpOffset(MBBI);
908 // r5 := ldr [r5, #4]
909 // r6 := ldr [r5, #8]
911 // The second ldr has effectively broken the chain even though it
912 // looks like the later ldr(s) use the same base register. Try to
913 // merge the ldr's so far, including this one. But don't try to
914 // combine the following ldr(s).
915 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
916 if (CurrBase == 0 && !Clobber) {
917 // Start of a new chain.
// (The CurrBase/CurrOpc/CurrSize/CurrPred assignments are elided here.)
922 CurrPredReg = PredReg;
923 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
// Same opcode/base/predicate: the op may extend the current chain.
932 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
933 // No need to match PredReg.
934 // Continue adding to the queue.
935 if (Offset > MemOps.back().Offset) {
936 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
// Otherwise insert in offset-sorted order, rejecting duplicates.
940 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
942 if (Offset < I->Offset) {
943 MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
947 } else if (Offset == I->Offset) {
948 // Collision! This can't be merged!
965 // Try to find a free register to use as a new base in case it's needed.
966 // First advance to the instruction just before the start of the chain.
967 AdvanceRS(MBB, MemOps);
968 // Find a scratch register.
969 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
970 // Process the load / store instructions.
971 RS->forward(prior(MBBI));
// Merge the queued chain into one or more LDM/STM.
975 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
976 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
978 // Try folding preceeding/trailing base inc/dec into the generated
980 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
981 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
983 NumMerges += Merges.size();
985 // Try folding preceeding/trailing base inc/dec into those load/store
986 // that were not merged to form LDM/STM ops.
987 for (unsigned i = 0; i != NumMemOps; ++i)
988 if (!MemOps[i].Merged)
989 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
992 // RS may be pointing to an instruction that's deleted.
993 RS->skipTo(prior(MBBI));
994 } else if (NumMemOps == 1) {
995 // Try folding preceeding/trailing base inc/dec into the single
997 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
999 RS->forward(prior(MBBI));
// Reset chain state for the next run (other resets elided here).
1006 CurrPred = ARMCC::AL;
1013 // If iterator hasn't been advanced and this is not a memory op, skip it.
1014 // It can't start a new chain anyway.
1015 if (!Advance && !isMemOp && MBBI != E) {
1021 return NumMerges > 0;
1025 struct OffsetCompare {
1026 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1027 int LOffset = getMemoryOpOffset(LHS);
1028 int ROffset = getMemoryOpOffset(RHS);
1029 assert(LHS == RHS || LOffset != ROffset);
1030 return LOffset > ROffset;
1035 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
1036 /// (bx lr) into the preceeding stack restore so it directly restore the value
1038 /// ldmfd sp!, {r7, lr}
1041 /// ldmfd sp!, {r7, pc}
// NOTE(review): garbled extraction -- the statements that replace LR with
// PC, erase the return instruction, and the 'return false' fall-throughs
// are elided here. Do not edit logic here without the pristine file.
1042 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1043 if (MBB.empty()) return false;
// Look at the block terminator and the instruction just before it.
1045 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1046 if (MBBI != MBB.begin() &&
1047 (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
1048 MachineInstr *PrevMI = prior(MBBI);
1049 if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
// The pop must end in LR (register operands are trailing) for the
// return to be foldable into it.
1050 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1051 if (MO.getReg() != ARM::LR)
// Retarget the LDM to its return-forming variant.
1053 unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1054 PrevMI->setDesc(TII->get(NewOpc));
// runOnMachineFunction - Entry point: cache target hooks, create the
// register scavenger, then run the LDM/STM merging and return-merging
// transforms over every basic block. NOTE(review): garbled extraction --
// the loop increment, closing braces, and the epilogue (presumably
// 'delete RS; return Modified;' given RS = new RegScavenger() above --
// TODO confirm) are elided.
1063 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1064 const TargetMachine &TM = Fn.getTarget();
1065 AFI = Fn.getInfo<ARMFunctionInfo>();
1066 TII = TM.getInstrInfo();
1067 TRI = TM.getRegisterInfo();
1068 RS = new RegScavenger();
1069 isThumb2 = AFI->isThumb2Function();
1071 bool Modified = false;
1072 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1074 MachineBasicBlock &MBB = *MFI;
1075 Modified |= LoadStoreMultipleOpti(MBB);
1076 Modified |= MergeReturnIntoLDM(MBB);
1084 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1085 /// load / stores from consecutive locations close to make it more
1086 /// likely they will be combined later.
// NOTE(review): garbled extraction -- the enclosing anonymous namespace,
// the static ID declaration, some fields, the closing parameter of
// CanFormLdStDWord, and closing braces are elided. Restore from the
// pristine file before editing.
1089 struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1091 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
// Target/function state cached by runOnMachineFunction.
1093 const TargetData *TD;
1094 const TargetInstrInfo *TII;
1095 const TargetRegisterInfo *TRI;
1096 const ARMSubtarget *STI;
1097 MachineRegisterInfo *MRI;
1098 MachineFunction *MF;
1100 virtual bool runOnMachineFunction(MachineFunction &Fn);
1102 virtual const char *getPassName() const {
1103 return "ARM pre- register allocation load / store optimization pass";
// CanFormLdStDWord - decide whether Op0/Op1 can be combined into a
// 64-bit LDRD/STRD, returning the parameters of the combined op.
1107 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1108 unsigned &NewOpc, unsigned &EvenReg,
1109 unsigned &OddReg, unsigned &BaseReg,
1110 unsigned &OffReg, int &Offset,
1111 unsigned &PredReg, ARMCC::CondCodes &Pred,
1113 bool RescheduleOps(MachineBasicBlock *MBB,
1114 SmallVector<MachineInstr*, 4> &Ops,
1115 unsigned Base, bool isLd,
1116 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1117 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Out-of-line definition of the pass's static ID member.
1119 char ARMPreAllocLoadStoreOpt::ID = 0;
// runOnMachineFunction - Entry point for the pre-RA pass: cache target
// hooks, then reschedule loads/stores block by block. NOTE(review):
// garbled extraction -- the loop increment, closing braces, and the final
// 'return Modified;' are elided.
1122 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1123 TD = Fn.getTarget().getTargetData();
1124 TII = Fn.getTarget().getInstrInfo();
1125 TRI = Fn.getTarget().getRegisterInfo();
1126 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1127 MRI = &Fn.getRegInfo();
1130 bool Modified = false;
1131 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1133 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps
// (loads if isLd, stores otherwise, all off register Base) can be moved
// next to each other across the instructions in [I, E).
// Safety: bail on calls, terminators, instructions with unmodeled side
// effects, stores when moving loads, and (per the elided lines) redefs of
// Base.  Profitability: bound the register-pressure increase caused by
// extending the live ranges of the registers collected in
// AddedRegPressure.
// NOTE(review): several early-return lines are elided from this listing
// (e.g. after the TID checks); the visible code is kept byte-identical.
1138 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1139 MachineBasicBlock::iterator I,
1140 MachineBasicBlock::iterator E,
1141 SmallPtrSet<MachineInstr*, 4> &MemOps,
1142 SmallSet<unsigned, 4> &MemRegs,
1143 const TargetRegisterInfo *TRI) {
1144 // Are there stores / loads / calls between them?
1145 // FIXME: This is overly conservative. We should make use of alias information
1147 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions that are themselves part of the group being moved are not
// obstacles; skip them.
1149 if (MemOps.count(&*I))
1151 const TargetInstrDesc &TID = I->getDesc();
1152 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
// A load may not be hoisted/sunk across an intervening store.
1154 if (isLd && TID.mayStore())
1159 // It's not safe to move the first 'str' down.
1162 // str r4, [r0, #+4]
1166 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1167 MachineOperand &MO = I->getOperand(j);
1170 unsigned Reg = MO.getReg();
// Any redefinition of (a register overlapping) the base kills the plan.
1171 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
// Registers used between the group members would have their live ranges
// stretched by the move; count them toward the pressure estimate.
1173 if (Reg != Base && !MemRegs.count(Reg))
1174 AddedRegPressure.insert(Reg);
1178 // Estimate register pressure increase due to the transformation.
1179 if (MemRegs.size() <= 4)
1180 // Ok if we are moving small number of instructions.
// Heuristic: tolerate up to 2x the group's own register count.
1182 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Check whether the pair (Op0, Op1) can be replaced by a
// single doubleword load/store (LDRD/STRD or t2LDRDi8/t2STRDi8).  The pair
// must: map to a known dword opcode, use matching offset registers, have a
// known non-volatile memoperand with sufficient (i64) alignment, have an
// immediate offset encodable in the target addressing mode, and use two
// distinct destination/source registers.  On success all out-parameters
// (NewOpc, EvenReg/OddReg, BaseReg, OffReg, Offset, Pred/PredReg, dl) are
// filled in for the caller to build the new instruction.
// NOTE(review): this listing is elided (the `bool` return-type line, several
// `return false;` lines and the isT2 handling are not shown); the visible
// code is kept byte-identical.
1186 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1188 unsigned &NewOpc, unsigned &EvenReg,
1189 unsigned &OddReg, unsigned &BaseReg,
1190 unsigned &OffReg, int &Offset,
1192 ARMCC::CondCodes &Pred,
1194 // FIXME: FLDS / FSTS -> FLDD / FSTD
// Map the (identical) opcode of the pair to its dword counterpart.
1196 unsigned Opcode = Op0->getOpcode();
1197 if (Opcode == ARM::LDR)
1199 else if (Opcode == ARM::STR)
1201 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1202 NewOpc = ARM::t2LDRDi8;
1205 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1206 NewOpc = ARM::t2STRDi8;
1212 // Make sure the offset registers match.
1214 (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
1217 // Make sure the base address satisfies i64 ld / st alignment requirement.
1218 if (!Op0->hasOneMemOperand() ||
1219 !(*Op0->memoperands_begin())->getValue() ||
1220 (*Op0->memoperands_begin())->isVolatile())
1223 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1224 Function *Func = MF->getFunction();
1225 unsigned ReqAlign = STI->hasV6Ops()
1226 ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
1227 : 8; // Pre-v6 need 8-byte align
1228 if (Align < ReqAlign)
1231 // Then make sure the immediate offset fits.
1232 int OffImm = getMemoryOpOffset(Op0);
1236 // Can't fall back to t2LDRi8 / t2STRi8.
// Thumb2 dword immediates: 8 bits scaled; must be a multiple of Scale.
1239 int Limit = (1 << 8) * Scale;
1240 if (OffImm >= Limit || (OffImm & (Scale-1)))
1245 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1247 AddSub = ARM_AM::sub;
// ARM-mode path: encode the (possibly negated) offset in addrmode3 form.
1250 int Limit = (1 << 8) * Scale;
1251 if (OffImm >= Limit || (OffImm & (Scale-1)))
1253 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1255 EvenReg = Op0->getOperand(0).getReg();
1256 OddReg = Op1->getOperand(0).getReg();
// LDRD/STRD need two distinct registers.
1257 if (EvenReg == OddReg)
1259 BaseReg = Op0->getOperand(1).getReg();
1261 OffReg = Op0->getOperand(2).getReg();
1262 Pred = llvm::getInstrPredicate(Op0, PredReg);
1263 dl = Op0->getDebugLoc();
// RescheduleOps - Given all loads (or stores, per isLd) in Ops that use the
// same base register, greedily pick maximal runs of same-opcode ops at
// consecutive, increasing offsets, verify via IsSafeAndProfitableToMove
// that moving them adjacent is legal and cheap, and splice them next to
// each other (loads move up to the first op; stores move down to the
// last).  For a run of exactly two, additionally try to fold them into a
// single LDRD/STRD via CanFormLdStDWord, and record even/odd register
// allocation hints so the pair can land in a register pair.
// MI2LocMap gives each instruction's program-order index within the block.
// NOTE(review): this listing is elided (loop-exit breaks, the FirstOp /
// LastOp updates, instruction erasure after pair formation, and the final
// return are not shown); the visible code is kept byte-identical.
1267 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1268 SmallVector<MachineInstr*, 4> &Ops,
1269 unsigned Base, bool isLd,
1270 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1271 bool RetVal = false;
1273 // Sort by offset (in reverse order).
1274 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1276 // The loads / stores of the same base are in order. Scan them from first to
1277 // last and check for the following:
1278 // 1. Any def of base.
1280 while (Ops.size() > 1) {
1281 unsigned FirstLoc = ~0U;
1282 unsigned LastLoc = 0;
1283 MachineInstr *FirstOp = 0;
1284 MachineInstr *LastOp = 0;
1286 unsigned LastOpcode = 0;
1287 unsigned LastBytes = 0;
1288 unsigned NumMove = 0;
// Walk Ops back-to-front (i.e. increasing offset) building a run of
// mergeable ops; track the earliest/latest program locations seen.
1289 for (int i = Ops.size() - 1; i >= 0; --i) {
1290 MachineInstr *Op = Ops[i];
1291 unsigned Loc = MI2LocMap[Op];
1292 if (Loc <= FirstLoc) {
1296 if (Loc >= LastLoc) {
// The run must be homogeneous: all ops share one opcode ...
1301 unsigned Opcode = Op->getOpcode();
1302 if (LastOpcode && Opcode != LastOpcode)
// ... and each op must start exactly where the previous one ended.
1305 int Offset = getMemoryOpOffset(Op);
1306 unsigned Bytes = getLSMultipleTransferSize(Op);
1308 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1311 LastOffset = Offset;
1313 LastOpcode = Opcode;
1314 if (++NumMove == 8) // FIXME: Tune this limit.
1321 SmallPtrSet<MachineInstr*, 4> MemOps;
1322 SmallSet<unsigned, 4> MemRegs;
1323 for (int i = NumMove-1; i >= 0; --i) {
1324 MemOps.insert(Ops[i]);
1325 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1328 // Be conservative, if the instructions are too far apart, don't
1329 // move them. We want to limit the increase of register pressure.
1330 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1332 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1333 MemOps, MemRegs, TRI);
1335 for (unsigned i = 0; i != NumMove; ++i)
1338 // This is the new location for the loads / stores.
1339 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
// Skip over members of the group themselves when picking the splice point.
1340 while (InsertPos != MBB->end() && MemOps.count(InsertPos))
1343 // If we are moving a pair of loads / stores, see if it makes sense
1344 // to try to allocate a pair of registers that can form register pairs.
1345 MachineInstr *Op0 = Ops.back();
1346 MachineInstr *Op1 = Ops[Ops.size()-2];
1347 unsigned EvenReg = 0, OddReg = 0;
1348 unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
1349 ARMCC::CondCodes Pred = ARMCC::AL;
1351 unsigned NewOpc = 0;
1354 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1355 EvenReg, OddReg, BaseReg, OffReg,
1356 Offset, PredReg, Pred, isT2)) {
1360 // Form the pair instruction.
// Load pair: both registers are defs.
1362 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1363 dl, TII->get(NewOpc))
1364 .addReg(EvenReg, RegState::Define)
1365 .addReg(OddReg, RegState::Define)
1369 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store pair (elided branch): registers are uses.
1372 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1373 dl, TII->get(NewOpc))
1379 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1385 // Add register allocation hints to form register pairs.
1386 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1387 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Fallback path: no pairing; just splice the ops next to InsertPos.
1389 for (unsigned i = 0; i != NumMove; ++i) {
1390 MachineInstr *Op = Ops.back();
1392 MBB->splice(InsertPos, MBB, Op);
1396 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Single forward scan over MBB that assigns
// each instruction a program-order index (MI2LocMap), collects unpredicated
// i32/FP loads and stores into per-base-register buckets (Base2LdsMap /
// Base2StsMap, with LdBases/StBases preserving discovery order), stops a
// region at calls, terminators, or a repeated base+offset, and then runs
// RescheduleOps on every bucket with more than one member.  Returns true
// if anything was moved.
// NOTE(review): this listing is elided (the `bool` return-type line, the
// `Loc` counter declaration, several break/continue lines and the final
// return are not shown); the visible code is kept byte-identical.
1406 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1407 bool RetVal = false;
1409 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1410 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1411 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1412 SmallVector<unsigned, 4> LdBases;
1413 SmallVector<unsigned, 4> StBases;
1416 MachineBasicBlock::iterator MBBI = MBB->begin();
1417 MachineBasicBlock::iterator E = MBB->end();
1419 for (; MBBI != E; ++MBBI) {
1420 MachineInstr *MI = MBBI;
1421 const TargetInstrDesc &TID = MI->getDesc();
1422 if (TID.isCall() || TID.isTerminator()) {
1423 // Stop at barriers.
1428 MI2LocMap[MI] = Loc++;
// Only plain memory ops are candidates ...
1429 if (!isMemoryOp(MI))
1431 unsigned PredReg = 0;
// ... and only unpredicated ones.
1432 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1435 int Opc = MI->getOpcode();
1436 bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
1437 unsigned Base = MI->getOperand(1).getReg();
1438 int Offset = getMemoryOpOffset(MI);
1440 bool StopHere = false;
// Load path: append to the existing bucket for Base, unless the same
// base+offset was already seen (a duplicate ends the region).
1442 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1443 Base2LdsMap.find(Base);
1444 if (BI != Base2LdsMap.end()) {
1445 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1446 if (Offset == getMemoryOpOffset(BI->second[i])) {
1452 BI->second.push_back(MI);
1454 SmallVector<MachineInstr*, 4> MIs;
1456 Base2LdsMap[Base] = MIs;
1457 LdBases.push_back(Base);
// Store path: mirror of the load path using Base2StsMap/StBases.
1460 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1461 Base2StsMap.find(Base);
1462 if (BI != Base2StsMap.end()) {
1463 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1464 if (Offset == getMemoryOpOffset(BI->second[i])) {
1470 BI->second.push_back(MI);
1472 SmallVector<MachineInstr*, 4> MIs;
1474 Base2StsMap[Base] = MIs;
1475 StBases.push_back(Base);
1480 // Found a duplicate (a base+offset combination that's seen earlier).
1487 // Re-schedule loads.
1488 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1489 unsigned Base = LdBases[i];
1490 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1492 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1495 // Re-schedule stores.
1496 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1497 unsigned Base = StBases[i];
1498 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1500 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the buckets so the next region (after a barrier) starts fresh.
1504 Base2LdsMap.clear();
1505 Base2StsMap.clear();
1515 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1516 /// optimization pass.
// createARMLoadStoreOptimizationPass - Factory: selects the pre-register-
// allocation rescheduling pass or the post-allocation LDM/STM pass.
// NOTE(review): the original line 1518 (presumably `if (PreAlloc)`) is
// elided from this listing — as shown, the second return would be
// unreachable; code is kept byte-identical.
1517 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1519 return new ARMPreAllocLoadStoreOpt();
1520 return new ARMLoadStoreOpt();