1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/Compiler.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/SmallPtrSet.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
// Pass statistics (reported with -stats): how many multi-register
// load/store instructions were formed, how many single ops were moved,
// and how many ldrd/strd instructions were rewritten by this pass.
43 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
44 STATISTIC(NumSTMGened , "Number of stm instructions generated");
45 STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
46 STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
47 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
48 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
49 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
50 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
51 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
52 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
53 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
55 /// ARMLoadStoreOpt - Post-register-allocation pass that combines multiple
56 /// load / store instructions to form ldm / stm instructions.
// NOTE(review): this excerpt elides several declarations (e.g. the static
// ID member, AFI/RS/isThumb2 fields used by the methods below, and the
// closing braces of the struct) -- confirm against the full file.
59 struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
61 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
// Target hooks cached once per function in runOnMachineFunction.
63 const TargetInstrInfo *TII;
64 const TargetRegisterInfo *TRI;
69 virtual bool runOnMachineFunction(MachineFunction &Fn);
71 virtual const char *getPassName() const {
72 return "ARM load / store optimization pass";
// MemOpQueueEntry - One merge candidate: the op's immediate offset, its
// ordinal position within the basic block, and an iterator to it.
76 struct MemOpQueueEntry {
79 MachineBasicBlock::iterator MBBI;
81 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
82 : Offset(o), Position(p), MBBI(i), Merged(false) {};
84 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
85 typedef MemOpQueue::iterator MemOpQueueIter;
// MergeOps - Emit one LDM/STM covering Regs; see definition below.
87 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
88 int Offset, unsigned Base, bool BaseKill, int Opcode,
89 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
90 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
91 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
92 int Opcode, unsigned Size,
93 ARMCC::CondCodes Pred, unsigned PredReg,
94 unsigned Scratch, MemOpQueue &MemOps,
95 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
97 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
98 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
99 MachineBasicBlock::iterator &MBBI);
100 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
101 MachineBasicBlock::iterator MBBI,
102 const TargetInstrInfo *TII,
104 MachineBasicBlock::iterator &I);
105 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
106 MachineBasicBlock::iterator MBBI,
108 MachineBasicBlock::iterator &I);
109 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
110 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Out-of-class definition of the pass identifier.
112 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode to its
// load/store-multiple counterpart. The switch cases are elided in this
// excerpt; any opcode not handled aborts via llvm_unreachable.
115 static int getLoadStoreMultipleOpcode(int Opcode) {
143 default: llvm_unreachable("Unhandled opcode!");
148 static bool isT2i32Load(unsigned Opc) {
149 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
152 static bool isi32Load(unsigned Opc) {
153 return Opc == ARM::LDR || isT2i32Load(Opc);
156 static bool isT2i32Store(unsigned Opc) {
157 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
160 static bool isi32Store(unsigned Opc) {
161 return Opc == ARM::STR || isT2i32Store(Opc);
164 /// MergeOps - Create and insert a LDM or STM with Base as base register and
165 /// registers in Regs as the register operands that would be loaded / stored.
166 /// It returns true if the transformation is done.
// NOTE(review): several lines of this function (the leading return type,
// the single-register early-out, some else-branches and closing braces)
// are elided in this excerpt.
168 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
169 MachineBasicBlock::iterator MBBI,
170 int Offset, unsigned Base, bool BaseKill,
171 int Opcode, ARMCC::CondCodes Pred,
172 unsigned PredReg, unsigned Scratch, DebugLoc dl,
173 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
174 // Only a single register to load / store. Don't bother.
175 unsigned NumRegs = Regs.size();
// Pick the addressing sub-mode from the starting offset. AM4 is the
// integer LDM/STM form; AM5 is the FP FLDM/FSTM form.
179 ARM_AM::AMSubMode Mode = ARM_AM::ia;
180 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
181 if (isAM4 && Offset == 4) {
183 // Thumb2 does not support ldmib / stmib.
186 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
188 // Thumb2 does not support ldmda / stmda.
191 } else if (isAM4 && Offset == -4 * (int)NumRegs) {
193 } else if (Offset != 0) {
194 // If starting offset isn't zero, insert a MI to materialize a new base.
195 // But only do so if it is cost effective, i.e. merging more than two
201 if (isi32Load(Opcode))
202 // If it is a load, then just use one of the destination registers
203 // as the new base.
204 NewBase = Regs[NumRegs-1].first;
206 // Otherwise use the scratch register as the new base.
// Choose ADD vs SUB (and the SP-relative Thumb2 variants) to materialize
// Base +/- Offset into NewBase.
211 int BaseOpc = !isThumb2
213 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
217 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// The offset must be encodable as a (Thumb2) shifter-operand immediate.
220 int ImmedOffset = isThumb2
221 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
222 if (ImmedOffset == -1)
223 // FIXME: Try t2ADDri12 or t2SUBri12?
224 return false; // Probably not worth it then.
226 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
227 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
228 .addImm(Pred).addReg(PredReg).addReg(0);
230 BaseKill = true; // New base is always killed right after its use.
// Build the LDM/STM (AM4) or FLDM/FSTM (AM5) instruction itself.
// A DPR transfers two words per register, hence NumRegs << 1 below.
233 bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
234 bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
235 Opcode = getLoadStoreMultipleOpcode(Opcode);
236 MachineInstrBuilder MIB = (isAM4)
237 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
238 .addReg(Base, getKillRegState(BaseKill))
239 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
240 : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
241 .addReg(Base, getKillRegState(BaseKill))
242 .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
243 .addImm(Pred).addReg(PredReg);
244 MIB.addReg(0); // Add optional writeback (0 for now).
// Append every transferred register; loads define them, stores may kill.
245 for (unsigned i = 0; i != NumRegs; ++i)
246 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
247 | getKillRegState(Regs[i].second));
252 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
253 /// load / store multiple instructions.
// Walks MemOps starting at SIndex, greedily collecting ops whose offsets
// increase by exactly Size and whose register numbers are compatible with
// the target addressing mode, then calls MergeOps on each maximal run.
// NOTE(review): the leading return type and several closing braces are
// elided in this excerpt.
255 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
256 unsigned Base, int Opcode, unsigned Size,
257 ARMCC::CondCodes Pred, unsigned PredReg,
258 unsigned Scratch, MemOpQueue &MemOps,
259 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
260 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
261 int Offset = MemOps[SIndex].Offset;
262 int SOffset = Offset;
263 unsigned Pos = MemOps[SIndex].Position;
264 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
265 DebugLoc dl = Loc->getDebugLoc();
266 unsigned PReg = Loc->getOperand(0).getReg();
267 unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
268 bool isKill = Loc->getOperand(0).isKill();
// Seed the run with the first candidate's register.
270 SmallVector<std::pair<unsigned,bool>, 8> Regs;
271 Regs.push_back(std::make_pair(PReg, isKill));
272 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
273 int NewOffset = MemOps[i].Offset;
274 unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
275 unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
276 isKill = MemOps[i].MBBI->getOperand(0).isKill();
277 // AM4 - register numbers in ascending order.
278 // AM5 - consecutive register numbers in ascending order.
279 if (NewOffset == Offset + (int)Size &&
280 ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
282 Regs.push_back(std::make_pair(Reg, isKill));
285 // Can't merge this in. Try merge the earlier ones first.
286 if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
287 Scratch, dl, Regs)) {
288 Merges.push_back(prior(Loc));
// Erase the originals that were folded into the new LDM/STM.
289 for (unsigned j = SIndex; j < i; ++j) {
290 MBB.erase(MemOps[j].MBBI);
291 MemOps[j].Merged = true;
// Recurse to merge the remaining ops starting at the break point.
294 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// Track the latest position seen so the final insertion point is after
// every op being merged.
299 if (MemOps[i].Position > Pos) {
300 Pos = MemOps[i].Position;
301 Loc = MemOps[i].MBBI;
// Merge the final run; the base is killed if its last use is here.
305 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
306 if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
307 Scratch, dl, Regs)) {
308 Merges.push_back(prior(Loc));
309 for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
310 MBB.erase(MemOps[i].MBBI);
311 MemOps[i].Merged = true;
// isMatchingDecrement - Return true if MI is "Base = Base - Bytes" under
// the same predicate (Pred/PredReg): one of the recognized SUB opcodes,
// with matching source/dest registers and a matching scaled immediate.
// Limit of 0 means "no upper bound" on Bytes.
318 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
319 unsigned Bytes, unsigned Limit,
320 ARMCC::CondCodes Pred, unsigned PredReg){
321 unsigned MyPredReg = 0;
324 if (MI->getOpcode() != ARM::t2SUBri &&
325 MI->getOpcode() != ARM::t2SUBrSPi &&
326 MI->getOpcode() != ARM::t2SUBrSPi12 &&
327 MI->getOpcode() != ARM::tSUBspi &&
328 MI->getOpcode() != ARM::SUBri)
331 // Make sure the offset fits in 8 bits.
// NOTE(review): Bytes is unsigned, so "Bytes <= 0" only catches Bytes == 0
// -- presumably intentional, but confirm.
332 if (Bytes <= 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, hence the scale of 4.
335 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
336 return (MI->getOperand(0).getReg() == Base &&
337 MI->getOperand(1).getReg() == Base &&
338 (MI->getOperand(2).getImm()*Scale) == Bytes &&
339 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
340 MyPredReg == PredReg);
// isMatchingIncrement - Mirror of isMatchingDecrement: return true if MI is
// "Base = Base + Bytes" under the same predicate, via one of the recognized
// ADD opcodes with a matching scaled immediate. Limit of 0 means no bound.
343 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
344 unsigned Bytes, unsigned Limit,
345 ARMCC::CondCodes Pred, unsigned PredReg){
346 unsigned MyPredReg = 0;
349 if (MI->getOpcode() != ARM::t2ADDri &&
350 MI->getOpcode() != ARM::t2ADDrSPi &&
351 MI->getOpcode() != ARM::t2ADDrSPi12 &&
352 MI->getOpcode() != ARM::tADDspi &&
353 MI->getOpcode() != ARM::ADDri)
// NOTE(review): as in isMatchingDecrement, "Bytes <= 0" on an unsigned
// value only rejects Bytes == 0.
356 if (Bytes <= 0 || (Limit && Bytes >= Limit))
357 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate in words, hence the scale of 4.
360 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
361 return (MI->getOperand(0).getReg() == Base &&
362 MI->getOperand(1).getReg() == Base &&
363 (MI->getOperand(2).getImm()*Scale) == Bytes &&
364 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
365 MyPredReg == PredReg);
// getLSMultipleTransferSize - Return the number of bytes an instruction
// transfers. The switch cases are elided in this excerpt; the visible
// branches compute the size from the register-list operand count (LDM/STM:
// operands beyond the first 5 are transferred registers, 4 bytes each) or
// from the AM5 offset field (FLDM/FSTM, encoded in words).
368 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
369 switch (MI->getOpcode()) {
387 return (MI->getNumOperands() - 5) * 4;
392 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
396 /// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
397 /// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
399 /// stmia rn, <ra, rb, rc>
400 /// rn := rn + 4 * 3;
402 /// stmia rn!, <ra, rb, rc>
404 /// rn := rn - 4 * 3;
405 /// ldmia rn, <ra, rb, rc>
407 /// ldmdb rn!, <ra, rb, rc>
// NOTE(review): this excerpt elides the return statements, the erasure of
// the matched ADD/SUB, and several braces.
408 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
409 MachineBasicBlock::iterator MBBI,
411 MachineBasicBlock::iterator &I) {
412 MachineInstr *MI = MBBI;
413 unsigned Base = MI->getOperand(0).getReg();
414 unsigned Bytes = getLSMultipleTransferSize(MI);
415 unsigned PredReg = 0;
416 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
417 int Opcode = MI->getOpcode();
418 bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
419 Opcode == ARM::STM || Opcode == ARM::t2STM;
// Bail out if writeback is already enabled on the instruction.
422 if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
425 // Can't use the updating AM4 sub-mode if the base register is also a dest
426 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
427 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
428 if (MI->getOperand(i).getReg() == Base)
// Try folding a decrement that immediately precedes the instruction.
432 ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
433 if (MBBI != MBB.begin()) {
434 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
435 if (Mode == ARM_AM::ia &&
436 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
// ia preceded by "sub base, bytes" becomes db with writeback; operand 4
// is the optional writeback slot, redefined to write the base.
437 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
438 MI->getOperand(4).setReg(Base);
439 MI->getOperand(4).setIsDef();
442 } else if (Mode == ARM_AM::ib &&
443 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
444 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
445 MI->getOperand(4).setReg(Base); // WB to base
446 MI->getOperand(4).setIsDef();
// Try folding an increment/decrement that immediately follows.
452 if (MBBI != MBB.end()) {
453 MachineBasicBlock::iterator NextMBBI = next(MBBI);
454 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
455 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
456 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
457 MI->getOperand(4).setReg(Base); // WB to base
458 MI->getOperand(4).setIsDef();
465 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
466 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
467 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
468 MI->getOperand(4).setReg(Base); // WB to base
469 MI->getOperand(4).setIsDef();
479 // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
480 if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
483 ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
484 unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
485 if (MBBI != MBB.begin()) {
486 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
487 if (Mode == ARM_AM::ia &&
488 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
489 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
490 MI->getOperand(4).setReg(Base); // WB to base
491 MI->getOperand(4).setIsDef();
497 if (MBBI != MBB.end()) {
498 MachineBasicBlock::iterator NextMBBI = next(MBBI);
499 if (Mode == ARM_AM::ia &&
500 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
501 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
502 MI->getOperand(4).setReg(Base); // WB to base
503 MI->getOperand(4).setIsDef();
// getPreIndexedLoadStoreOpcode - Map a plain load/store opcode to its
// pre-indexed (writeback-before-access) variant. The FP ops map to their
// FLDM/FSTM forms since there is no pre-indexed FLD/FST. Some Thumb2 case
// labels are elided in this excerpt.
517 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
519 case ARM::LDR: return ARM::LDR_PRE;
520 case ARM::STR: return ARM::STR_PRE;
521 case ARM::FLDS: return ARM::FLDMS;
522 case ARM::FLDD: return ARM::FLDMD;
523 case ARM::FSTS: return ARM::FSTMS;
524 case ARM::FSTD: return ARM::FSTMD;
527 return ARM::t2LDR_PRE;
530 return ARM::t2STR_PRE;
531 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a plain load/store opcode to its
// post-indexed (writeback-after-access) variant; FP ops again map to their
// FLDM/FSTM forms. Some Thumb2 case labels are elided in this excerpt.
536 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
538 case ARM::LDR: return ARM::LDR_POST;
539 case ARM::STR: return ARM::STR_POST;
540 case ARM::FLDS: return ARM::FLDMS;
541 case ARM::FLDD: return ARM::FLDMD;
542 case ARM::FSTS: return ARM::FSTMS;
543 case ARM::FSTD: return ARM::FSTMD;
546 return ARM::t2LDR_POST;
549 return ARM::t2STR_POST;
550 default: llvm_unreachable("Unhandled opcode!");
555 /// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
556 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible,
556 /// producing a pre- or post-indexed instruction.
// NOTE(review): returns, erasures of the matched ADD/SUB, and several
// braces are elided in this excerpt.
557 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
558 MachineBasicBlock::iterator MBBI,
559 const TargetInstrInfo *TII,
561 MachineBasicBlock::iterator &I) {
562 MachineInstr *MI = MBBI;
563 unsigned Base = MI->getOperand(1).getReg();
564 bool BaseKill = MI->getOperand(1).isKill();
565 unsigned Bytes = getLSMultipleTransferSize(MI);
566 int Opcode = MI->getOpcode();
567 DebugLoc dl = MI->getDebugLoc();
568 bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
569 Opcode == ARM::FSTD || Opcode == ARM::FSTS;
570 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
// Only a zero-offset op can absorb a base update.
571 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
573 else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
575 else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
576 if (MI->getOperand(2).getImm() != 0)
579 bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
580 // Can't do the merge if the destination register is the same as the would-be
581 // writeback register.
582 if (isLd && MI->getOperand(0).getReg() == Base)
585 unsigned PredReg = 0;
586 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
587 bool DoMerge = false;
588 ARM_AM::AddrOpc AddSub = ARM_AM::add;
// Immediate range for the indexed forms:
590 // AM2 - 12 bits, thumb2 - 8 bits.
591 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// Look for a matching inc/dec just before the op (=> pre-indexed form).
592 if (MBBI != MBB.begin()) {
593 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
594 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
596 AddSub = ARM_AM::sub;
597 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
599 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
601 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
// Otherwise look just after the op (=> post-indexed form).
607 if (!DoMerge && MBBI != MBB.end()) {
608 MachineBasicBlock::iterator NextMBBI = next(MBBI);
610 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
612 AddSub = ARM_AM::sub;
613 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
614 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
616 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
// Encode the folded offset for the chosen addressing mode.
630 bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
633 Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
635 : ARM_AM::ia, true, (isDPR ? 2 : 1));
637 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
639 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// Loads: FLDM with writeback, or LDR_PRE/POST / t2LDR_PRE/POST.
643 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
644 .addReg(Base, getKillRegState(BaseKill))
645 .addImm(Offset).addImm(Pred).addReg(PredReg)
646 .addReg(Base, getDefRegState(true)) // WB base register
647 .addReg(MI->getOperand(0).getReg(), RegState::Define);
649 // LDR_PRE, LDR_POST,
650 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
651 .addReg(Base, RegState::Define)
652 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
654 // t2LDR_PRE, t2LDR_POST
655 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
656 .addReg(Base, RegState::Define)
657 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Stores: FSTM with writeback, or STR_PRE/POST / t2STR_PRE/POST.
659 MachineOperand &MO = MI->getOperand(0);
662 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
663 .addImm(Pred).addReg(PredReg)
664 .addReg(Base, getDefRegState(true)) // WB base register
665 .addReg(MO.getReg(), getKillRegState(MO.isKill()));
668 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
669 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
670 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
672 // t2STR_PRE, t2STR_POST
673 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
674 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
675 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
682 /// isMemoryOp - Returns true if instruction is a memory operation (that this
683 /// pass is capable of operating on).
// The switch case labels are elided in this excerpt; each visible branch
// requires the base operand (operand 1) to be a register, and the first
// group additionally requires no offset register (operand 2 == 0).
684 static bool isMemoryOp(const MachineInstr *MI) {
685 int Opcode = MI->getOpcode();
690 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
693 return MI->getOperand(1).isReg();
696 return MI->getOperand(1).isReg();
701 return MI->getOperand(1).isReg();
706 /// AdvanceRS - Advance register scavenger to just before the earliest memory
707 /// op that is being merged.
708 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the queue entry with the smallest Position (earliest in the block).
709 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
710 unsigned Position = MemOps[0].Position;
711 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
712 if (MemOps[i].Position < Position) {
713 Position = MemOps[i].Position;
714 Loc = MemOps[i].MBBI;
// Step the scavenger to the instruction just before it (no-op at begin).
718 if (Loc != MBB.begin())
719 RS->forward(prior(Loc));
// getMemoryOpOffset - Extract the signed byte offset from a load/store's
// immediate operand, decoding per addressing mode: Thumb2 immediates are
// used directly; AM2/AM3 encode magnitude plus an add/sub flag; AM5
// encodes the offset in words (hence * 4). Some lines (the sign-applying
// returns' values) are elided in this excerpt.
722 static int getMemoryOpOffset(const MachineInstr *MI) {
723 int Opcode = MI->getOpcode();
724 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
725 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
// The offset immediate is always the 3rd-from-last operand.
726 unsigned NumOperands = MI->getDesc().getNumOperands();
727 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
729 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
730 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
731 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
735 ? ARM_AM::getAM2Offset(OffField)
736 : (isAM3 ? ARM_AM::getAM3Offset(OffField)
737 : ARM_AM::getAM5Offset(OffField) * 4);
// Negate the magnitude when the encoded operation is a subtraction.
739 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
742 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
745 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR or STR (opcode NewOpc) before MBBI,
// used when splitting an ldrd/strd into two word ops. isDef selects the
// load form (Reg is defined) vs the store form (Reg is a source). OffReg
// is only added for ARM-mode (non-Thumb2) encodings; kill/undef flags are
// threaded through from the original instruction's operands.
751 static void InsertLDR_STR(MachineBasicBlock &MBB,
752 MachineBasicBlock::iterator &MBBI,
753 int OffImm, bool isDef,
754 DebugLoc dl, unsigned NewOpc,
755 unsigned Reg, bool RegDeadKill, bool RegUndef,
756 unsigned BaseReg, bool BaseKill, bool BaseUndef,
757 unsigned OffReg, bool OffKill, bool OffUndef,
758 ARMCC::CondCodes Pred, unsigned PredReg,
759 const TargetInstrInfo *TII, bool isT2) {
// Encode the AM2 immediate with the correct add/sub direction.
763 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
765 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
// Load form: Reg is a def.
768 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
770 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
771 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
773 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
774 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is a source.
776 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
778 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
779 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
781 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
782 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - Rewrite an ldrd/strd whose register pair is not a
// valid (even, even+1) combination: either into an LDM/STM (ascending
// registers, zero offset) or into two separate LDR/STR ops, ordered so a
// load never clobbers its own base/offset register before the second use.
// NOTE(review): the early-return for already-valid pairs, the erase of the
// original instruction, and several braces are elided in this excerpt.
786 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
787 MachineBasicBlock::iterator &MBBI) {
788 MachineInstr *MI = &*MBBI;
789 unsigned Opcode = MI->getOpcode();
790 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
791 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
792 unsigned EvenReg = MI->getOperand(0).getReg();
793 unsigned OddReg = MI->getOperand(1).getReg();
794 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
795 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A proper even/odd pair needs no fixing.
796 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
799 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
800 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the pair regs may be dead; for stores they may be killed.
801 bool EvenDeadKill = isLd ?
802 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
803 bool EvenUndef = MI->getOperand(0).isUndef();
804 bool OddDeadKill = isLd ?
805 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
806 bool OddUndef = MI->getOperand(1).isUndef();
807 const MachineOperand &BaseOp = MI->getOperand(2);
808 unsigned BaseReg = BaseOp.getReg();
809 bool BaseKill = BaseOp.isKill();
810 bool BaseUndef = BaseOp.isUndef();
// Thumb2 ldrd/strd has no offset-register operand.
811 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
812 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
813 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
814 int OffImm = getMemoryOpOffset(MI);
815 unsigned PredReg = 0;
816 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
818 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
819 // Ascending register numbers and no offset. It's safe to change it to a
// ldm/stm with the two registers in its list.
821 unsigned NewOpc = (isLd)
822 ? (isT2 ? ARM::t2LDM : ARM::LDM)
823 : (isT2 ? ARM::t2STM : ARM::STM);
// Load variant: both pair registers become defs.
825 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
826 .addReg(BaseReg, getKillRegState(BaseKill))
827 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
828 .addImm(Pred).addReg(PredReg)
830 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
831 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store variant: both pair registers are sources.
834 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
835 .addReg(BaseReg, getKillRegState(BaseKill))
836 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
837 .addImm(Pred).addReg(PredReg)
840 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
842 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
846 // Split into two instructions.
847 assert((!isT2 || !OffReg) &&
848 "Thumb2 ldrd / strd does not encode offset register!");
849 unsigned NewOpc = (isLd)
850 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
851 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
852 DebugLoc dl = MBBI->getDebugLoc();
853 // If this is a load and base register is killed, it may have been
854 // re-defed by the load, make sure the first load does not clobber it.
// In that case emit the odd half first so EvenReg (which overlaps the
// base/offset) is written last.
856 (BaseKill || OffKill) &&
857 (TRI->regsOverlap(EvenReg, BaseReg) ||
858 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
859 assert(!TRI->regsOverlap(OddReg, BaseReg) &&
860 (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
861 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
862 OddReg, OddDeadKill, false,
863 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
864 Pred, PredReg, TII, isT2);
865 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
866 EvenReg, EvenDeadKill, false,
867 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
868 Pred, PredReg, TII, isT2);
// Normal order: even half at OffImm, odd half at OffImm+4.
870 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
871 EvenReg, EvenDeadKill, EvenUndef,
872 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
873 Pred, PredReg, TII, isT2);
874 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
875 OddReg, OddDeadKill, OddUndef,
876 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
877 Pred, PredReg, TII, isT2);
891 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
892 /// ops of the same base and incrementing offset into LDM / STM ops.
// NOTE(review): the main iteration loop's while/for header, several
// branch bodies, and closing braces are elided in this excerpt.
893 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
894 unsigned NumMerges = 0;
895 unsigned NumMemOps = 0;
// State of the chain currently being collected.
897 unsigned CurrBase = 0;
899 unsigned CurrSize = 0;
900 ARMCC::CondCodes CurrPred = ARMCC::AL;
901 unsigned CurrPredReg = 0;
902 unsigned Position = 0;
903 SmallVector<MachineBasicBlock::iterator,4> Merges;
905 RS->enterBasicBlock(&MBB);
906 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// First give invalid ldrd/strd register pairs a chance to be rewritten.
908 if (FixInvalidRegPairOp(MBB, MBBI))
911 bool Advance = false;
912 bool TryMerge = false;
913 bool Clobber = false;
915 bool isMemOp = isMemoryOp(MBBI);
917 int Opcode = MBBI->getOpcode();
918 unsigned Size = getLSMultipleTransferSize(MBBI);
919 unsigned Base = MBBI->getOperand(1).getReg();
920 unsigned PredReg = 0;
921 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
922 int Offset = getMemoryOpOffset(MBBI);
// Watch for a load that redefines its own base, e.g.:
925 // r5 := ldr [r5, #4]
926 // r6 := ldr [r5, #8]
928 // The second ldr has effectively broken the chain even though it
929 // looks like the later ldr(s) use the same base register. Try to
930 // merge the ldr's so far, including this one. But don't try to
931 // combine the following ldr(s).
932 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
933 if (CurrBase == 0 && !Clobber) {
934 // Start of a new chain.
939 CurrPredReg = PredReg;
940 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
// Same opcode/base/predicate: extend the current chain, keeping the
// queue sorted by offset and rejecting duplicate offsets.
949 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
950 // No need to match PredReg.
951 // Continue adding to the queue.
952 if (Offset > MemOps.back().Offset) {
953 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
957 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
959 if (Offset < I->Offset) {
960 MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
964 } else if (Offset == I->Offset) {
965 // Collision! This can't be merged!
982 // Try to find a free register to use as a new base in case it's needed.
983 // First advance to the instruction just before the start of the chain.
984 AdvanceRS(MBB, MemOps);
985 // Find a scratch register.
986 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
987 // Process the load / store instructions.
988 RS->forward(prior(MBBI));
992 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
993 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
995 // Try folding preceding/trailing base inc/dec into the generated
// LDM/STM ops.
997 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
998 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1000 NumMerges += Merges.size();
1002 // Try folding preceding/trailing base inc/dec into those load/store
1003 // that were not merged to form LDM/STM ops.
1004 for (unsigned i = 0; i != NumMemOps; ++i)
1005 if (!MemOps[i].Merged)
1006 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1009 // RS may be pointing to an instruction that's deleted.
1010 RS->skipTo(prior(MBBI));
1011 } else if (NumMemOps == 1) {
1012 // Try folding preceding/trailing base inc/dec into the single
// load/store.
1014 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1016 RS->forward(prior(MBBI));
// Reset chain state before scanning for the next chain.
1023 CurrPred = ARMCC::AL;
1030 // If iterator hasn't been advanced and this is not a memory op, skip it.
1031 // It can't start a new chain anyway.
1032 if (!Advance && !isMemOp && MBBI != E) {
1038 return NumMerges > 0;
// OffsetCompare - Comparator ordering memory ops by DESCENDING offset;
// asserts that two distinct instructions never share an offset.
1042 struct OffsetCompare {
1043 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1044 int LOffset = getMemoryOpOffset(LHS);
1045 int ROffset = getMemoryOpOffset(RHS);
1046 assert(LHS == RHS || LOffset != ROffset);
1047 return LOffset > ROffset;
1052 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
1053 /// (bx lr) into the preceding stack restore so it directly restores the
// return address into pc:
1055 /// ldmfd sp!, {r7, lr}
// bx lr
// =>
1058 /// ldmfd sp!, {r7, pc}
// NOTE(review): the early returns, the erase of the bx instruction, and
// closing braces are elided in this excerpt.
1059 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1060 if (MBB.empty()) return false;
1062 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1063 if (MBBI != MBB.begin() &&
1064 (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
1065 MachineInstr *PrevMI = prior(MBBI);
1066 if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
// LR must be the last register in the ldm's list for the rewrite.
1067 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1068 if (MO.getReg() != ARM::LR)
1070 unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1071 PrevMI->setDesc(TII->get(NewOpc));
// runOnMachineFunction - Pass entry point: cache target hooks, create a
// register scavenger, then run the two optimizations over every block.
// NOTE(review): the scavenger cleanup (delete RS) and the final return of
// Modified are elided in this excerpt.
1080 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1081 const TargetMachine &TM = Fn.getTarget();
1082 AFI = Fn.getInfo<ARMFunctionInfo>();
1083 TII = TM.getInstrInfo();
1084 TRI = TM.getRegisterInfo();
1085 RS = new RegScavenger();
1086 isThumb2 = AFI->isThumb2Function();
1088 bool Modified = false;
1089 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1091 MachineBasicBlock &MBB = *MFI;
1092 Modified |= LoadStoreMultipleOpti(MBB);
1093 Modified |= MergeReturnIntoLDM(MBB);
1101 /// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
1102 /// load / stores from consecutive locations close to make it more
1103 /// likely they will be combined later.
// NOTE(review): this excerpt elides some members (e.g. the static ID
// declaration) and the struct's closing braces.
1106 struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1108 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
// Target/function state cached in runOnMachineFunction.
1110 const TargetData *TD;
1111 const TargetInstrInfo *TII;
1112 const TargetRegisterInfo *TRI;
1113 const ARMSubtarget *STI;
1114 MachineRegisterInfo *MRI;
1115 MachineFunction *MF;
1117 virtual bool runOnMachineFunction(MachineFunction &Fn);
1119 virtual const char *getPassName() const {
1120 return "ARM pre- register allocation load / store optimization pass";
// CanFormLdStDWord - Decide whether two word ops can become one ldrd/strd.
1124 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1125 unsigned &NewOpc, unsigned &EvenReg,
1126 unsigned &OddReg, unsigned &BaseReg,
1127 unsigned &OffReg, int &Offset,
1128 unsigned &PredReg, ARMCC::CondCodes &Pred,
1130 bool RescheduleOps(MachineBasicBlock *MBB,
1131 SmallVector<MachineInstr*, 4> &Ops,
1132 unsigned Base, bool isLd,
1133 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1134 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Out-of-class definition of the pass identifier.
1136 char ARMPreAllocLoadStoreOpt::ID = 0;
1139 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// Cache target data / instruction / register info and the ARM subtarget,
// then reschedule the loads and stores of each basic block.
// NOTE(review): the MF member is presumably also set here — the
// assignment line is not visible in this excerpt.
1140 TD = Fn.getTarget().getTargetData();
1141 TII = Fn.getTarget().getInstrInfo();
1142 TRI = Fn.getTarget().getRegisterInfo();
1143 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1144 MRI = &Fn.getRegInfo();
1147 bool Modified = false;
1148 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1150 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Scan the instructions in [I, E) that separate
// the memory ops being moved and decide whether moving them together is
// both legal (no barriers, no redef of the base) and unlikely to increase
// register pressure too much.
1155 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1156 MachineBasicBlock::iterator I,
1157 MachineBasicBlock::iterator E,
1158 SmallPtrSet<MachineInstr*, 4> &MemOps,
1159 SmallSet<unsigned, 4> &MemRegs,
1160 const TargetRegisterInfo *TRI) {
1161 // Are there stores / loads / calls between them?
1162 // FIXME: This is overly conservative. We should make use of alias information
// Registers whose live ranges would be stretched by the move.
1164 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions that are themselves part of the move set are skipped.
1166 if (MemOps.count(&*I))
1168 const TargetInstrDesc &TID = I->getDesc();
// Calls, terminators and side-effecting instructions act as barriers.
1169 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
// When hoisting loads, any intervening store is a potential alias.
1171 if (isLd && TID.mayStore())
1176 // It's not safe to move the first 'str' down.
1179 // str r4, [r0, #+4]
1183 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1184 MachineOperand &MO = I->getOperand(j);
// A def of (or overlapping) the base register invalidates the move.
1187 unsigned Reg = MO.getReg();
1188 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1190 if (Reg != Base && !MemRegs.count(Reg))
1191 AddedRegPressure.insert(Reg);
1195 // Estimate register pressure increase due to the transformation.
1196 if (MemRegs.size() <= 4)
1197 // Ok if we are moving small number of instructions.
1199 return AddedRegPressure.size() <= MemRegs.size() * 2;
1203 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1205 unsigned &NewOpc, unsigned &EvenReg,
1206 unsigned &OddReg, unsigned &BaseReg,
1207 unsigned &OffReg, int &Offset,
1209 ARMCC::CondCodes &Pred,
1211 // Make sure we're allowed to generate LDRD/STRD.
1212 if (!STI->hasV5TEOps())
1215 // FIXME: FLDS / FSTS -> FLDD / FSTD
// Map Op0's i32 load/store opcode to its paired doubleword form.
1217 unsigned Opcode = Op0->getOpcode();
1218 if (Opcode == ARM::LDR)
1220 else if (Opcode == ARM::STR)
1222 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1223 NewOpc = ARM::t2LDRDi8;
1226 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1227 NewOpc = ARM::t2STRDi8;
1233 // Make sure the offset registers match.
1235 (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
1238 // Make sure the base address satisfies the i64 ld / st alignment requirement.
// Require exactly one non-volatile memory operand with a known IR value,
// so the alignment below is trustworthy.
1239 if (!Op0->hasOneMemOperand() ||
1240 !(*Op0->memoperands_begin())->getValue() ||
1241 (*Op0->memoperands_begin())->isVolatile())
1244 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1245 Function *Func = MF->getFunction();
1246 unsigned ReqAlign = STI->hasV6Ops()
1247 ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
1248 : 8; // Pre-v6 need 8-byte align
1249 if (Align < ReqAlign)
1252 // Then make sure the immediate offset fits.
1253 int OffImm = getMemoryOpOffset(Op0);
1257 // Can't fall back to t2LDRi8 / t2STRi8.
1260 int Limit = (1 << 8) * Scale;
1261 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM addressing-mode-3 path: sign and magnitude are encoded together.
1266 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1268 AddSub = ARM_AM::sub;
1271 int Limit = (1 << 8) * Scale;
1272 if (OffImm >= Limit || (OffImm & (Scale-1)))
1274 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two transfer registers must differ to form a register pair.
1276 EvenReg = Op0->getOperand(0).getReg();
1277 OddReg = Op1->getOperand(0).getReg();
1278 if (EvenReg == OddReg)
1280 BaseReg = Op0->getOperand(1).getReg();
1282 OffReg = Op0->getOperand(2).getReg();
1283 Pred = llvm::getInstrPredicate(Op0, PredReg);
1284 dl = Op0->getDebugLoc();
1288 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1289 SmallVector<MachineInstr*, 4> &Ops,
1290 unsigned Base, bool isLd,
1291 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1292 bool RetVal = false;
1294 // Sort by offset (in reverse order).
1295 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1297 // The loads / stores of the same base are in order. Scan them from first to
1298 // last and check for the following:
1299 // 1. Any def of base.
1301 while (Ops.size() > 1) {
// Greedily find a run (capped at 8) of same-opcode ops at consecutive,
// size-stepped offsets, tracking the earliest/latest program locations.
1302 unsigned FirstLoc = ~0U;
1303 unsigned LastLoc = 0;
1304 MachineInstr *FirstOp = 0;
1305 MachineInstr *LastOp = 0;
1307 unsigned LastOpcode = 0;
1308 unsigned LastBytes = 0;
1309 unsigned NumMove = 0;
1310 for (int i = Ops.size() - 1; i >= 0; --i) {
1311 MachineInstr *Op = Ops[i];
1312 unsigned Loc = MI2LocMap[Op];
1313 if (Loc <= FirstLoc) {
1317 if (Loc >= LastLoc) {
// A run must share one opcode so it can become one multiple / pair.
1322 unsigned Opcode = Op->getOpcode();
1323 if (LastOpcode && Opcode != LastOpcode)
1326 int Offset = getMemoryOpOffset(Op);
1327 unsigned Bytes = getLSMultipleTransferSize(Op);
// Offsets must step by exactly the transfer size to stay consecutive.
1329 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1332 LastOffset = Offset;
1334 LastOpcode = Opcode;
1335 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the move set and the registers it transfers.
1342 SmallPtrSet<MachineInstr*, 4> MemOps;
1343 SmallSet<unsigned, 4> MemRegs;
1344 for (int i = NumMove-1; i >= 0; --i) {
1345 MemOps.insert(Ops[i]);
1346 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1349 // Be conservative, if the instructions are too far apart, don't
1350 // move them. We want to limit the increase of register pressure.
1351 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1353 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1354 MemOps, MemRegs, TRI);
1356 for (unsigned i = 0; i != NumMove; ++i)
1359 // This is the new location for the loads / stores.
// Loads are hoisted up to the first op; stores are sunk to the last.
1360 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1361 while (InsertPos != MBB->end() && MemOps.count(InsertPos))
1364 // If we are moving a pair of loads / stores, see if it makes sense
1365 // to try to allocate a pair of registers that can form register pairs.
1366 MachineInstr *Op0 = Ops.back();
1367 MachineInstr *Op1 = Ops[Ops.size()-2];
1368 unsigned EvenReg = 0, OddReg = 0;
1369 unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
1370 ARMCC::CondCodes Pred = ARMCC::AL;
1372 unsigned NewOpc = 0;
1375 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1376 EvenReg, OddReg, BaseReg, OffReg,
1377 Offset, PredReg, Pred, isT2)) {
1381 // Form the pair instruction.
// This branch defines the even/odd registers, so it is the LDRD form.
1383 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1384 dl, TII->get(NewOpc))
1385 .addReg(EvenReg, RegState::Define)
1386 .addReg(OddReg, RegState::Define)
1390 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Presumably the STRD path (regs as sources) — the branch header is not
// visible in this excerpt. TODO confirm.
1393 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1394 dl, TII->get(NewOpc))
1400 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1406 // Add register allocation hints to form register pairs.
1407 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1408 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Otherwise just splice the ops to the common insertion point.
1410 for (unsigned i = 0; i != NumMove; ++i) {
1411 MachineInstr *Op = Ops.back();
1413 MBB->splice(InsertPos, MBB, Op);
1417 NumLdStMoved += NumMove;
1427 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1428 bool RetVal = false;
// Number every instruction with a program location, and bucket loads and
// stores by their base register, remembering bases in discovery order.
1430 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1431 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1432 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1433 SmallVector<unsigned, 4> LdBases;
1434 SmallVector<unsigned, 4> StBases;
1437 MachineBasicBlock::iterator MBBI = MBB->begin();
1438 MachineBasicBlock::iterator E = MBB->end();
1440 for (; MBBI != E; ++MBBI) {
1441 MachineInstr *MI = MBBI;
1442 const TargetInstrDesc &TID = MI->getDesc();
1443 if (TID.isCall() || TID.isTerminator()) {
1444 // Stop at barriers.
1449 MI2LocMap[MI] = Loc++;
1450 if (!isMemoryOp(MI))
// Only unpredicated memory ops are considered for rescheduling.
1452 unsigned PredReg = 0;
1453 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1456 int Opc = MI->getOpcode();
1457 bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
1458 unsigned Base = MI->getOperand(1).getReg();
1459 int Offset = getMemoryOpOffset(MI);
1461 bool StopHere = false;
1463 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1464 Base2LdsMap.find(Base);
1465 if (BI != Base2LdsMap.end()) {
// A repeated base+offset ends the current scheduling window.
1466 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1467 if (Offset == getMemoryOpOffset(BI->second[i])) {
1473 BI->second.push_back(MI);
1475 SmallVector<MachineInstr*, 4> MIs;
1477 Base2LdsMap[Base] = MIs;
1478 LdBases.push_back(Base);
// Store side: same bucketing and duplicate check as the loads above.
1481 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1482 Base2StsMap.find(Base);
1483 if (BI != Base2StsMap.end()) {
1484 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1485 if (Offset == getMemoryOpOffset(BI->second[i])) {
1491 BI->second.push_back(MI);
1493 SmallVector<MachineInstr*, 4> MIs;
1495 Base2StsMap[Base] = MIs;
1496 StBases.push_back(Base);
1501 // Found a duplicate (a base+offset combination that's seen earlier).
1508 // Re-schedule loads.
1509 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1510 unsigned Base = LdBases[i];
1511 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1513 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1516 // Re-schedule stores.
1517 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1518 unsigned Base = StBases[i];
1519 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1521 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the per-window state before scanning the next region.
1525 Base2LdsMap.clear();
1526 Base2StsMap.clear();
1536 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1537 /// optimization pass.
1538 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// Presumably guarded by PreAlloc: the pre-RA rescheduler when true, the
// post-RA LDM/STM former otherwise — the guard line is not visible in
// this excerpt. TODO confirm.
1540 return new ARMPreAllocLoadStoreOpt();
1541 return new ARMLoadStoreOpt();