1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMRegisterInfo.h"
20 #include "llvm/DerivedTypes.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/Target/TargetData.h"
28 #include "llvm/Target/TargetInstrInfo.h"
29 #include "llvm/Target/TargetMachine.h"
30 #include "llvm/Target/TargetRegisterInfo.h"
31 #include "llvm/Support/Compiler.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/ADT/DenseMap.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
// Pass statistics (reported with -stats): one counter per kind of
// load/store transformation this file performs.
41 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
42 STATISTIC(NumSTMGened , "Number of stm instructions generated");
43 STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
44 STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
45 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
46 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
47 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
// The four counters below track ldrd/strd instructions with an invalid
// register pairing that had to be rewritten (see FixInvalidRegPairOp).
48 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
49 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
50 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
51 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
53 /// ARMAllocLoadStoreOpt - Post-register-allocation pass that combines
54 /// load / store instructions to form ldm / stm instructions.
// NOTE(review): this excerpt is elided — several member declarations
// (e.g. the Offset/Position/Merged fields of MemOpQueueEntry, the static
// ID member, and closing braces) are not visible here.
57 struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
59 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
// Cached target hooks, initialized in runOnMachineFunction.
61 const TargetInstrInfo *TII;
62 const TargetRegisterInfo *TRI;
66 virtual bool runOnMachineFunction(MachineFunction &Fn);
68 virtual const char *getPassName() const {
69 return "ARM load / store optimization pass";
// MemOpQueueEntry - One candidate load/store: its immediate offset, its
// position (instruction index) in the block, and an iterator to it.
73 struct MemOpQueueEntry {
76 MachineBasicBlock::iterator MBBI;
78 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
79 : Offset(o), Position(p), MBBI(i), Merged(false) {};
81 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
82 typedef MemOpQueue::iterator MemOpQueueIter;
// MergeOps - Emit a single LDM/STM covering Regs; returns true on success.
84 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
85 int Offset, unsigned Base, bool BaseKill, int Opcode,
86 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
87 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
// MergeLDR_STR - Greedily merge a queued run of loads/stores into one or
// more multiple-register ops, recording the merges in Merges.
88 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
89 int Opcode, unsigned Size,
90 ARMCC::CondCodes Pred, unsigned PredReg,
91 unsigned Scratch, MemOpQueue &MemOps,
92 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
94 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
95 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
96 MachineBasicBlock::iterator &MBBI);
97 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
98 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification; the address of ID uniquely identifies the pass.
100 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode (LDR, STR,
// FLDS, ...) to the corresponding multiple-register opcode (LDM, STM, ...).
// NOTE(review): the switch cases are elided in this excerpt; only the
// default (fatal error) case is visible.
103 static int getLoadStoreMultipleOpcode(int Opcode) {
123 default: llvm_report_error("Unhandled opcode!");
128 /// MergeOps - Create and insert a LDM or STM with Base as base register and
129 /// registers in Regs as the register operands that would be loaded / stored.
130 /// It returns true if the transformation is done.
// NOTE(review): several lines are elided in this excerpt (e.g. the
// NumRegs <= 1 early-out implied by the comment below, and sub-mode
// assignments in the if/else ladder).
132 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MBBI,
134 int Offset, unsigned Base, bool BaseKill,
135 int Opcode, ARMCC::CondCodes Pred,
136 unsigned PredReg, unsigned Scratch, DebugLoc dl,
137 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
138 // Only a single register to load / store. Don't bother.
139 unsigned NumRegs = Regs.size();
// Pick the addressing sub-mode (ia/ib/da/db) from the starting offset.
// AM4 is the integer LDM/STM form (LDR/STR opcodes); otherwise AM5 (VFP).
143 ARM_AM::AMSubMode Mode = ARM_AM::ia;
144 bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
145 if (isAM4 && Offset == 4)
147 else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
149 else if (isAM4 && Offset == -4 * (int)NumRegs)
151 else if (Offset != 0) {
152 // If starting offset isn't zero, insert a MI to materialize a new base.
153 // But only do so if it is cost effective, i.e. merging more than two
159 if (Opcode == ARM::LDR)
160 // If it is a load, then just use one of the destination register to
161 // use as the new base.
162 NewBase = Regs[NumRegs-1].first;
164 // Use the scratch register to use as a new base.
// Materialize Base+Offset (or Base-Offset) into NewBase with ADDri/SUBri,
// but only when the offset encodes as an ARM shifter immediate.
169 int BaseOpc = ARM::ADDri;
171 BaseOpc = ARM::SUBri;
174 int ImmedOffset = ARM_AM::getSOImmVal(Offset);
175 if (ImmedOffset == -1)
176 return false; // Probably not worth it then.
178 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
179 .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset)
180 .addImm(Pred).addReg(PredReg).addReg(0);
182 BaseKill = true; // New base is always killed right its use.
// Emit the LDM/STM (AM4) or FLDM/FSTM (AM5) and append the register list.
// AM5 counts double-precision registers as two words, hence NumRegs<<1.
185 bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
186 bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
187 Opcode = getLoadStoreMultipleOpcode(Opcode);
188 MachineInstrBuilder MIB = (isAM4)
189 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
190 .addReg(Base, getKillRegState(BaseKill))
191 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
192 : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
193 .addReg(Base, getKillRegState(BaseKill))
194 .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
195 .addImm(Pred).addReg(PredReg);
196 for (unsigned i = 0; i != NumRegs; ++i)
197 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
198 | getKillRegState(Regs[i].second));
203 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
204 /// load / store multiple instructions.
// Scans MemOps from SIndex, collecting consecutive entries whose offsets
// ascend by Size and whose register numbers satisfy the addressing-mode
// ordering rules; merges each maximal run via MergeOps and recurses on the
// remainder. NOTE(review): some lines are elided in this excerpt.
206 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
207 unsigned Base, int Opcode, unsigned Size,
208 ARMCC::CondCodes Pred, unsigned PredReg,
209 unsigned Scratch, MemOpQueue &MemOps,
210 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
211 bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
212 int Offset = MemOps[SIndex].Offset;
213 int SOffset = Offset;
214 unsigned Pos = MemOps[SIndex].Position;
215 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
216 DebugLoc dl = Loc->getDebugLoc();
217 unsigned PReg = Loc->getOperand(0).getReg();
218 unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
219 bool isKill = Loc->getOperand(0).isKill();
// Seed the register list with the first op, then try to extend the run.
221 SmallVector<std::pair<unsigned,bool>, 8> Regs;
222 Regs.push_back(std::make_pair(PReg, isKill));
223 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
224 int NewOffset = MemOps[i].Offset;
225 unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
226 unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
227 isKill = MemOps[i].MBBI->getOperand(0).isKill();
228 // AM4 - register numbers in ascending order.
229 // AM5 - consecutive register numbers in ascending order.
230 if (NewOffset == Offset + (int)Size &&
231 ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
233 Regs.push_back(std::make_pair(Reg, isKill));
236 // Can't merge this in. Try merge the earlier ones first.
237 if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
238 Scratch, dl, Regs)) {
239 Merges.push_back(prior(Loc));
240 for (unsigned j = SIndex; j < i; ++j) {
241 MBB.erase(MemOps[j].MBBI);
242 MemOps[j].Merged = true;
// Recurse to merge the rest of the queue starting at the entry that
// broke the run.
245 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// Track the latest position so the merged op is inserted after every
// instruction it replaces.
250 if (MemOps[i].Position > Pos) {
251 Pos = MemOps[i].Position;
252 Loc = MemOps[i].MBBI;
// Whole remaining queue formed one run: merge it all in one shot.
256 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
257 if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
258 Scratch, dl, Regs)) {
259 Merges.push_back(prior(Loc));
260 for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
261 MBB.erase(MemOps[i].MBBI);
262 MemOps[i].Merged = true;
269 /// getInstrPredicate - If instruction is predicated, returns its predicate
270 /// condition, otherwise returns AL. It also returns the condition code
271 /// register by reference.
// NOTE(review): the unpredicated (PIdx == -1) early-return path is elided
// in this excerpt.
272 static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
273 int PIdx = MI->findFirstPredOperandIdx();
// Predicate operands come in pairs: condition immediate, then CC register.
279 PredReg = MI->getOperand(PIdx+1).getReg();
280 return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
// isMatchingDecrement - True if MI is "SUBri Base, Base, #Bytes" under the
// same predicate (Pred/PredReg) — i.e. a base decrement that can be folded
// into a neighboring load/store as writeback.
283 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
284 unsigned Bytes, ARMCC::CondCodes Pred,
286 unsigned MyPredReg = 0;
287 return (MI && MI->getOpcode() == ARM::SUBri &&
288 MI->getOperand(0).getReg() == Base &&
289 MI->getOperand(1).getReg() == Base &&
290 ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
291 getInstrPredicate(MI, MyPredReg) == Pred &&
292 MyPredReg == PredReg);
// isMatchingIncrement - True if MI is "ADDri Base, Base, #Bytes" under the
// same predicate — the mirror of isMatchingDecrement for base increments.
295 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
296 unsigned Bytes, ARMCC::CondCodes Pred,
298 unsigned MyPredReg = 0;
299 return (MI && MI->getOpcode() == ARM::ADDri &&
300 MI->getOperand(0).getReg() == Base &&
301 MI->getOperand(1).getReg() == Base &&
302 ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
303 getInstrPredicate(MI, MyPredReg) == Pred &&
304 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes transferred by MI.
// NOTE(review): most switch cases are elided in this excerpt; the two
// visible returns handle (a) LDM/STM-style ops, where each register
// operand after the first four transfers 4 bytes, and (b) AM5 (FLDM/FSTM)
// ops, whose word count is encoded in operand 1.
307 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
308 switch (MI->getOpcode()) {
320 return (MI->getNumOperands() - 4) * 4;
325 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
329 /// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
330 /// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
332 /// stmia rn, <ra, rb, rc>
333 /// rn := rn + 4 * 3;
335 /// stmia rn!, <ra, rb, rc>
337 /// rn := rn - 4 * 3;
338 /// ldmia rn, <ra, rb, rc>
340 /// ldmdb rn!, <ra, rb, rc>
// NOTE(review): several lines are elided in this excerpt (e.g. erasing the
// matched ADD/SUB, updating the caller's iterator I, and return points).
341 static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
342 MachineBasicBlock::iterator MBBI,
344 MachineBasicBlock::iterator &I) {
345 MachineInstr *MI = MBBI;
346 unsigned Base = MI->getOperand(0).getReg();
347 unsigned Bytes = getLSMultipleTransferSize(MI);
348 unsigned PredReg = 0;
349 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
350 int Opcode = MI->getOpcode();
351 bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
// Already using writeback: nothing to fold.
354 if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
357 // Can't use the updating AM4 sub-mode if the base register is also a dest
358 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
359 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
360 if (MI->getOperand(i).getReg() == Base)
// Try to fold a preceding SUB of the base: flip the sub-mode and set the
// writeback flag so the LDM/STM performs the decrement itself.
364 ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
365 if (MBBI != MBB.begin()) {
366 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
367 if (Mode == ARM_AM::ia &&
368 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
369 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
372 } else if (Mode == ARM_AM::ib &&
373 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
374 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
// Try to fold a following ADD/SUB of the base: keep the sub-mode, just
// turn on writeback.
380 if (MBBI != MBB.end()) {
381 MachineBasicBlock::iterator NextMBBI = next(MBBI);
382 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
383 isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
384 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
391 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
392 isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
393 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
403 // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
404 if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
// Same folding as above, but for the VFP (AM5) encoding, which only
// supports ia (fold preceding dec as db) and keeps the word count.
407 ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
408 unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
409 if (MBBI != MBB.begin()) {
410 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
411 if (Mode == ARM_AM::ia &&
412 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
413 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
419 if (MBBI != MBB.end()) {
420 MachineBasicBlock::iterator NextMBBI = next(MBBI);
421 if (Mode == ARM_AM::ia &&
422 isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
423 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
// getPreIndexedLoadStoreOpcode - Map a load/store opcode to its pre-indexed
// (writeback-before-access) form. VFP loads/stores have no dedicated
// pre-indexed form, so they map to the single-register FLDM/FSTM variants.
437 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
439 case ARM::LDR: return ARM::LDR_PRE;
440 case ARM::STR: return ARM::STR_PRE;
441 case ARM::FLDS: return ARM::FLDMS;
442 case ARM::FLDD: return ARM::FLDMD;
443 case ARM::FSTS: return ARM::FSTMS;
444 case ARM::FSTD: return ARM::FSTMD;
445 default: llvm_report_error("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a load/store opcode to its post-indexed
// (writeback-after-access) form; mirror of getPreIndexedLoadStoreOpcode.
450 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
452 case ARM::LDR: return ARM::LDR_POST;
453 case ARM::STR: return ARM::STR_POST;
454 case ARM::FLDS: return ARM::FLDMS;
455 case ARM::FLDD: return ARM::FLDMD;
456 case ARM::FSTS: return ARM::FSTMS;
457 case ARM::FSTD: return ARM::FSTMD;
458 default: llvm_report_error("Unhandled opcode!");
463 /// mergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
464 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// NOTE(review): several lines are elided in this excerpt (e.g. DoMerge
// assignments, erasing the matched ADD/SUB, and the final MBB.erase(MBBI)).
465 static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
466 MachineBasicBlock::iterator MBBI,
467 const TargetInstrInfo *TII,
469 MachineBasicBlock::iterator &I) {
470 MachineInstr *MI = MBBI;
471 unsigned Base = MI->getOperand(1).getReg();
472 bool BaseKill = MI->getOperand(1).isKill();
473 unsigned Bytes = getLSMultipleTransferSize(MI);
474 int Opcode = MI->getOpcode();
475 DebugLoc dl = MI->getDebugLoc();
476 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
// Only handle zero-offset loads/stores; a nonzero immediate can't absorb
// the neighboring add/sub.
477 if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
478 (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
481 bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
482 // Can't do the merge if the destination register is the same as the would-be
483 // writeback register.
484 if (isLd && MI->getOperand(0).getReg() == Base)
487 unsigned PredReg = 0;
488 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
489 bool DoMerge = false;
490 ARM_AM::AddrOpc AddSub = ARM_AM::add;
// A preceding matching ADD/SUB folds as a pre-indexed access...
492 if (MBBI != MBB.begin()) {
493 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
494 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
496 AddSub = ARM_AM::sub;
497 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
498 } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
501 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
// ...while a following matching ADD/SUB folds as a post-indexed access.
507 if (!DoMerge && MBBI != MBB.end()) {
508 MachineBasicBlock::iterator NextMBBI = next(MBBI);
509 if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
511 AddSub = ARM_AM::sub;
512 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
513 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
515 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
// Build the replacement. AM2 encodes add/sub + byte offset; AM5 uses a
// writeback FLDM/FSTM whose count is 2 words for double precision.
529 bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
530 unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
531 : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
532 true, isDPR ? 2 : 1);
535 // LDR_PRE, LDR_POST;
536 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
537 .addReg(Base, RegState::Define)
538 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg)
// VFP load with writeback: base is updated, loaded reg is the last operand.
541 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
542 .addReg(Base, getKillRegState(BaseKill))
543 .addImm(Offset).addImm(Pred).addReg(PredReg)
544 .addReg(MI->getOperand(0).getReg(), RegState::Define)
546 MachineOperand &MO = MI->getOperand(0)
548 // STR_PRE, STR_POST
549 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
550 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
551 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg)
// VFP store with writeback.
554 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
555 .addImm(Pred).addReg(PredReg)
556 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
563 /// isMemoryOp - Returns true if instruction is a memory operations (that this
564 /// pass is capable of operating on).
// NOTE(review): the switch header and case labels are elided in this
// excerpt. The first visible return appears to reject register-offset
// forms (operand 2 must be the zero/no-register sentinel).
565 static bool isMemoryOp(MachineInstr *MI) {
566 int Opcode = MI->getOpcode();
571 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
574 return MI->getOperand(1).isReg();
577 return MI->getOperand(1).isReg();
582 /// AdvanceRS - Advance register scavenger to just before the earliest memory
583 /// op that is being merged.
584 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the queue entry with the smallest Position (earliest in the block).
585 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
586 unsigned Position = MemOps[0].Position;
587 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
588 if (MemOps[i].Position < Position) {
589 Position = MemOps[i].Position;
590 Loc = MemOps[i].MBBI;
// Step the scavenger forward to the instruction before the earliest op
// (nothing to do if that op is the first instruction of the block).
594 if (Loc != MBB.begin())
595 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode the signed immediate offset of a load/store,
// regardless of addressing mode: AM2 (LDR/STR), AM3 (LDRD/STRD), or AM5
// (VFP, stored in words and scaled by 4 here). The add/sub bit in the
// encoding determines the sign. NOTE(review): some lines (e.g. the Offset
// declaration and negation/return statements) are elided in this excerpt.
598 static int getMemoryOpOffset(const MachineInstr *MI) {
599 int Opcode = MI->getOpcode();
600 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
601 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
// The offset field is always the third-from-last operand.
602 unsigned NumOperands = MI->getDesc().getNumOperands();
603 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
605 ? ARM_AM::getAM2Offset(OffField)
606 : (isAM3 ? ARM_AM::getAM3Offset(OffField)
607 : ARM_AM::getAM5Offset(OffField) * 4);
609 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
612 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
615 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR or STR (opcode NewOpc) before MBBI,
// with an AM2 immediate offset built from OffImm. Used by
// FixInvalidRegPairOp when splitting an ldrd/strd into two word accesses.
// isDef selects load (dest is a def) vs store (source may be killed).
621 static void InsertLDR_STR(MachineBasicBlock &MBB,
622 MachineBasicBlock::iterator &MBBI,
623 int OffImm, bool isDef,
624 DebugLoc dl, unsigned NewOpc,
625 unsigned Reg, bool RegDeadKill,
626 unsigned BaseReg, bool BaseKill,
627 unsigned OffReg, bool OffKill,
628 ARMCC::CondCodes Pred, unsigned PredReg,
629 const TargetInstrInfo *TII) {
// Encode a negative immediate as a subtracting AM2 offset.
632 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
634 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
636 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
637 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
638 .addReg(BaseReg, getKillRegState(BaseKill))
639 .addReg(OffReg, getKillRegState(OffKill))
641 .addImm(Pred).addReg(PredReg);
// Store form: Reg is a source operand, so kill (not dead) state applies.
643 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
644 .addReg(Reg, getKillRegState(RegDeadKill))
645 .addReg(BaseReg, getKillRegState(BaseKill))
646 .addReg(OffReg, getKillRegState(OffKill))
648 .addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - An ldrd/strd needs an even/odd consecutive register
// pair. If register allocation produced an invalid pairing, rewrite the
// instruction either as an LDM/STM (ascending regs, zero offset) or as two
// separate LDR/STR ops. Returns true if MI was rewritten (and erased).
// NOTE(review): several lines are elided in this excerpt (e.g. early
// returns, statistic updates, and the erase of the original instruction).
651 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
652 MachineBasicBlock::iterator &MBBI) {
653 MachineInstr *MI = &*MBBI;
654 unsigned Opcode = MI->getOpcode();
655 if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
656 unsigned EvenReg = MI->getOperand(0).getReg();
657 unsigned OddReg = MI->getOperand(1).getReg();
658 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
659 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A valid pair (even reg followed by the next odd reg) needs no fixing.
660 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
663 bool isLd = Opcode == ARM::LDRD;
664 bool EvenDeadKill = isLd ?
665 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
666 bool OddDeadKill = isLd ?
667 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
668 const MachineOperand &BaseOp = MI->getOperand(2);
669 unsigned BaseReg = BaseOp.getReg();
670 bool BaseKill = BaseOp.isKill();
671 const MachineOperand &OffOp = MI->getOperand(3);
672 unsigned OffReg = OffOp.getReg();
673 bool OffKill = OffOp.isKill();
674 int OffImm = getMemoryOpOffset(MI);
675 unsigned PredReg = 0;
676 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
678 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
679 // Ascending register numbers and no offset. It's safe to change it to a
681 unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
683 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
684 .addReg(BaseReg, getKillRegState(BaseKill))
685 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
686 .addImm(Pred).addReg(PredReg)
687 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
688 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store variant of the LDM/STM rewrite: both regs are sources (kills).
691 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
692 .addReg(BaseReg, getKillRegState(BaseKill))
693 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
694 .addImm(Pred).addReg(PredReg)
695 .addReg(EvenReg, getKillRegState(EvenDeadKill))
696 .addReg(OddReg, getKillRegState(OddDeadKill));
700 // Split into two instructions.
701 unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
702 DebugLoc dl = MBBI->getDebugLoc();
703 // If this is a load and base register is killed, it may have been
704 // re-defed by the load, make sure the first load does not clobber it.
706 (BaseKill || OffKill) &&
707 (TRI->regsOverlap(EvenReg, BaseReg) ||
708 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
709 assert(!TRI->regsOverlap(OddReg, BaseReg) &&
710 (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
// Load the high word first so the even (base-overlapping) load runs last.
711 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
712 BaseReg, false, OffReg, false, Pred, PredReg, TII);
713 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
714 BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
// Normal order: even word first, odd word (at +4) second; kill states on
// base/offset go on the last access only.
716 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
717 EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
719 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
720 OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
735 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
736 /// ops of the same base and incrementing offset into LDM / STM ops.
// Walks the block once, accumulating compatible loads/stores (same opcode,
// base, and predicate) into MemOps sorted by offset, then flushes each
// chain through MergeLDR_STR and the base-update folders.
// NOTE(review): numerous lines are elided in this excerpt (loop header,
// chain-reset logic, iterator advancement, CurrOpc declaration, etc.).
737 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
738 unsigned NumMerges = 0;
739 unsigned NumMemOps = 0;
741 unsigned CurrBase = 0;
743 unsigned CurrSize = 0;
744 ARMCC::CondCodes CurrPred = ARMCC::AL;
745 unsigned CurrPredReg = 0;
746 unsigned Position = 0;
747 SmallVector<MachineBasicBlock::iterator,4> Merges;
749 RS->enterBasicBlock(&MBB);
750 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// Invalid ldrd/strd pairs are rewritten first; they may become mergeable.
752 if (FixInvalidRegPairOp(MBB, MBBI))
755 bool Advance = false;
756 bool TryMerge = false;
757 bool Clobber = false;
759 bool isMemOp = isMemoryOp(MBBI);
761 int Opcode = MBBI->getOpcode();
762 unsigned Size = getLSMultipleTransferSize(MBBI);
763 unsigned Base = MBBI->getOperand(1).getReg();
764 unsigned PredReg = 0;
765 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
766 int Offset = getMemoryOpOffset(MBBI);
769 // r5 := ldr [r5, #4]
770 // r6 := ldr [r5, #8]
772 // The second ldr has effectively broken the chain even though it
773 // looks like the later ldr(s) use the same base register. Try to
774 // merge the ldr's so far, including this one. But don't try to
775 // combine the following ldr(s).
776 Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
777 if (CurrBase == 0 && !Clobber) {
778 // Start of a new chain.
783 CurrPredReg = PredReg;
784 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
// Same chain: insert into MemOps keeping offsets sorted ascending.
793 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
794 // No need to match PredReg.
795 // Continue adding to the queue.
796 if (Offset > MemOps.back().Offset) {
797 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
801 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
803 if (Offset < I->Offset) {
804 MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
808 } else if (Offset == I->Offset) {
809 // Collision! This can't be merged!
826 // Try to find a free register to use as a new base in case it's needed.
827 // First advance to the instruction just before the start of the chain.
828 AdvanceRS(MBB, MemOps);
829 // Find a scratch register. Make sure it's a call clobbered register or
830 // a spilled callee-saved register.
831 unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
833 Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
834 AFI->getSpilledCSRegisters());
835 // Process the load / store instructions.
836 RS->forward(prior(MBBI));
840 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
841 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
843 // Try folding preceeding/trailing base inc/dec into the generated
845 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
846 if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
848 NumMerges += Merges.size();
850 // Try folding preceeding/trailing base inc/dec into those load/store
851 // that were not merged to form LDM/STM ops.
852 for (unsigned i = 0; i != NumMemOps; ++i)
853 if (!MemOps[i].Merged)
854 if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
857 // RS may be pointing to an instruction that's deleted.
858 RS->skipTo(prior(MBBI));
859 } else if (NumMemOps == 1) {
860 // Try folding preceeding/trailing base inc/dec into the single
862 if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
864 RS->forward(prior(MBBI));
// Reset chain state before scanning for the next one.
871 CurrPred = ARMCC::AL;
878 // If iterator hasn't been advanced and this is not a memory op, skip it.
879 // It can't start a new chain anyway.
880 if (!Advance && !isMemOp && MBBI != E) {
886 return NumMerges > 0;
// OffsetCompare - Strict-weak ordering for sorting memory ops by their
// immediate offset, descending (used by RescheduleOps). Equal offsets for
// distinct instructions would make the merge ambiguous, hence the assert.
890 struct OffsetCompare {
891 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
892 int LOffset = getMemoryOpOffset(LHS);
893 int ROffset = getMemoryOpOffset(RHS);
894 assert(LHS == RHS || LOffset != ROffset);
895 return LOffset > ROffset;
900 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
901 /// (bx lr) into the preceeding stack restore so it directly restore the value
903 /// ldmfd sp!, {r7, lr}
906 /// ldmfd sp!, {r7, pc}
// NOTE(review): lines that rewrite LR to PC and erase the BX_RET are
// elided in this excerpt; only the opcode swap to LDM_RET is visible.
907 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
908 if (MBB.empty()) return false;
// Only applies when the block ends in "bx lr" immediately preceded by an
// LDM whose last restored register is LR.
910 MachineBasicBlock::iterator MBBI = prior(MBB.end());
911 if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
912 MachineInstr *PrevMI = prior(MBBI);
913 if (PrevMI->getOpcode() == ARM::LDM) {
914 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
915 if (MO.getReg() == ARM::LR) {
916 PrevMI->setDesc(TII->get(ARM::LDM_RET));
// runOnMachineFunction - Entry point for the post-allocation pass: cache
// target hooks, create a register scavenger, then run the two block-level
// optimizations on every basic block.
// NOTE(review): the matching "delete RS" / "return Modified" lines are
// elided in this excerpt; RS is heap-allocated here and must be freed.
926 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
927 const TargetMachine &TM = Fn.getTarget();
928 AFI = Fn.getInfo<ARMFunctionInfo>();
929 TII = TM.getInstrInfo();
930 TRI = TM.getRegisterInfo();
931 RS = new RegScavenger();
933 bool Modified = false;
934 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
936 MachineBasicBlock &MBB = *MFI;
937 Modified |= LoadStoreMultipleOpti(MBB);
938 Modified |= MergeReturnIntoLDM(MBB);
946 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
947 /// load / stores from consecutive locations close to make it more
948 /// likely they will be combined later.
// NOTE(review): struct braces and some declarations (e.g. the static ID
// member) are elided in this excerpt.
951 struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
953 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
// Cached target/function info, initialized in runOnMachineFunction.
955 const TargetData *TD;
956 const TargetInstrInfo *TII;
957 const TargetRegisterInfo *TRI;
958 const ARMSubtarget *STI;
959 MachineRegisterInfo *MRI;
961 virtual bool runOnMachineFunction(MachineFunction &Fn);
963 virtual const char *getPassName() const {
964 return "ARM pre- register allocation load / store optimization pass";
// CanFormLdStDWord - Check whether two adjacent word accesses can be
// combined into a single ldrd/strd; outputs the operands for the pair.
968 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
969 unsigned &NewOpc, unsigned &EvenReg,
970 unsigned &OddReg, unsigned &BaseReg,
971 unsigned &OffReg, unsigned &Offset,
972 unsigned &PredReg, ARMCC::CondCodes &Pred);
973 bool RescheduleOps(MachineBasicBlock *MBB,
974 SmallVector<MachineInstr*, 4> &Ops,
975 unsigned Base, bool isLd,
976 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
977 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification.
979 char ARMPreAllocLoadStoreOpt::ID = 0;
// runOnMachineFunction - Entry point for the pre-allocation pass: cache
// target hooks, then reschedule loads/stores in every basic block.
// NOTE(review): the "return Modified" line is elided in this excerpt.
982 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
983 TD = Fn.getTarget().getTargetData();
984 TII = Fn.getTarget().getInstrInfo();
985 TRI = Fn.getTarget().getRegisterInfo();
986 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
987 MRI = &Fn.getRegInfo();
989 bool Modified = false;
990 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
992 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Decide whether the loads/stores in MemOps
// (all using Base) can be moved next to each other across [I, E) without
// changing behavior, and whether the induced register-pressure increase
// (AddedRegPressure) is acceptable.
// NOTE(review): loop headers, early returns, and the store-aliasing check
// implied by the comment at internal line 1018 are elided in this excerpt.
997 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
998 MachineBasicBlock::iterator I,
999 MachineBasicBlock::iterator E,
1000 SmallPtrSet<MachineInstr*, 4> &MemOps,
1001 SmallSet<unsigned, 4> &MemRegs,
1002 const TargetRegisterInfo *TRI) {
1003 // Are there stores / loads / calls between them?
1004 // FIXME: This is overly conservative. We should make use of alias information
1006 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions that are themselves part of the set being moved are fine.
1008 if (MemOps.count(&*I))
1010 const TargetInstrDesc &TID = I->getDesc();
1011 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
1013 if (isLd && TID.mayStore())
1018 // It's not safe to move the first 'str' down.
1021 // str r4, [r0, #+4]
// Any def of (a register overlapping) Base kills the reschedule; every
// other register touched in between adds to estimated pressure.
1025 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1026 MachineOperand &MO = I->getOperand(j);
1029 unsigned Reg = MO.getReg();
1030 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1032 if (Reg != Base && !MemRegs.count(Reg))
1033 AddedRegPressure.insert(Reg);
1037 // Estimate register pressure increase due to the transformation.
1038 if (MemRegs.size() <= 4)
1039 // Ok if we are moving small number of instructions.
1041 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Check whether Op0/Op1 (two LDRs or two STRs to
// adjacent words) can be combined into one LDRD/STRD. On success, fills in
// the operands (NewOpc, registers, encoded AM3 Offset, predicate, debug
// loc) for the caller to build the pair instruction.
// NOTE(review): several lines are elided in this excerpt (NewOpc
// assignments, negative-offset handling, and the final return).
1045 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1047 unsigned &NewOpc, unsigned &EvenReg,
1048 unsigned &OddReg, unsigned &BaseReg,
1049 unsigned &OffReg, unsigned &Offset,
1051 ARMCC::CondCodes &Pred) {
1052 // FIXME: FLDS / FSTS -> FLDD / FSTD
1053 unsigned Opcode = Op0->getOpcode();
1054 if (Opcode == ARM::LDR)
1056 else if (Opcode == ARM::STR)
1061 // Must sure the base address satisfies i64 ld / st alignment requirement.
// Reject unanalyzable or volatile accesses outright.
1062 if (!Op0->hasOneMemOperand() ||
1063 !Op0->memoperands_begin()->getValue() ||
1064 Op0->memoperands_begin()->isVolatile())
1067 unsigned Align = Op0->memoperands_begin()->getAlignment();
1068 unsigned ReqAlign = STI->hasV6Ops()
1069 ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align
1070 if (Align < ReqAlign)
1073 // Then make sure the immediate offset fits.
1074 int OffImm = getMemoryOpOffset(Op0);
1075 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1077 AddSub = ARM_AM::sub;
// AM3 (ldrd/strd) immediates are limited to 8 bits.
1080 if (OffImm >= 256) // 8 bits
1082 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// ldrd/strd needs two distinct transfer registers.
1084 EvenReg = Op0->getOperand(0).getReg();
1085 OddReg = Op1->getOperand(0).getReg();
1086 if (EvenReg == OddReg)
1088 BaseReg = Op0->getOperand(1).getReg();
1089 OffReg = Op0->getOperand(2).getReg();
1090 Pred = getInstrPredicate(Op0, PredReg);
1091 dl = Op0->getDebugLoc();
// RescheduleOps - Move loads / stores that use the same base register closer
// together so that later passes can merge them into ldm / stm, or so that a
// contiguous pair can be turned into LDRD / STRD right here.  Ops holds the
// candidate memory ops for Base; MI2LocMap maps each instruction to its
// original position within the block.  Returns true if anything was moved
// or rewritten.
// NOTE(review): a number of original lines are missing from this extract
// (several loop-body statements, early exits, and closing braces); the
// comments below describe only the visible code.
1095 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1096 SmallVector<MachineInstr*, 4> &Ops,
1097 unsigned Base, bool isLd,
1098 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1099 bool RetVal = false;
1101 // Sort by offset (in reverse order).
1102 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1104 // The loads / stores of the same base are in order. Scan them from first to
1105 // last and check for the following:
1106 // 1. Any def of base.
// Greedily peel runs of candidate instructions off the (reverse-sorted)
// list until fewer than two remain.
1108 while (Ops.size() > 1) {
1109 unsigned FirstLoc = ~0U;
1110 unsigned LastLoc = 0;
1111 MachineInstr *FirstOp = 0;
1112 MachineInstr *LastOp = 0;
1114 unsigned LastOpcode = 0;
1115 unsigned LastBytes = 0;
1116 unsigned NumMove = 0;
// Walk the candidates from lowest offset upward, tracking the earliest and
// latest original locations of the run.  The run ends when the opcode
// changes, when the offsets stop being exactly contiguous (each offset must
// advance by the transfer size), or when it reaches 8 instructions.
1117 for (int i = Ops.size() - 1; i >= 0; --i) {
1118 MachineInstr *Op = Ops[i];
1119 unsigned Loc = MI2LocMap[Op];
1120 if (Loc <= FirstLoc) {
1124 if (Loc >= LastLoc) {
1129 unsigned Opcode = Op->getOpcode();
1130 if (LastOpcode && Opcode != LastOpcode)
1133 int Offset = getMemoryOpOffset(Op);
1134 unsigned Bytes = getLSMultipleTransferSize(Op);
1136 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1139 LastOffset = Offset;
1141 LastOpcode = Opcode;
1142 if (++NumMove == 8) // FIXME: Tune
// Collect the run's instructions and their data registers for the safety /
// profitability check below.
1149 SmallPtrSet<MachineInstr*, 4> MemOps;
1150 SmallSet<unsigned, 4> MemRegs;
1151 for (int i = NumMove-1; i >= 0; --i) {
1152 MemOps.insert(Ops[i]);
1153 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1156 // Be conservative, if the instructions are too far apart, don't
1157 // move them. We want to limit the increase of register pressure.
1158 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
// IsSafeAndProfitableToMove is not visible in this extract -- presumably it
// rejects runs with intervening defs/uses of Base or MemRegs; TODO confirm.
1160 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1161 MemOps, MemRegs, TRI);
1163 for (unsigned i = 0; i != NumMove; ++i)
1166 // This is the new location for the loads / stores.
// Loads are hoisted to the first op's position, stores sunk to the last;
// skip over members of the run itself when choosing the insertion point.
1167 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1168 while (InsertPos != MBB->end() && MemOps.count(InsertPos))
1171 // If we are moving a pair of loads / stores, see if it makes sense
1172 // to try to allocate a pair of registers that can form register pairs.
1173 MachineInstr *Op0 = Ops.back();
1174 MachineInstr *Op1 = Ops[Ops.size()-2];
1175 unsigned EvenReg = 0, OddReg = 0;
1176 unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
1177 ARMCC::CondCodes Pred = ARMCC::AL;
1178 unsigned NewOpc = 0;
1179 unsigned Offset = 0;
1181 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1182 EvenReg, OddReg, BaseReg, OffReg,
1183 Offset, PredReg, Pred)) {
1187 // Form the pair instruction.
// First BuildMI: the load form, defining the even/odd register pair.
1189 BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
1190 .addReg(EvenReg, RegState::Define)
1191 .addReg(OddReg, RegState::Define)
1192 .addReg(BaseReg).addReg(0).addImm(Offset)
1193 .addImm(Pred).addReg(PredReg);
// Second BuildMI: the store form.  NOTE(review): the operands adding the
// source register pair (original lines 1197-1198) are missing from this
// extract.
1196 BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
1199 .addReg(BaseReg).addReg(0).addImm(Offset)
1200 .addImm(Pred).addReg(PredReg);
1206 // Add register allocation hints to form register pairs.
1207 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1208 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Otherwise just splice each instruction of the run to the chosen insertion
// point, preserving their offset order.
1210 for (unsigned i = 0; i != NumMove; ++i) {
1211 MachineInstr *Op = Ops.back();
1213 MBB->splice(InsertPos, MBB, Op);
1217 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Scan a basic block, numbering instructions in
// program order and bucketing unpredicated memory ops by their base
// register; each bucket of loads and each bucket of stores is then handed
// to RescheduleOps.  Returns true if any instructions were rescheduled.
// NOTE(review): the function's return type line and several bodies (barrier
// handling, StopHere processing, loop/brace closers) are missing from this
// extract; comments describe only the visible code.
1227 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1228 bool RetVal = false;
// Per-region state: instruction numbering, per-base-register buckets of
// loads and stores, and the bases in first-seen order.
1230 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1231 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1232 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1233 SmallVector<unsigned, 4> LdBases;
1234 SmallVector<unsigned, 4> StBases;
1237 MachineBasicBlock::iterator MBBI = MBB->begin();
1238 MachineBasicBlock::iterator E = MBB->end();
1240 for (; MBBI != E; ++MBBI) {
1241 MachineInstr *MI = MBBI;
1242 const TargetInstrDesc &TID = MI->getDesc();
// Calls and terminators end the current scheduling region (the body that
// processes the region here is not visible in this extract).
1243 if (TID.isCall() || TID.isTerminator()) {
1244 // Stop at barriers.
// Record this instruction's position for RescheduleOps' distance checks.
1249 MI2LocMap[MI] = Loc++;
1250 if (!isMemoryOp(MI))
// Only unconditional (ARMCC::AL) memory ops are rescheduled.
1252 unsigned PredReg = 0;
1253 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1256 int Opcode = MI->getOpcode();
1257 bool isLd = Opcode == ARM::LDR ||
1258 Opcode == ARM::FLDS || Opcode == ARM::FLDD;
1259 unsigned Base = MI->getOperand(1).getReg();
1260 int Offset = getMemoryOpOffset(MI);
1262 bool StopHere = false;
// Bucket loads by base register; a second access at the same base+offset is
// treated as a duplicate (see the note below).
1264 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1265 Base2LdsMap.find(Base);
1266 if (BI != Base2LdsMap.end()) {
1267 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1268 if (Offset == getMemoryOpOffset(BI->second[i])) {
1274 BI->second.push_back(MI);
1276 SmallVector<MachineInstr*, 4> MIs;
1278 Base2LdsMap[Base] = MIs;
1279 LdBases.push_back(Base);
// Same bucketing for stores.
1282 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1283 Base2StsMap.find(Base);
1284 if (BI != Base2StsMap.end()) {
1285 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1286 if (Offset == getMemoryOpOffset(BI->second[i])) {
1292 BI->second.push_back(MI);
1294 SmallVector<MachineInstr*, 4> MIs;
1296 Base2StsMap[Base] = MIs;
1297 StBases.push_back(Base);
1302 // Found a duplicate (a base+offset combination that's seen earlier).
// NOTE(review): the code that acts on StopHere (presumably flushing the
// current region before continuing) is not visible in this extract.
1309 // Re-schedule loads.
1310 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1311 unsigned Base = LdBases[i];
1312 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1314 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1317 // Re-schedule stores.
1318 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1319 unsigned Base = StBases[i];
1320 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1322 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the per-region buckets before the next region / return.
1326 Base2LdsMap.clear();
1327 Base2StsMap.clear();
1337 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1338 /// optimization pass.
1339 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// PreAlloc selects the pre-register-allocation variant of the pass.
// NOTE(review): the guard between the two returns (presumably
// "if (PreAlloc)", original line 1340) is missing from this extract --
// as shown, the second return would be unreachable; confirm against the
// full source.
1341 return new ARMPreAllocLoadStoreOpt();
1342 return new ARMLoadStoreOpt();