1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass statistics (see llvm/ADT/Statistic.h): counters reported with -stats,
// tracking how many merged multiple-ops and ldrd/strd rewrites were produced.
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register-allocation pass that combines adjacent
55 /// load / store instructions to form ldm / stm instructions.
// NOTE(review): this excerpt elides several declarations (pass ID member,
// the RS/AFI/isThumb2 fields referenced by the method bodies below, and
// closing braces) -- confirm against the full file before editing.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - one candidate memory op: its immediate offset, its
// ordinal position within the basic block, and an iterator to the MI.
75 struct MemOpQueueEntry {
78 MachineBasicBlock::iterator MBBI;
80 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
81 : Offset(o), Position(p), MBBI(i), Merged(false) {}
83 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
84 typedef MemOpQueue::iterator MemOpQueueIter;
// MergeOps - emit a single LDM/STM (or VLDM/VSTM) covering Regs.
86 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
87 int Offset, unsigned Base, bool BaseKill, int Opcode,
88 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
89 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
// MergeOpsUpdate - wrapper around MergeOps that also updates the MemOps
// queue and the Merges list on success.
90 void MergeOpsUpdate(MachineBasicBlock &MBB,
99 ARMCC::CondCodes Pred,
103 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
// MergeLDR_STR - greedily merge a run of queued ops starting at SIndex.
104 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
105 int Opcode, unsigned Size,
106 ARMCC::CondCodes Pred, unsigned PredReg,
107 unsigned Scratch, MemOpQueue &MemOps,
108 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
110 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
111 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
112 MachineBasicBlock::iterator &MBBI);
113 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
114 MachineBasicBlock::iterator MBBI,
115 const TargetInstrInfo *TII,
117 MachineBasicBlock::iterator &I);
118 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
119 MachineBasicBlock::iterator MBBI,
121 MachineBasicBlock::iterator &I);
122 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
123 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification, replacement for typeid.
125 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single-register load/store opcode to
// its load/store-multiple counterpart (e.g. LDR -> LDM). The switch cases
// are elided in this excerpt; only the default branch is visible.
128 static int getLoadStoreMultipleOpcode(int Opcode) {
156 default: llvm_unreachable("Unhandled opcode!");
// isT2i32Load - True for the Thumb2 32-bit integer load opcodes this pass
// knows how to merge.
161 static bool isT2i32Load(unsigned Opc) {
162 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True for any 32-bit integer load (ARM LDR or Thumb2 forms).
165 static bool isi32Load(unsigned Opc) {
166 return Opc == ARM::LDR || isT2i32Load(Opc);
// isT2i32Store - True for the Thumb2 32-bit integer store opcodes this pass
// knows how to merge.
169 static bool isT2i32Store(unsigned Opc) {
170 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True for any 32-bit integer store (ARM STR or Thumb2 forms).
173 static bool isi32Store(unsigned Opc) {
174 return Opc == ARM::STR || isT2i32Store(Opc);
177 /// MergeOps - Create and insert a LDM or STM with Base as base register and
178 /// registers in Regs as the register operands that would be loaded / stored.
179 /// It returns true if the transformation is done.
181 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
182 MachineBasicBlock::iterator MBBI,
183 int Offset, unsigned Base, bool BaseKill,
184 int Opcode, ARMCC::CondCodes Pred,
185 unsigned PredReg, unsigned Scratch, DebugLoc dl,
186 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
187 // Only a single register to load / store. Don't bother.
188 unsigned NumRegs = Regs.size();
// Select the addressing sub-mode from the starting offset:
//   +4            -> ib (increment before),
//   -4*NumRegs+4  -> da (decrement after),
//   -4*NumRegs    -> db (decrement before),
// anything else requires materializing a new base register below.
192 ARM_AM::AMSubMode Mode = ARM_AM::ia;
193 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
194 if (isAM4 && Offset == 4) {
196 // Thumb2 does not support ldmib / stmib.
199 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
201 // Thumb2 does not support ldmda / stmda.
204 } else if (isAM4 && Offset == -4 * (int)NumRegs) {
206 } else if (Offset != 0) {
207 // If starting offset isn't zero, insert a MI to materialize a new base.
208 // But only do so if it is cost effective, i.e. merging more than two
214 if (isi32Load(Opcode))
215 // If it is a load, then just use one of the destination registers
216 // as the new base.
217 NewBase = Regs[NumRegs-1].first;
219 // Otherwise use the scratch register as the new base.
224 int BaseOpc = !isThumb2
226 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
230 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// Offset must be encodable as a (Thumb2 or ARM) shifter-operand immediate.
233 int ImmedOffset = isThumb2
234 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
235 if (ImmedOffset == -1)
236 // FIXME: Try t2ADDri12 or t2SUBri12?
237 return false; // Probably not worth it then.
// Emit the add/sub that computes NewBase from the old base.
239 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
240 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
241 .addImm(Pred).addReg(PredReg).addReg(0);
243 BaseKill = true; // New base is always killed right after its use.
// Build the LDM/STM (AM4) or VLDM/VSTM (AM5) instruction itself.
246 bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
247 bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
248 Opcode = getLoadStoreMultipleOpcode(Opcode);
249 MachineInstrBuilder MIB = (isAM4)
250 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
251 .addReg(Base, getKillRegState(BaseKill))
252 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
253 : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
254 .addReg(Base, getKillRegState(BaseKill))
255 .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
256 .addImm(Pred).addReg(PredReg);
257 MIB.addReg(0); // Add optional writeback (0 for now).
// Append each merged register as a def (loads) or use (stores) operand.
258 for (unsigned i = 0; i != NumRegs; ++i)
259 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
260 | getKillRegState(Regs[i].second));
265 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
267 void ARMLoadStoreOpt::
268 MergeOpsUpdate(MachineBasicBlock &MBB,
270 unsigned memOpsBegin,
272 unsigned insertAfter,
277 ARMCC::CondCodes Pred,
281 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
282 // First calculate which of the registers should be killed by the merged
284 SmallVector<std::pair<unsigned, bool>, 8> Regs;
285 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
286 const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
287 Regs.push_back(std::make_pair(MO.getReg(), MO.isKill()));
290 // Try to do the merge.
291 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
293 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
294 Pred, PredReg, Scratch, dl, Regs))
297 // Merge succeeded, update records.
// prior(Loc) is the newly inserted load/store-multiple instruction.
298 Merges.push_back(prior(Loc));
// Erase the original single load/stores and mark the queue entries merged.
299 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
300 MBB.erase(memOps[i].MBBI);
301 memOps[i].Merged = true;
305 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
306 /// load / store multiple instructions.
308 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
309 unsigned Base, int Opcode, unsigned Size,
310 ARMCC::CondCodes Pred, unsigned PredReg,
311 unsigned Scratch, MemOpQueue &MemOps,
312 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
313 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
314 int Offset = MemOps[SIndex].Offset;
315 int SOffset = Offset;
316 unsigned insertAfter = SIndex;
317 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
318 DebugLoc dl = Loc->getDebugLoc();
// PRegNum tracks the register number of the previously accepted op; the
// merge requires register numbers to be ascending (see below).
319 unsigned PReg = Loc->getOperand(0).getReg();
320 unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
// Scan forward for ops contiguous in memory (Offset + Size) whose register
// numbers keep the required order; recurse on the first mismatch.
322 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
323 int NewOffset = MemOps[i].Offset;
324 unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
325 unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
326 // AM4 - register numbers in ascending order.
327 // AM5 - consecutive register numbers in ascending order.
328 if (NewOffset == Offset + (int)Size &&
329 ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
333 // Can't merge this in. Try merge the earlier ones first.
334 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
335 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
336 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
341 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Reached the end of the queue: merge everything from SIndex onward.
345 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
346 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
347 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - True if MI is a predicated "Base = Base - Bytes"
// subtract (any of the ARM/Thumb sub forms) with the same predicate, so it
// can be folded into a preceding/following load/store as base writeback.
// Limit (when non-zero) bounds the encodable immediate.
351 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
352 unsigned Bytes, unsigned Limit,
353 ARMCC::CondCodes Pred, unsigned PredReg){
354 unsigned MyPredReg = 0;
357 if (MI->getOpcode() != ARM::t2SUBri &&
358 MI->getOpcode() != ARM::t2SUBrSPi &&
359 MI->getOpcode() != ARM::t2SUBrSPi12 &&
360 MI->getOpcode() != ARM::tSUBspi &&
361 MI->getOpcode() != ARM::SUBri)
364 // Make sure the offset fits in 8 bits.
365 if (Bytes <= 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate scaled by 4.
368 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
369 return (MI->getOperand(0).getReg() == Base &&
370 MI->getOperand(1).getReg() == Base &&
371 (MI->getOperand(2).getImm()*Scale) == Bytes &&
372 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
373 MyPredReg == PredReg);
// isMatchingIncrement - Counterpart of isMatchingDecrement: true if MI is a
// predicated "Base = Base + Bytes" add with the same predicate.
376 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
377 unsigned Bytes, unsigned Limit,
378 ARMCC::CondCodes Pred, unsigned PredReg){
379 unsigned MyPredReg = 0;
382 if (MI->getOpcode() != ARM::t2ADDri &&
383 MI->getOpcode() != ARM::t2ADDrSPi &&
384 MI->getOpcode() != ARM::t2ADDrSPi12 &&
385 MI->getOpcode() != ARM::tADDspi &&
386 MI->getOpcode() != ARM::ADDri)
389 if (Bytes <= 0 || (Limit && Bytes >= Limit))
390 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate scaled by 4.
393 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
394 return (MI->getOperand(0).getReg() == Base &&
395 MI->getOperand(1).getReg() == Base &&
396 (MI->getOperand(2).getImm()*Scale) == Bytes &&
397 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
398 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes transferred by MI. For the
// register-list forms the count is derived from the operand count; for AM5
// forms from the encoded offset. Most switch cases are elided in this
// excerpt.
401 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
402 switch (MI->getOpcode()) {
420 return (MI->getNumOperands() - 5) * 4;
425 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
429 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
430 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
432 /// stmia rn, <ra, rb, rc>
433 /// rn := rn + 4 * 3;
435 /// stmia rn!, <ra, rb, rc>
437 /// rn := rn - 4 * 3;
438 /// ldmia rn, <ra, rb, rc>
440 /// ldmdb rn!, <ra, rb, rc>
441 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
442 MachineBasicBlock::iterator MBBI,
444 MachineBasicBlock::iterator &I) {
445 MachineInstr *MI = MBBI;
446 unsigned Base = MI->getOperand(0).getReg();
447 unsigned Bytes = getLSMultipleTransferSize(MI);
448 unsigned PredReg = 0;
449 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
450 int Opcode = MI->getOpcode();
451 bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
452 Opcode == ARM::STM || Opcode == ARM::t2STM;
// Already has writeback enabled: nothing to fold.
455 if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
458 // Can't use the updating AM4 sub-mode if the base register is also a dest
459 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
460 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
461 if (MI->getOperand(i).getReg() == Base)
// Try folding a decrement that immediately precedes the LDM/STM.
465 ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
466 if (MBBI != MBB.begin()) {
467 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
468 if (Mode == ARM_AM::ia &&
469 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
470 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
471 MI->getOperand(4).setReg(Base); // WB to base
472 MI->getOperand(4).setIsDef();
475 } else if (Mode == ARM_AM::ib &&
476 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
477 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
478 MI->getOperand(4).setReg(Base); // WB to base
479 MI->getOperand(4).setIsDef();
// Try folding an increment/decrement that immediately follows it.
485 if (MBBI != MBB.end()) {
486 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
487 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
488 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
489 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
490 MI->getOperand(4).setReg(Base); // WB to base
491 MI->getOperand(4).setIsDef();
498 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
499 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
500 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
501 MI->getOperand(4).setReg(Base); // WB to base
502 MI->getOperand(4).setIsDef();
512 // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
513 if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
// Same idea for AM5: fold a neighboring add/sub of the base into writeback.
516 ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
517 unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
518 if (MBBI != MBB.begin()) {
519 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
520 if (Mode == ARM_AM::ia &&
521 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
522 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
523 MI->getOperand(4).setReg(Base); // WB to base
524 MI->getOperand(4).setIsDef();
530 if (MBBI != MBB.end()) {
531 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
532 if (Mode == ARM_AM::ia &&
533 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
534 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
535 MI->getOperand(4).setReg(Base); // WB to base
536 MI->getOperand(4).setIsDef();
// getPreIndexedLoadStoreOpcode - Map a plain load/store opcode to its
// pre-indexed (base-writeback-before-access) form. The VFP single ops map
// to the VLDM/VSTM forms since VFP has no pre-indexed VLDR/VSTR.
550 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
552 case ARM::LDR: return ARM::LDR_PRE;
553 case ARM::STR: return ARM::STR_PRE;
554 case ARM::VLDRS: return ARM::VLDMS;
555 case ARM::VLDRD: return ARM::VLDMD;
556 case ARM::VSTRS: return ARM::VSTMS;
557 case ARM::VSTRD: return ARM::VSTMD;
560 return ARM::t2LDR_PRE;
563 return ARM::t2STR_PRE;
564 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a plain load/store opcode to its
// post-indexed (base-writeback-after-access) form; VFP ops again map to the
// VLDM/VSTM forms.
569 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
571 case ARM::LDR: return ARM::LDR_POST;
572 case ARM::STR: return ARM::STR_POST;
573 case ARM::VLDRS: return ARM::VLDMS;
574 case ARM::VLDRD: return ARM::VLDMD;
575 case ARM::VSTRS: return ARM::VSTMS;
576 case ARM::VSTRD: return ARM::VSTMD;
579 return ARM::t2LDR_POST;
582 return ARM::t2STR_POST;
583 default: llvm_unreachable("Unhandled opcode!");
588 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
589 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
590 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
591 MachineBasicBlock::iterator MBBI,
592 const TargetInstrInfo *TII,
594 MachineBasicBlock::iterator &I) {
595 MachineInstr *MI = MBBI;
596 unsigned Base = MI->getOperand(1).getReg();
597 bool BaseKill = MI->getOperand(1).isKill();
598 unsigned Bytes = getLSMultipleTransferSize(MI);
599 int Opcode = MI->getOpcode();
600 DebugLoc dl = MI->getDebugLoc();
601 bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
602 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
603 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
// Only zero-offset loads/stores can absorb a base update.
604 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
606 else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
608 else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
609 if (MI->getOperand(2).getImm() != 0)
612 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
613 // Can't do the merge if the destination register is the same as the would-be
614 // writeback register.
615 if (isLd && MI->getOperand(0).getReg() == Base)
618 unsigned PredReg = 0;
619 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
620 bool DoMerge = false;
621 ARM_AM::AddrOpc AddSub = ARM_AM::add;
623 // AM2 - 12 bits, thumb2 - 8 bits.
624 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// A matching add/sub immediately before MI gives a pre-indexed form...
625 if (MBBI != MBB.begin()) {
626 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
627 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
629 AddSub = ARM_AM::sub;
630 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
632 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
634 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
// ...and one immediately after gives a post-indexed form.
640 if (!DoMerge && MBBI != MBB.end()) {
641 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
643 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
645 AddSub = ARM_AM::sub;
646 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
647 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
649 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
// Encode the folded offset in the form the new opcode expects.
663 bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
666 Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
668 : ARM_AM::ia, true, (isDPR ? 2 : 1));
670 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
672 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// Loads: VLDM (AM5), ARM LDR_PRE/POST, or Thumb2 t2LDR_PRE/POST.
676 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
677 .addReg(Base, getKillRegState(BaseKill))
678 .addImm(Offset).addImm(Pred).addReg(PredReg)
679 .addReg(Base, getDefRegState(true)) // WB base register
680 .addReg(MI->getOperand(0).getReg(), RegState::Define);
682 // LDR_PRE, LDR_POST,
683 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
684 .addReg(Base, RegState::Define)
685 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
687 // t2LDR_PRE, t2LDR_POST
688 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
689 .addReg(Base, RegState::Define)
690 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Stores: the mirrored VSTM / STR_PRE/POST / t2STR_PRE/POST cases.
692 MachineOperand &MO = MI->getOperand(0);
695 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
696 .addImm(Pred).addReg(PredReg)
697 .addReg(Base, getDefRegState(true)) // WB base register
698 .addReg(MO.getReg(), getKillRegState(MO.isKill()));
701 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
702 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
703 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
705 // t2STR_PRE, t2STR_POST
706 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
707 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
708 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
715 /// isMemoryOp - Returns true if instruction is a memory operations (that this
716 /// pass is capable of operating on).
717 static bool isMemoryOp(const MachineInstr *MI) {
718 int Opcode = MI->getOpcode();
// The opcode case labels are elided in this excerpt. For AM2 ops the
// offset register (operand 2) must be zero; all forms require a register
// base operand.
723 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
726 return MI->getOperand(1).isReg();
729 return MI->getOperand(1).isReg();
734 return MI->getOperand(1).isReg();
739 /// AdvanceRS - Advance register scavenger to just before the earliest memory
740 /// op that is being merged.
741 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the queue entry with the smallest Position (earliest in the block).
742 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
743 unsigned Position = MemOps[0].Position;
744 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
745 if (MemOps[i].Position < Position) {
746 Position = MemOps[i].Position;
747 Loc = MemOps[i].MBBI;
// Step the scavenger to the instruction just before Loc (no-op at begin()).
751 if (Loc != MBB.begin())
752 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode the signed byte offset of a load/store from its
// encoded addressing-mode immediate (the third-from-last operand). Thumb2
// i12/i8 forms store the plain immediate; AM2/AM3/AM5 forms need decoding,
// and AM5 offsets are in words (hence the *4). Negation for the sub forms
// happens in the trailing if-chain (result lines elided in this excerpt).
755 static int getMemoryOpOffset(const MachineInstr *MI) {
756 int Opcode = MI->getOpcode();
757 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
758 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
759 unsigned NumOperands = MI->getDesc().getNumOperands();
760 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
762 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
763 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
764 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
768 ? ARM_AM::getAM2Offset(OffField)
769 : (isAM3 ? ARM_AM::getAM3Offset(OffField)
770 : ARM_AM::getAM5Offset(OffField) * 4);
772 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
775 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
778 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR or STR (ARM or Thumb2 form NewOpc)
// before MBBI, used by FixInvalidRegPairOp to split an illegal LDRD/STRD
// into two word accesses. OffImm is the signed byte offset; kill/undef
// flags are threaded through explicitly for each operand.
784 static void InsertLDR_STR(MachineBasicBlock &MBB,
785 MachineBasicBlock::iterator &MBBI,
786 int OffImm, bool isDef,
787 DebugLoc dl, unsigned NewOpc,
788 unsigned Reg, bool RegDeadKill, bool RegUndef,
789 unsigned BaseReg, bool BaseKill, bool BaseUndef,
790 unsigned OffReg, bool OffKill, bool OffUndef,
791 ARMCC::CondCodes Pred, unsigned PredReg,
792 const TargetInstrInfo *TII, bool isT2) {
// For the ARM AM2 form the offset must be re-encoded with an explicit
// add/sub direction; Thumb2 takes the plain immediate.
796 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
798 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
// Load form: Reg is a def; store form below: Reg is a use.
801 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
803 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
804 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
806 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
807 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
809 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
811 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
812 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
814 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
815 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - LDRD/STRD require an even/odd consecutive register
// pair. If this MI violates that, rewrite it as either an LDM/STM (when
// register numbers happen to ascend with no offset) or two single
// loads/stores. Returns true if MI was replaced.
819 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
820 MachineBasicBlock::iterator &MBBI) {
821 MachineInstr *MI = &*MBBI;
822 unsigned Opcode = MI->getOpcode();
823 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
824 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
825 unsigned EvenReg = MI->getOperand(0).getReg();
826 unsigned OddReg = MI->getOperand(1).getReg();
827 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
828 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A valid even/odd consecutive pair needs no fixing (the early return is
// elided in this excerpt).
829 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
// Gather all operand state needed to rebuild the access.
832 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
833 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
834 bool EvenDeadKill = isLd ?
835 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
836 bool EvenUndef = MI->getOperand(0).isUndef();
837 bool OddDeadKill = isLd ?
838 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
839 bool OddUndef = MI->getOperand(1).isUndef();
840 const MachineOperand &BaseOp = MI->getOperand(2);
841 unsigned BaseReg = BaseOp.getReg();
842 bool BaseKill = BaseOp.isKill();
843 bool BaseUndef = BaseOp.isUndef();
// Thumb2 LDRD/STRD has no offset register operand.
844 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
845 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
846 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
847 int OffImm = getMemoryOpOffset(MI);
848 unsigned PredReg = 0;
849 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
851 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
852 // Ascending register numbers and no offset. It's safe to change it to a
854 unsigned NewOpc = (isLd)
855 ? (isT2 ? ARM::t2LDM : ARM::LDM)
856 : (isT2 ? ARM::t2STM : ARM::STM);
// LDM form: both registers become defs.
858 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
859 .addReg(BaseReg, getKillRegState(BaseKill))
860 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
861 .addImm(Pred).addReg(PredReg)
863 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
864 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// STM form: both registers are uses.
867 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
868 .addReg(BaseReg, getKillRegState(BaseKill))
869 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
870 .addImm(Pred).addReg(PredReg)
873 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
875 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
879 // Split into two instructions.
880 assert((!isT2 || !OffReg) &&
881 "Thumb2 ldrd / strd does not encode offset register!");
882 unsigned NewOpc = (isLd)
883 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
884 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR)
885 DebugLoc dl = MBBI->getDebugLoc();
886 // If this is a load and base register is killed, it may have been
887 // re-defed by the load, make sure the first load does not clobber it.
889 (BaseKill || OffKill) &&
890 (TRI->regsOverlap(EvenReg, BaseReg) ||
891 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
892 assert(!TRI->regsOverlap(OddReg, BaseReg) &&
893 (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
// Load the odd (high) word first so the even load's base clobber is last.
894 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
895 OddReg, OddDeadKill, false,
896 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
897 Pred, PredReg, TII, isT2);
898 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
899 EvenReg, EvenDeadKill, false,
900 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
901 Pred, PredReg, TII, isT2);
903 if (OddReg == EvenReg && EvenDeadKill) {
904 // If the two source operands are the same, the kill marker is probably
905 // on the first one. e.g.
906 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
907 EvenDeadKill = false;
// Normal order: even (low) word first, odd word at offset +4.
910 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
911 EvenReg, EvenDeadKill, EvenUndef,
912 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
913 Pred, PredReg, TII, isT2);
914 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
915 OddReg, OddDeadKill, OddUndef,
916 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
917 Pred, PredReg, TII, isT2);
931 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
932 /// ops of the same base and incrementing offset into LDM / STM ops.
933 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
934 unsigned NumMerges = 0;
935 unsigned NumMemOps = 0;
// Current chain state: all queued ops share CurrBase/CurrOpc/CurrPred.
937 unsigned CurrBase = 0;
939 unsigned CurrSize = 0;
940 ARMCC::CondCodes CurrPred = ARMCC::AL;
941 unsigned CurrPredReg = 0;
942 unsigned Position = 0;
943 SmallVector<MachineBasicBlock::iterator,4> Merges;
945 RS->enterBasicBlock(&MBB);
946 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
948 if (FixInvalidRegPairOp(MBB, MBBI))
951 bool Advance = false;
952 bool TryMerge = false;
953 bool Clobber = false;
955 bool isMemOp = isMemoryOp(MBBI);
957 int Opcode = MBBI->getOpcode();
958 unsigned Size = getLSMultipleTransferSize(MBBI);
959 unsigned Base = MBBI->getOperand(1).getReg();
960 unsigned PredReg = 0;
961 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
962 int Offset = getMemoryOpOffset(MBBI);
965 // r5 := ldr [r5, #4]
966 // r6 := ldr [r5, #8]
968 // The second ldr has effectively broken the chain even though it
969 // looks like the later ldr(s) use the same base register. Try to
970 // merge the ldr's so far, including this one. But don't try to
971 // combine the following ldr(s).
972 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
973 if (CurrBase == 0 && !Clobber) {
974 // Start of a new chain.
979 CurrPredReg = PredReg;
980 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
// Same chain: insert into the queue keeping offsets sorted ascending.
989 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
990 // No need to match PredReg.
991 // Continue adding to the queue.
992 if (Offset > MemOps.back().Offset) {
993 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
997 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
999 if (Offset < I->Offset) {
1000 MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
1004 } else if (Offset == I->Offset) {
1005 // Collision! This can't be merged!
1018 // Reach the end of the block, try merging the memory instructions.
1024 if (NumMemOps > 1) {
1025 // Try to find a free register to use as a new base in case it's needed.
1026 // First advance to the instruction just before the start of the chain.
1027 AdvanceRS(MBB, MemOps);
1028 // Find a scratch register.
1029 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1030 // Process the load / store instructions.
1031 RS->forward(prior(MBBI));
1035 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1036 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1038 // Try folding preceding/trailing base inc/dec into the generated
1040 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1041 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1043 NumMerges += Merges.size();
1045 // Try folding preceding/trailing base inc/dec into those load/store
1046 // that were not merged to form LDM/STM ops.
1047 for (unsigned i = 0; i != NumMemOps; ++i)
1048 if (!MemOps[i].Merged)
1049 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1052 // RS may be pointing to an instruction that's deleted.
1053 RS->skipTo(prior(MBBI));
1054 } else if (NumMemOps == 1) {
1055 // Try folding preceding/trailing base inc/dec into the single
1057 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1059 RS->forward(prior(MBBI));
// Reset chain state for the next run of memory ops.
1066 CurrPred = ARMCC::AL;
1073 // If iterator hasn't been advanced and this is not a memory op, skip it.
1074 // It can't start a new chain anyway.
1075 if (!Advance && !isMemOp && MBBI != E) {
1081 return NumMerges > 0;
// OffsetCompare - Strict-weak-ordering functor sorting memory ops by
// decreasing memory offset; asserts distinct ops never share an offset.
1085 struct OffsetCompare {
1086 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1087 int LOffset = getMemoryOpOffset(LHS);
1088 int ROffset = getMemoryOpOffset(RHS);
1089 assert(LHS == RHS || LOffset != ROffset);
1090 return LOffset > ROffset;
1095 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
1096 /// (bx lr) into the preceding stack restore so it directly restores the value
1098 /// ldmfd sp!, {r7, lr}
1101 /// ldmfd sp!, {r7, pc}
1102 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1103 if (MBB.empty()) return false;
1105 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1106 if (MBBI != MBB.begin() &&
1107 (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
1108 MachineInstr *PrevMI = prior(MBBI);
1109 if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
// Only fold if the LDM's last loaded register is LR, which the return
// would have branched through.
1110 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1111 if (MO.getReg() != ARM::LR)
// Retarget the LDM to the _RET form (loads into PC instead of LR).
1113 unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1114 PrevMI->setDesc(TII->get(NewOpc));
// runOnMachineFunction - Pass entry point: cache target info, create a
// register scavenger, then run both optimizations on every basic block.
// NOTE(review): the matching `delete RS` is not visible in this excerpt --
// confirm the scavenger is freed in the full file.
1123 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1124 const TargetMachine &TM = Fn.getTarget();
1125 AFI = Fn.getInfo<ARMFunctionInfo>();
1126 TII = TM.getInstrInfo();
1127 TRI = TM.getRegisterInfo();
1128 RS = new RegScavenger();
1129 isThumb2 = AFI->isThumb2Function();
1131 bool Modified = false;
1132 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1134 MachineBasicBlock &MBB = *MFI;
1135 Modified |= LoadStoreMultipleOpti(MBB);
1136 Modified |= MergeReturnIntoLDM(MBB);
1144 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1145 /// load / stores from consecutive locations close to make it more
1146 /// likely they will be combined later.
1149 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1151 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
1153 const TargetData *TD;
1154 const TargetInstrInfo *TII;
1155 const TargetRegisterInfo *TRI;
1156 const ARMSubtarget *STI;
1157 MachineRegisterInfo *MRI;
1158 MachineFunction *MF;
1160 virtual bool runOnMachineFunction(MachineFunction &Fn);
1162 virtual const char *getPassName() const {
1163 return "ARM pre- register allocation load / store optimization pass";
1167 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1168 unsigned &NewOpc, unsigned &EvenReg,
1169 unsigned &OddReg, unsigned &BaseReg,
1170 unsigned &OffReg, int &Offset,
1171 unsigned &PredReg, ARMCC::CondCodes &Pred,
1173 bool RescheduleOps(MachineBasicBlock *MBB,
1174 SmallVector<MachineInstr*, 4> &Ops,
1175 unsigned Base, bool isLd,
1176 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1177 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1179 char ARMPreAllocLoadStoreOpt::ID = 0;
1182 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1183 TD = Fn.getTarget().getTargetData();
1184 TII = Fn.getTarget().getInstrInfo();
1185 TRI = Fn.getTarget().getRegisterInfo();
1186 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1187 MRI = &Fn.getRegInfo();
1190 bool Modified = false;
1191 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1193 Modified |= RescheduleLoadStoreInstrs(MFI);
1198 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1199 MachineBasicBlock::iterator I,
1200 MachineBasicBlock::iterator E,
1201 SmallPtrSet<MachineInstr*, 4> &MemOps,
1202 SmallSet<unsigned, 4> &MemRegs,
1203 const TargetRegisterInfo *TRI) {
1204 // Are there stores / loads / calls between them?
1205 // FIXME: This is overly conservative. We should make use of alias information
1207 SmallSet<unsigned, 4> AddedRegPressure;
1209 if (MemOps.count(&*I))
1211 const TargetInstrDesc &TID = I->getDesc();
1212 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
1214 if (isLd && TID.mayStore())
1219 // It's not safe to move the first 'str' down.
1222 // str r4, [r0, #+4]
1226 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1227 MachineOperand &MO = I->getOperand(j);
1230 unsigned Reg = MO.getReg();
1231 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1233 if (Reg != Base && !MemRegs.count(Reg))
1234 AddedRegPressure.insert(Reg);
1238 // Estimate register pressure increase due to the transformation.
1239 if (MemRegs.size() <= 4)
1240 // Ok if we are moving small number of instructions.
1242 return AddedRegPressure.size() <= MemRegs.size() * 2;
1246 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1248 unsigned &NewOpc, unsigned &EvenReg,
1249 unsigned &OddReg, unsigned &BaseReg,
1250 unsigned &OffReg, int &Offset,
1252 ARMCC::CondCodes &Pred,
1254 // Make sure we're allowed to generate LDRD/STRD.
1255 if (!STI->hasV5TEOps())
1258 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1260 unsigned Opcode = Op0->getOpcode();
1261 if (Opcode == ARM::LDR)
1263 else if (Opcode == ARM::STR)
1265 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1266 NewOpc = ARM::t2LDRDi8;
1269 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1270 NewOpc = ARM::t2STRDi8;
1276 // Make sure the offset registers match.
1278 (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
1281 // Must sure the base address satisfies i64 ld / st alignment requirement.
1282 if (!Op0->hasOneMemOperand() ||
1283 !(*Op0->memoperands_begin())->getValue() ||
1284 (*Op0->memoperands_begin())->isVolatile())
1287 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1288 Function *Func = MF->getFunction();
1289 unsigned ReqAlign = STI->hasV6Ops()
1290 ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
1291 : 8; // Pre-v6 need 8-byte align
1292 if (Align < ReqAlign)
1295 // Then make sure the immediate offset fits.
1296 int OffImm = getMemoryOpOffset(Op0);
1300 // Can't fall back to t2LDRi8 / t2STRi8.
1303 int Limit = (1 << 8) * Scale;
1304 if (OffImm >= Limit || (OffImm & (Scale-1)))
1309 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1311 AddSub = ARM_AM::sub;
1314 int Limit = (1 << 8) * Scale;
1315 if (OffImm >= Limit || (OffImm & (Scale-1)))
1317 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1319 EvenReg = Op0->getOperand(0).getReg();
1320 OddReg = Op1->getOperand(0).getReg();
1321 if (EvenReg == OddReg)
1323 BaseReg = Op0->getOperand(1).getReg();
1325 OffReg = Op0->getOperand(2).getReg();
1326 Pred = llvm::getInstrPredicate(Op0, PredReg);
1327 dl = Op0->getDebugLoc();
1331 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1332 SmallVector<MachineInstr*, 4> &Ops,
1333 unsigned Base, bool isLd,
1334 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1335 bool RetVal = false;
1337 // Sort by offset (in reverse order).
1338 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1340 // The loads / stores of the same base are in order. Scan them from first to
1341 // last and check for the followins:
1342 // 1. Any def of base.
1344 while (Ops.size() > 1) {
1345 unsigned FirstLoc = ~0U;
1346 unsigned LastLoc = 0;
1347 MachineInstr *FirstOp = 0;
1348 MachineInstr *LastOp = 0;
1350 unsigned LastOpcode = 0;
1351 unsigned LastBytes = 0;
1352 unsigned NumMove = 0;
1353 for (int i = Ops.size() - 1; i >= 0; --i) {
1354 MachineInstr *Op = Ops[i];
1355 unsigned Loc = MI2LocMap[Op];
1356 if (Loc <= FirstLoc) {
1360 if (Loc >= LastLoc) {
1365 unsigned Opcode = Op->getOpcode();
1366 if (LastOpcode && Opcode != LastOpcode)
1369 int Offset = getMemoryOpOffset(Op);
1370 unsigned Bytes = getLSMultipleTransferSize(Op);
1372 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1375 LastOffset = Offset;
1377 LastOpcode = Opcode;
1378 if (++NumMove == 8) // FIXME: Tune this limit.
1385 SmallPtrSet<MachineInstr*, 4> MemOps;
1386 SmallSet<unsigned, 4> MemRegs;
1387 for (int i = NumMove-1; i >= 0; --i) {
1388 MemOps.insert(Ops[i]);
1389 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1392 // Be conservative, if the instructions are too far apart, don't
1393 // move them. We want to limit the increase of register pressure.
1394 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1396 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1397 MemOps, MemRegs, TRI);
1399 for (unsigned i = 0; i != NumMove; ++i)
1402 // This is the new location for the loads / stores.
1403 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1404 while (InsertPos != MBB->end() && MemOps.count(InsertPos))
1407 // If we are moving a pair of loads / stores, see if it makes sense
1408 // to try to allocate a pair of registers that can form register pairs.
1409 MachineInstr *Op0 = Ops.back();
1410 MachineInstr *Op1 = Ops[Ops.size()-2];
1411 unsigned EvenReg = 0, OddReg = 0;
1412 unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
1413 ARMCC::CondCodes Pred = ARMCC::AL;
1415 unsigned NewOpc = 0;
1418 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1419 EvenReg, OddReg, BaseReg, OffReg,
1420 Offset, PredReg, Pred, isT2)) {
1424 // Form the pair instruction.
1426 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1427 dl, TII->get(NewOpc))
1428 .addReg(EvenReg, RegState::Define)
1429 .addReg(OddReg, RegState::Define)
1433 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1436 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1437 dl, TII->get(NewOpc))
1443 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1449 // Add register allocation hints to form register pairs.
1450 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1451 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
1453 for (unsigned i = 0; i != NumMove; ++i) {
1454 MachineInstr *Op = Ops.back();
1456 MBB->splice(InsertPos, MBB, Op);
1460 NumLdStMoved += NumMove;
1470 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1471 bool RetVal = false;
1473 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1474 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1475 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1476 SmallVector<unsigned, 4> LdBases;
1477 SmallVector<unsigned, 4> StBases;
1480 MachineBasicBlock::iterator MBBI = MBB->begin();
1481 MachineBasicBlock::iterator E = MBB->end();
1483 for (; MBBI != E; ++MBBI) {
1484 MachineInstr *MI = MBBI;
1485 const TargetInstrDesc &TID = MI->getDesc();
1486 if (TID.isCall() || TID.isTerminator()) {
1487 // Stop at barriers.
1492 MI2LocMap[MI] = Loc++;
1493 if (!isMemoryOp(MI))
1495 unsigned PredReg = 0;
1496 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1499 int Opc = MI->getOpcode();
1500 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1501 unsigned Base = MI->getOperand(1).getReg();
1502 int Offset = getMemoryOpOffset(MI);
1504 bool StopHere = false;
1506 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1507 Base2LdsMap.find(Base);
1508 if (BI != Base2LdsMap.end()) {
1509 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1510 if (Offset == getMemoryOpOffset(BI->second[i])) {
1516 BI->second.push_back(MI);
1518 SmallVector<MachineInstr*, 4> MIs;
1520 Base2LdsMap[Base] = MIs;
1521 LdBases.push_back(Base);
1524 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1525 Base2StsMap.find(Base);
1526 if (BI != Base2StsMap.end()) {
1527 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1528 if (Offset == getMemoryOpOffset(BI->second[i])) {
1534 BI->second.push_back(MI);
1536 SmallVector<MachineInstr*, 4> MIs;
1538 Base2StsMap[Base] = MIs;
1539 StBases.push_back(Base);
1544 // Found a duplicate (a base+offset combination that's seen earlier).
1551 // Re-schedule loads.
1552 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1553 unsigned Base = LdBases[i];
1554 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1556 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1559 // Re-schedule stores.
1560 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1561 unsigned Base = StBases[i];
1562 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1564 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1568 Base2LdsMap.clear();
1569 Base2StsMap.clear();
1579 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1580 /// optimization pass.
1581 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1583 return new ARMPreAllocLoadStoreOpt();
1584 return new ARMLoadStoreOpt();