1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register allocation pass that combines
55 /// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
75 struct MemOpQueueEntry {
76 int Offset;
77 unsigned Reg;
78 bool isKill;
79 unsigned Position;
80 MachineBasicBlock::iterator MBBI;
81 bool Merged;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
128 char ARMLoadStoreOpt::ID = 0;
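/// getLoadStoreMultipleOpcode - Map a single load / store opcode (e.g. LDR,
/// t2LDRi12, VLDRD) to the corresponding load / store multiple opcode.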
131 static int getLoadStoreMultipleOpcode(int Opcode) {
159 default: llvm_unreachable("Unhandled opcode!");
164 static bool isT2i32Load(unsigned Opc) {
165 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
168 static bool isi32Load(unsigned Opc) {
169 return Opc == ARM::LDR || isT2i32Load(Opc);
172 static bool isT2i32Store(unsigned Opc) {
173 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
176 static bool isi32Store(unsigned Opc) {
177 return Opc == ARM::STR || isT2i32Store(Opc);
180 /// MergeOps - Create and insert an LDM or STM with Base as the base register
181 /// and the registers in Regs as the register operands to be loaded / stored.
182 /// It returns true if the transformation is done.
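/// For example (illustrative), with Base = r0, Offset = 0 and
/// Regs = { r1, r2, r3 }, the three loads
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
/// can be merged into the single instruction
///   ldmia r0, {r1, r2, r3}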
184 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
185 MachineBasicBlock::iterator MBBI,
186 int Offset, unsigned Base, bool BaseKill,
187 int Opcode, ARMCC::CondCodes Pred,
188 unsigned PredReg, unsigned Scratch, DebugLoc dl,
189 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
190 // Only a single register to load / store. Don't bother.
191 unsigned NumRegs = Regs.size();
195 ARM_AM::AMSubMode Mode = ARM_AM::ia;
196 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
197 if (isAM4 && Offset == 4) {
199 // Thumb2 does not support ldmib / stmib.
202 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
204 // Thumb2 does not support ldmda / stmda.
207 } else if (isAM4 && Offset == -4 * (int)NumRegs) {
209 } else if (Offset != 0) {
210 // If the starting offset isn't zero, insert an MI to materialize a new base.
211 // But only do so if it is cost effective, i.e. merging more than two loads / stores.
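// For instance (illustrative): loads at [r0, #8], [r0, #12], [r0, #16] can
// still be merged by first emitting "add rX, r0, #8" and then using rX as
// the ldmia base register.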
217 if (isi32Load(Opcode))
218 // If it is a load, then just use one of the destination registers
219 // as the new base.
220 NewBase = Regs[NumRegs-1].first;
222 // Use the scratch register as the new base.
227 int BaseOpc = !isThumb2
229 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
233 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
236 int ImmedOffset = isThumb2
237 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
238 if (ImmedOffset == -1)
239 // FIXME: Try t2ADDri12 or t2SUBri12?
240 return false; // Probably not worth it then.
242 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
243 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
244 .addImm(Pred).addReg(PredReg).addReg(0);
246 BaseKill = true; // New base is always killed right after its use.
249 bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
250 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
251 Opcode == ARM::VLDRD);
252 Opcode = getLoadStoreMultipleOpcode(Opcode);
253 MachineInstrBuilder MIB = (isAM4)
254 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
255 .addReg(Base, getKillRegState(BaseKill))
256 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
257 : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
258 .addReg(Base, getKillRegState(BaseKill))
259 .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
260 .addImm(Pred).addReg(PredReg);
261 for (unsigned i = 0; i != NumRegs; ++i)
262 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
263 | getKillRegState(Regs[i].second));
268 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on success.
270 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
272 unsigned memOpsBegin, unsigned memOpsEnd,
273 unsigned insertAfter, int Offset,
274 unsigned Base, bool BaseKill,
276 ARMCC::CondCodes Pred, unsigned PredReg,
279 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
280 // First calculate which of the registers should be killed by the merged instruction.
282 const unsigned insertPos = memOps[insertAfter].Position;
284 SmallSet<unsigned, 4> UnavailRegs;
285 SmallSet<unsigned, 4> KilledRegs;
286 DenseMap<unsigned, unsigned> Killer;
287 for (unsigned i = 0; i < memOpsBegin; ++i) {
288 if (memOps[i].Position < insertPos && memOps[i].isKill) {
289 unsigned Reg = memOps[i].Reg;
290 if (memOps[i].Merged)
291 UnavailRegs.insert(Reg);
293 KilledRegs.insert(Reg);
298 for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
299 if (memOps[i].Position < insertPos && memOps[i].isKill) {
300 unsigned Reg = memOps[i].Reg;
301 KilledRegs.insert(Reg);
306 SmallVector<std::pair<unsigned, bool>, 8> Regs;
307 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
308 unsigned Reg = memOps[i].Reg;
309 if (UnavailRegs.count(Reg))
310 // Register is killed before and it's not easy / possible to update the
311 // kill marker on already merged instructions. Abort.
314 // If we are inserting the merged operation after an unmerged operation that
315 // uses the same register, make sure to transfer any kill flag.
316 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
317 Regs.push_back(std::make_pair(Reg, isKill));
320 // Try to do the merge.
321 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
323 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
324 Pred, PredReg, Scratch, dl, Regs))
327 // Merge succeeded, update records.
328 Merges.push_back(prior(Loc));
329 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
330 // Remove kill flags from any unmerged memops that come before insertPos.
331 if (Regs[i-memOpsBegin].second) {
332 unsigned Reg = Regs[i-memOpsBegin].first;
333 if (KilledRegs.count(Reg)) {
334 unsigned j = Killer[Reg];
335 memOps[j].MBBI->getOperand(0).setIsKill(false);
338 MBB.erase(memOps[i].MBBI);
339 memOps[i].Merged = true;
343 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
344 /// load / store multiple instructions.
346 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
347 unsigned Base, int Opcode, unsigned Size,
348 ARMCC::CondCodes Pred, unsigned PredReg,
349 unsigned Scratch, MemOpQueue &MemOps,
350 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
351 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
352 int Offset = MemOps[SIndex].Offset;
353 int SOffset = Offset;
354 unsigned insertAfter = SIndex;
355 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
356 DebugLoc dl = Loc->getDebugLoc();
357 const MachineOperand &PMO = Loc->getOperand(0);
358 unsigned PReg = PMO.getReg();
359 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
360 : ARMRegisterInfo::getRegisterNumbering(PReg);
363 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
364 int NewOffset = MemOps[i].Offset;
365 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
366 unsigned Reg = MO.getReg();
367 unsigned RegNum = MO.isUndef() ? UINT_MAX
368 : ARMRegisterInfo::getRegisterNumbering(Reg);
369 // AM4 - register numbers in ascending order.
370 // AM5 - consecutive register numbers in ascending order.
371 // Can only do up to 16 double-word registers per insn.
372 if (Reg != ARM::SP &&
373 NewOffset == Offset + (int)Size &&
374 ((isAM4 && RegNum > PRegNum)
375 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
380 // Can't merge this in. Try to merge the earlier ones first.
381 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
382 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
383 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
388 if (MemOps[i].Position > MemOps[insertAfter].Position)
392 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
393 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
394 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
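/// isMatchingDecrement - Return true if MI is a SUB of the form
/// "Base = Base - Bytes" under the given predicate, i.e. a decrement that
/// could be folded into a base-writeback load / store.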
398 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
399 unsigned Bytes, unsigned Limit,
400 ARMCC::CondCodes Pred, unsigned PredReg){
401 unsigned MyPredReg = 0;
404 if (MI->getOpcode() != ARM::t2SUBri &&
405 MI->getOpcode() != ARM::t2SUBrSPi &&
406 MI->getOpcode() != ARM::t2SUBrSPi12 &&
407 MI->getOpcode() != ARM::tSUBspi &&
408 MI->getOpcode() != ARM::SUBri)
411 // Make sure the offset fits in 8 bits.
412 if (Bytes <= 0 || (Limit && Bytes >= Limit))
415 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
416 return (MI->getOperand(0).getReg() == Base &&
417 MI->getOperand(1).getReg() == Base &&
418 (MI->getOperand(2).getImm()*Scale) == Bytes &&
419 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
420 MyPredReg == PredReg);
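/// isMatchingIncrement - Likewise, return true if MI is an ADD of the form
/// "Base = Base + Bytes" under the given predicate.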
423 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
424 unsigned Bytes, unsigned Limit,
425 ARMCC::CondCodes Pred, unsigned PredReg){
426 unsigned MyPredReg = 0;
429 if (MI->getOpcode() != ARM::t2ADDri &&
430 MI->getOpcode() != ARM::t2ADDrSPi &&
431 MI->getOpcode() != ARM::t2ADDrSPi12 &&
432 MI->getOpcode() != ARM::tADDspi &&
433 MI->getOpcode() != ARM::ADDri)
436 if (Bytes <= 0 || (Limit && Bytes >= Limit))
437 // Make sure the offset fits in 8 bits.
440 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
441 return (MI->getOperand(0).getReg() == Base &&
442 MI->getOperand(1).getReg() == Base &&
443 (MI->getOperand(2).getImm()*Scale) == Bytes &&
444 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
445 MyPredReg == PredReg);
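/// getLSMultipleTransferSize - Return the total number of bytes loaded or
/// stored by MI, whether it is a single load / store or a load / store
/// multiple.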
448 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
449 switch (MI->getOpcode()) {
467 return (MI->getNumOperands() - 4) * 4;
472 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
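/// getUpdatingLSMultipleOpcode - Map an LDM / STM / VLDM / VSTM opcode to its
/// base-updating (writeback) form.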
476 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
478 case ARM::LDM: return ARM::LDM_UPD;
479 case ARM::STM: return ARM::STM_UPD;
480 case ARM::t2LDM: return ARM::t2LDM_UPD;
481 case ARM::t2STM: return ARM::t2STM_UPD;
482 case ARM::VLDMS: return ARM::VLDMS_UPD;
483 case ARM::VLDMD: return ARM::VLDMD_UPD;
484 case ARM::VSTMS: return ARM::VSTMS_UPD;
485 case ARM::VSTMD: return ARM::VSTMD_UPD;
486 default: llvm_unreachable("Unhandled opcode!");
491 /// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
492 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
494 /// stmia rn, <ra, rb, rc>
495 /// rn := rn + 4 * 3;
496 /// =>
497 /// stmia rn!, <ra, rb, rc>
499 /// rn := rn - 4 * 3;
500 /// ldmia rn, <ra, rb, rc>
501 /// =>
502 /// ldmdb rn!, <ra, rb, rc>
503 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
504 MachineBasicBlock::iterator MBBI,
506 MachineBasicBlock::iterator &I) {
507 MachineInstr *MI = MBBI;
508 unsigned Base = MI->getOperand(0).getReg();
509 bool BaseKill = MI->getOperand(0).isKill();
510 unsigned Bytes = getLSMultipleTransferSize(MI);
511 unsigned PredReg = 0;
512 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
513 int Opcode = MI->getOpcode();
514 DebugLoc dl = MI->getDebugLoc();
515 bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
516 Opcode == ARM::STM || Opcode == ARM::t2STM);
518 bool DoMerge = false;
519 ARM_AM::AMSubMode Mode = ARM_AM::ia;
523 // Can't use an updating ld/st if the base register is also a dest
524 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
525 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
526 if (MI->getOperand(i).getReg() == Base)
529 Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
531 // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
532 Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
533 Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
536 // Try merging with the previous instruction.
537 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
538 if (MBBI != BeginMBBI) {
539 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
540 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
543 if (Mode == ARM_AM::ia &&
544 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
547 } else if (isAM4 && Mode == ARM_AM::ib &&
548 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
553 if (Mode == ARM_AM::ia &&
554 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
563 // Try merging with the next instruction.
564 MachineBasicBlock::iterator EndMBBI = MBB.end();
565 if (!DoMerge && MBBI != EndMBBI) {
566 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
567 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
570 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
571 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
573 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
574 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
578 if (Mode == ARM_AM::ia &&
579 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
595 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
596 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
597 .addReg(Base, getDefRegState(true)) // WB base register
598 .addReg(Base, getKillRegState(BaseKill));
600 // [t2]LDM_UPD, [t2]STM_UPD
601 MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
602 .addImm(Pred).addReg(PredReg);
604 // VLDM[SD]_UPD, VSTM[SD]_UPD
605 MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
606 .addImm(Pred).addReg(PredReg);
608 // Transfer the rest of the operands.
609 for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
610 MIB.addOperand(MI->getOperand(OpNum));
611 // Transfer memoperands.
612 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
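/// getPreIndexedLoadStoreOpcode - Map a single load / store opcode to its
/// pre-indexed (writeback before the access) form. VLDR / VSTR map to the
/// corresponding updating VLDM / VSTM opcodes.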
618 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
620 case ARM::LDR: return ARM::LDR_PRE;
621 case ARM::STR: return ARM::STR_PRE;
622 case ARM::VLDRS: return ARM::VLDMS_UPD;
623 case ARM::VLDRD: return ARM::VLDMD_UPD;
624 case ARM::VSTRS: return ARM::VSTMS_UPD;
625 case ARM::VSTRD: return ARM::VSTMD_UPD;
628 return ARM::t2LDR_PRE;
631 return ARM::t2STR_PRE;
632 default: llvm_unreachable("Unhandled opcode!");
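/// getPostIndexedLoadStoreOpcode - Likewise, map a single load / store opcode
/// to its post-indexed (writeback after the access) form.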
637 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
639 case ARM::LDR: return ARM::LDR_POST;
640 case ARM::STR: return ARM::STR_POST;
641 case ARM::VLDRS: return ARM::VLDMS_UPD;
642 case ARM::VLDRD: return ARM::VLDMD_UPD;
643 case ARM::VSTRS: return ARM::VSTMS_UPD;
644 case ARM::VSTRD: return ARM::VSTMD_UPD;
647 return ARM::t2LDR_POST;
650 return ARM::t2STR_POST;
651 default: llvm_unreachable("Unhandled opcode!");
656 /// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
657 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
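/// For example (illustrative), the post-indexed case:
///   ldr r3, [r2]
///   add r2, r2, #4
/// =>
///   ldr r3, [r2], #4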
658 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
659 MachineBasicBlock::iterator MBBI,
660 const TargetInstrInfo *TII,
662 MachineBasicBlock::iterator &I) {
663 MachineInstr *MI = MBBI;
664 unsigned Base = MI->getOperand(1).getReg();
665 bool BaseKill = MI->getOperand(1).isKill();
666 unsigned Bytes = getLSMultipleTransferSize(MI);
667 int Opcode = MI->getOpcode();
668 DebugLoc dl = MI->getDebugLoc();
669 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
670 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
671 bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
672 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
674 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
676 if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
677 if (MI->getOperand(2).getImm() != 0)
680 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
681 // Can't do the merge if the destination register is the same as the would-be
682 // writeback register.
683 if (isLd && MI->getOperand(0).getReg() == Base)
686 unsigned PredReg = 0;
687 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
688 bool DoMerge = false;
689 ARM_AM::AddrOpc AddSub = ARM_AM::add;
691 // AM2 - 12 bits, thumb2 - 8 bits.
692 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
694 // Try merging with the previous instruction.
695 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
696 if (MBBI != BeginMBBI) {
697 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
698 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
700 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
702 AddSub = ARM_AM::sub;
704 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
708 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
713 // Try merging with the next instruction.
714 MachineBasicBlock::iterator EndMBBI = MBB.end();
715 if (!DoMerge && MBBI != EndMBBI) {
716 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
717 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
720 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
722 AddSub = ARM_AM::sub;
723 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
727 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
739 bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
742 Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
745 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
747 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
750 // VLDM[SD]_UPD, VSTM[SD]_UPD
751 MachineOperand &MO = MI->getOperand(0);
752 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
753 .addReg(Base, getDefRegState(true)) // WB base register
754 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
756 .addImm(Pred).addReg(PredReg)
757 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
758 getKillRegState(MO.isKill())));
761 // LDR_PRE, LDR_POST,
762 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
763 .addReg(Base, RegState::Define)
764 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
766 // t2LDR_PRE, t2LDR_POST
767 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
768 .addReg(Base, RegState::Define)
769 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
771 MachineOperand &MO = MI->getOperand(0);
774 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
775 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
776 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
778 // t2STR_PRE, t2STR_POST
779 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
780 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
781 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
788 /// isMemoryOp - Returns true if the instruction is a memory operation (one
789 /// that this pass is capable of operating on).
790 static bool isMemoryOp(const MachineInstr *MI) {
791 // When no memory operands are present, conservatively assume unaligned,
792 // volatile, unfoldable.
793 if (!MI->hasOneMemOperand())
796 const MachineMemOperand *MMO = *MI->memoperands_begin();
798 // Don't touch volatile memory accesses - we may be changing their order.
799 if (MMO->isVolatile())
802 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not.
804 if (MMO->getAlignment() < 4)
807 // str <undef> could probably be eliminated entirely, but for now we just want
808 // to avoid making a mess of it.
809 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
810 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
811 MI->getOperand(0).isUndef())
814 // Likewise don't mess with references to undefined addresses.
815 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
816 MI->getOperand(1).isUndef())
819 int Opcode = MI->getOpcode();
824 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
827 return MI->getOperand(1).isReg();
830 return MI->getOperand(1).isReg();
835 return MI->getOperand(1).isReg();
840 /// AdvanceRS - Advance register scavenger to just before the earliest memory
841 /// op that is being merged.
842 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
843 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
844 unsigned Position = MemOps[0].Position;
845 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
846 if (MemOps[i].Position < Position) {
847 Position = MemOps[i].Position;
848 Loc = MemOps[i].MBBI;
852 if (Loc != MBB.begin())
853 RS->forward(prior(Loc));
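/// getMemoryOpOffset - Return the signed byte offset encoded in the
/// addressing-mode immediate operand of the load / store MI.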
856 static int getMemoryOpOffset(const MachineInstr *MI) {
857 int Opcode = MI->getOpcode();
858 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
859 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
860 unsigned NumOperands = MI->getDesc().getNumOperands();
861 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
863 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
864 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
865 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
869 ? ARM_AM::getAM2Offset(OffField)
870 : (isAM3 ? ARM_AM::getAM3Offset(OffField)
871 : ARM_AM::getAM5Offset(OffField) * 4);
873 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
876 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
879 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
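/// InsertLDR_STR - Emit a single (ARM or Thumb2) load or store with the given
/// register, base, offset and predicate operands; used below to split an
/// LDRD / STRD into two accesses.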
885 static void InsertLDR_STR(MachineBasicBlock &MBB,
886 MachineBasicBlock::iterator &MBBI,
887 int OffImm, bool isDef,
888 DebugLoc dl, unsigned NewOpc,
889 unsigned Reg, bool RegDeadKill, bool RegUndef,
890 unsigned BaseReg, bool BaseKill, bool BaseUndef,
891 unsigned OffReg, bool OffKill, bool OffUndef,
892 ARMCC::CondCodes Pred, unsigned PredReg,
893 const TargetInstrInfo *TII, bool isT2) {
897 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
899 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
902 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
904 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
905 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
907 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
908 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
910 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
912 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
913 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
915 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
916 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
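/// FixInvalidRegPairOp - Rewrite an LDRD / STRD whose registers do not form a
/// legal (even, odd) pair either into an LDM / STM (ascending registers, zero
/// offset) or into two single loads / stores. Returns true if MI was replaced.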
920 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
921 MachineBasicBlock::iterator &MBBI) {
922 MachineInstr *MI = &*MBBI;
923 unsigned Opcode = MI->getOpcode();
924 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
925 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
926 unsigned EvenReg = MI->getOperand(0).getReg();
927 unsigned OddReg = MI->getOperand(1).getReg();
928 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
929 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
930 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
933 MachineBasicBlock::iterator NewBBI = MBBI;
934 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
935 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
936 bool EvenDeadKill = isLd ?
937 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
938 bool EvenUndef = MI->getOperand(0).isUndef();
939 bool OddDeadKill = isLd ?
940 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
941 bool OddUndef = MI->getOperand(1).isUndef();
942 const MachineOperand &BaseOp = MI->getOperand(2);
943 unsigned BaseReg = BaseOp.getReg();
944 bool BaseKill = BaseOp.isKill();
945 bool BaseUndef = BaseOp.isUndef();
946 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
947 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
948 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
949 int OffImm = getMemoryOpOffset(MI);
950 unsigned PredReg = 0;
951 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
953 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
954 // Ascending register numbers and no offset. It's safe to change it to an ldm / stm.
956 unsigned NewOpc = (isLd)
957 ? (isT2 ? ARM::t2LDM : ARM::LDM)
958 : (isT2 ? ARM::t2STM : ARM::STM);
960 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
961 .addReg(BaseReg, getKillRegState(BaseKill))
962 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
963 .addImm(Pred).addReg(PredReg)
964 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
965 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
968 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
969 .addReg(BaseReg, getKillRegState(BaseKill))
970 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
971 .addImm(Pred).addReg(PredReg)
973 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
975 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
978 NewBBI = llvm::prior(MBBI);
980 // Split into two instructions.
981 assert((!isT2 || !OffReg) &&
982 "Thumb2 ldrd / strd does not encode offset register!");
983 unsigned NewOpc = (isLd)
984 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
985 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
986 DebugLoc dl = MBBI->getDebugLoc();
987 // If this is a load and the base register is killed, it may have been
988 // re-defed by the load; make sure the first load does not clobber it.
990 (BaseKill || OffKill) &&
991 (TRI->regsOverlap(EvenReg, BaseReg) ||
992 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
993 assert(!TRI->regsOverlap(OddReg, BaseReg) &&
994 (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
995 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
996 OddReg, OddDeadKill, false,
997 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
998 Pred, PredReg, TII, isT2);
999 NewBBI = llvm::prior(MBBI);
1000 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1001 EvenReg, EvenDeadKill, false,
1002 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
1003 Pred, PredReg, TII, isT2);
1005 if (OddReg == EvenReg && EvenDeadKill) {
1006 // If the two source operands are the same, the kill marker is
1007 // probably on the first one. e.g.
1008 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1009 EvenDeadKill = false;
1012 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1013 EvenReg, EvenDeadKill, EvenUndef,
1014 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
1015 Pred, PredReg, TII, isT2);
1016 NewBBI = llvm::prior(MBBI);
1017 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1018 OddReg, OddDeadKill, OddUndef,
1019 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
1020 Pred, PredReg, TII, isT2);
1035 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1036 /// ops of the same base and incrementing offset into LDM / STM ops.
1037 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1038 unsigned NumMerges = 0;
1039 unsigned NumMemOps = 0;
1041 unsigned CurrBase = 0;
1043 unsigned CurrSize = 0;
1044 ARMCC::CondCodes CurrPred = ARMCC::AL;
1045 unsigned CurrPredReg = 0;
1046 unsigned Position = 0;
1047 SmallVector<MachineBasicBlock::iterator,4> Merges;
1049 RS->enterBasicBlock(&MBB);
1050 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1052 if (FixInvalidRegPairOp(MBB, MBBI))
1055 bool Advance = false;
1056 bool TryMerge = false;
1057 bool Clobber = false;
1059 bool isMemOp = isMemoryOp(MBBI);
1061 int Opcode = MBBI->getOpcode();
1062 unsigned Size = getLSMultipleTransferSize(MBBI);
1063 const MachineOperand &MO = MBBI->getOperand(0);
1064 unsigned Reg = MO.getReg();
1065 bool isKill = MO.isDef() ? false : MO.isKill();
1066 unsigned Base = MBBI->getOperand(1).getReg();
1067 unsigned PredReg = 0;
1068 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1069 int Offset = getMemoryOpOffset(MBBI);
1070 // Watch out for:
1071 // r4 := ldr [r5]
1072 // r5 := ldr [r5, #4]
1073 // r6 := ldr [r5, #8]
1075 // The second ldr has effectively broken the chain even though it
1076 // looks like the later ldr(s) use the same base register. Try to
1077 // merge the ldr's so far, including this one. But don't try to
1078 // combine the following ldr(s).
1079 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1080 if (CurrBase == 0 && !Clobber) {
1081 // Start of a new chain.
1086 CurrPredReg = PredReg;
1087 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1096 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1097 // No need to match PredReg.
1098 // Continue adding to the queue.
1099 if (Offset > MemOps.back().Offset) {
1100 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1105 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1107 if (Offset < I->Offset) {
1108 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1113 } else if (Offset == I->Offset) {
1114 // Collision! This can't be merged!
1123 if (MBBI->isDebugValue()) {
1126 // Reached the end of the block; try merging the memory instructions.
1128 } else if (Advance) {
1132 // Reached the end of the block; try merging the memory instructions.
1138 if (NumMemOps > 1) {
1139 // Try to find a free register to use as a new base in case it's needed.
1140 // First advance to the instruction just before the start of the chain.
1141 AdvanceRS(MBB, MemOps);
1142 // Find a scratch register.
1143 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1144 // Process the load / store instructions.
1145 RS->forward(prior(MBBI));
1149 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1150 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1152 // Try folding preceding/trailing base inc/dec into the generated LDM/STM ops.
1154 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1155 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1157 NumMerges += Merges.size();
1159 // Try folding preceding/trailing base inc/dec into those loads / stores
1160 // that were not merged to form LDM/STM ops.
1161 for (unsigned i = 0; i != NumMemOps; ++i)
1162 if (!MemOps[i].Merged)
1163 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1166 // RS may be pointing to an instruction that's deleted.
1167 RS->skipTo(prior(MBBI));
1168 } else if (NumMemOps == 1) {
1169 // Try folding preceding/trailing base inc/dec into the single load/store.
1171 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1173 RS->forward(prior(MBBI));
1180 CurrPred = ARMCC::AL;
1187 // If the iterator hasn't been advanced and this is not a memory op, skip it.
1188 // It can't start a new chain anyway.
1189 if (!Advance && !isMemOp && MBBI != E) {
1195 return NumMerges > 0;
1199 struct OffsetCompare {
1200 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1201 int LOffset = getMemoryOpOffset(LHS);
1202 int ROffset = getMemoryOpOffset(RHS);
1203 assert(LHS == RHS || LOffset != ROffset);
1204 return LOffset > ROffset;
1209 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1210 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1211 /// directly restores the value of LR into pc.
1212 /// ldmfd sp!, {..., lr}
1213 /// bx lr
1214 /// or
1215 /// ldmfd sp!, {..., lr}
1216 /// mov pc, lr
1217 /// =>
1218 /// ldmfd sp!, {..., pc}
1219 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1220 if (MBB.empty()) return false;
1222 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1223 if (MBBI != MBB.begin() &&
1224 (MBBI->getOpcode() == ARM::BX_RET ||
1225 MBBI->getOpcode() == ARM::tBX_RET ||
1226 MBBI->getOpcode() == ARM::MOVPCLR)) {
1227 MachineInstr *PrevMI = prior(MBBI);
1228 if (PrevMI->getOpcode() == ARM::LDM_UPD ||
1229 PrevMI->getOpcode() == ARM::t2LDM_UPD) {
1230 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1231 if (MO.getReg() != ARM::LR)
1233 unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1234 PrevMI->setDesc(TII->get(NewOpc));
1243 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1244 const TargetMachine &TM = Fn.getTarget();
1245 AFI = Fn.getInfo<ARMFunctionInfo>();
1246 TII = TM.getInstrInfo();
1247 TRI = TM.getRegisterInfo();
1248 RS = new RegScavenger();
1249 isThumb2 = AFI->isThumb2Function();
1251 bool Modified = false;
1252 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1254 MachineBasicBlock &MBB = *MFI;
1255 Modified |= LoadStoreMultipleOpti(MBB);
1256 Modified |= MergeReturnIntoLDM(MBB);
1264 /// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
1265 /// loads / stores from consecutive locations closer together to make it more
1266 /// likely they will be combined later.
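/// For example (illustrative), hoisting the second load in
///   ldr r2, [r0, #4]
///   ... unrelated instructions ...
///   ldr r3, [r0, #8]
/// next to the first makes it more likely the pair can later be turned into
/// an ldrd or folded into an ldm.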
1269 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1271 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
1273 const TargetData *TD;
1274 const TargetInstrInfo *TII;
1275 const TargetRegisterInfo *TRI;
1276 const ARMSubtarget *STI;
1277 MachineRegisterInfo *MRI;
1278 MachineFunction *MF;
1280 virtual bool runOnMachineFunction(MachineFunction &Fn);
1282 virtual const char *getPassName() const {
1283 return "ARM pre- register allocation load / store optimization pass";
1287 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1288 unsigned &NewOpc, unsigned &EvenReg,
1289 unsigned &OddReg, unsigned &BaseReg,
1290 unsigned &OffReg, int &Offset,
1291 unsigned &PredReg, ARMCC::CondCodes &Pred,
1293 bool RescheduleOps(MachineBasicBlock *MBB,
1294 SmallVector<MachineInstr*, 4> &Ops,
1295 unsigned Base, bool isLd,
1296 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1297 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1299 char ARMPreAllocLoadStoreOpt::ID = 0;
1302 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1303 TD = Fn.getTarget().getTargetData();
1304 TII = Fn.getTarget().getInstrInfo();
1305 TRI = Fn.getTarget().getRegisterInfo();
1306 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1307 MRI = &Fn.getRegInfo();
1310 bool Modified = false;
1311 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1313 Modified |= RescheduleLoadStoreInstrs(MFI);
1318 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1319 MachineBasicBlock::iterator I,
1320 MachineBasicBlock::iterator E,
1321 SmallPtrSet<MachineInstr*, 4> &MemOps,
1322 SmallSet<unsigned, 4> &MemRegs,
1323 const TargetRegisterInfo *TRI) {
1324 // Are there stores / loads / calls between them?
1325 // FIXME: This is overly conservative. We should make use of alias information if possible.
1327 SmallSet<unsigned, 4> AddedRegPressure;
1329 if (I->isDebugValue() || MemOps.count(&*I))
1331 const TargetInstrDesc &TID = I->getDesc();
1332 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
1334 if (isLd && TID.mayStore())
1339 // It's not safe to move the first 'str' down.
1342 // str r4, [r0, #+4]
1346 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1347 MachineOperand &MO = I->getOperand(j);
1350 unsigned Reg = MO.getReg();
1351 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1353 if (Reg != Base && !MemRegs.count(Reg))
1354 AddedRegPressure.insert(Reg);
1358 // Estimate register pressure increase due to the transformation.
1359 if (MemRegs.size() <= 4)
1360 // Ok if we are moving a small number of instructions.
1362 return AddedRegPressure.size() <= MemRegs.size() * 2;
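/// CanFormLdStDWord - Return true if the two loads / stores Op0 and Op1 can be
/// combined into a single LDRD / STRD (or t2LDRDi8 / t2STRDi8), and compute
/// the new opcode and operands if so.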
1366 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1368 unsigned &NewOpc, unsigned &EvenReg,
1369 unsigned &OddReg, unsigned &BaseReg,
1370 unsigned &OffReg, int &Offset,
1372 ARMCC::CondCodes &Pred,
1374 // Make sure we're allowed to generate LDRD/STRD.
1375 if (!STI->hasV5TEOps())
1378 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1380 unsigned Opcode = Op0->getOpcode();
1381 if (Opcode == ARM::LDR)
1383 else if (Opcode == ARM::STR)
1385 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1386 NewOpc = ARM::t2LDRDi8;
1389 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1390 NewOpc = ARM::t2STRDi8;
1396 // Make sure the offset registers match.
1398 (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
1401 // Make sure the base address satisfies the i64 ld / st alignment requirement.
1402 if (!Op0->hasOneMemOperand() ||
1403 !(*Op0->memoperands_begin())->getValue() ||
1404 (*Op0->memoperands_begin())->isVolatile())
1407 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1408 const Function *Func = MF->getFunction();
1409 unsigned ReqAlign = STI->hasV6Ops()
1410 ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
1411 : 8; // Pre-v6 needs 8-byte alignment.
1412 if (Align < ReqAlign)
1415 // Then make sure the immediate offset fits.
1416 int OffImm = getMemoryOpOffset(Op0);
1420 // Can't fall back to t2LDRi8 / t2STRi8.
1423 int Limit = (1 << 8) * Scale;
1424 if (OffImm >= Limit || (OffImm & (Scale-1)))
1429 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1431 AddSub = ARM_AM::sub;
1434 int Limit = (1 << 8) * Scale;
1435 if (OffImm >= Limit || (OffImm & (Scale-1)))
1437 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1439 EvenReg = Op0->getOperand(0).getReg();
1440 OddReg = Op1->getOperand(0).getReg();
1441 if (EvenReg == OddReg)
1443 BaseReg = Op0->getOperand(1).getReg();
1445 OffReg = Op0->getOperand(2).getReg();
1446 Pred = llvm::getInstrPredicate(Op0, PredReg);
1447 dl = Op0->getDebugLoc();
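/// RescheduleOps - Given the loads / stores in Ops, which all use the same
/// base register, move runs of them next to each other (and try to form an
/// LDRD / STRD for a suitable pair) when it is safe and profitable.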
1451 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1452 SmallVector<MachineInstr*, 4> &Ops,
1453 unsigned Base, bool isLd,
1454 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1455 bool RetVal = false;
1457 // Sort by offset (in reverse order).
1458 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1460 // The loads / stores of the same base are in order. Scan them from first to
1461 // last and check for the following:
1462 // 1. Any def of base.
1463 // 2. Any gaps.
1464 while (Ops.size() > 1) {
1465 unsigned FirstLoc = ~0U;
1466 unsigned LastLoc = 0;
1467 MachineInstr *FirstOp = 0;
1468 MachineInstr *LastOp = 0;
1470 unsigned LastOpcode = 0;
1471 unsigned LastBytes = 0;
1472 unsigned NumMove = 0;
1473 for (int i = Ops.size() - 1; i >= 0; --i) {
1474 MachineInstr *Op = Ops[i];
1475 unsigned Loc = MI2LocMap[Op];
1476 if (Loc <= FirstLoc) {
1480 if (Loc >= LastLoc) {
1485 unsigned Opcode = Op->getOpcode();
1486 if (LastOpcode && Opcode != LastOpcode)
1489 int Offset = getMemoryOpOffset(Op);
1490 unsigned Bytes = getLSMultipleTransferSize(Op);
1492 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1495 LastOffset = Offset;
1497 LastOpcode = Opcode;
1498 if (++NumMove == 8) // FIXME: Tune this limit.
1505 SmallPtrSet<MachineInstr*, 4> MemOps;
1506 SmallSet<unsigned, 4> MemRegs;
1507 for (int i = NumMove-1; i >= 0; --i) {
1508 MemOps.insert(Ops[i]);
1509 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1512 // Be conservative: if the instructions are too far apart, don't
1513 // move them. We want to limit the increase of register pressure.
1514 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1516 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1517 MemOps, MemRegs, TRI);
1519 for (unsigned i = 0; i != NumMove; ++i)
1522 // This is the new location for the loads / stores.
1523 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1524 while (InsertPos != MBB->end()
1525 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1528 // If we are moving a pair of loads / stores, see if it makes sense
1529 // to try to allocate a pair of registers that can form register pairs.
1530 MachineInstr *Op0 = Ops.back();
1531 MachineInstr *Op1 = Ops[Ops.size()-2];
1532 unsigned EvenReg = 0, OddReg = 0;
1533 unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
1534 ARMCC::CondCodes Pred = ARMCC::AL;
1536 unsigned NewOpc = 0;
1539 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1540 EvenReg, OddReg, BaseReg, OffReg,
1541 Offset, PredReg, Pred, isT2)) {
1545 // Form the pair instruction.
1547 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1548 dl, TII->get(NewOpc))
1549 .addReg(EvenReg, RegState::Define)
1550 .addReg(OddReg, RegState::Define)
1554 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1557 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1558 dl, TII->get(NewOpc))
1564 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1570 // Add register allocation hints to form register pairs.
1571 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1572 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
1574 for (unsigned i = 0; i != NumMove; ++i) {
1575 MachineInstr *Op = Ops.back();
1577 MBB->splice(InsertPos, MBB, Op);
1581 NumLdStMoved += NumMove;
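/// RescheduleLoadStoreInstrs - Scan the block, bucket loads and stores by base
/// register, and invoke RescheduleOps on each bucket; calls and terminators
/// act as barriers that end the current window.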
1591 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1592 bool RetVal = false;
1594 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1595 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1596 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1597 SmallVector<unsigned, 4> LdBases;
1598 SmallVector<unsigned, 4> StBases;
1601 MachineBasicBlock::iterator MBBI = MBB->begin();
1602 MachineBasicBlock::iterator E = MBB->end();
1604 for (; MBBI != E; ++MBBI) {
1605 MachineInstr *MI = MBBI;
1606 const TargetInstrDesc &TID = MI->getDesc();
1607 if (TID.isCall() || TID.isTerminator()) {
1608 // Stop at barriers.
1613 if (!MI->isDebugValue())
1614 MI2LocMap[MI] = ++Loc;
1616 if (!isMemoryOp(MI))
1618 unsigned PredReg = 0;
1619 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1622 int Opc = MI->getOpcode();
1623 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1624 unsigned Base = MI->getOperand(1).getReg();
1625 int Offset = getMemoryOpOffset(MI);
1627 bool StopHere = false;
1629 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1630 Base2LdsMap.find(Base);
1631 if (BI != Base2LdsMap.end()) {
1632 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1633 if (Offset == getMemoryOpOffset(BI->second[i])) {
1639 BI->second.push_back(MI);
1641 SmallVector<MachineInstr*, 4> MIs;
1643 Base2LdsMap[Base] = MIs;
1644 LdBases.push_back(Base);
1647 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1648 Base2StsMap.find(Base);
1649 if (BI != Base2StsMap.end()) {
1650 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1651 if (Offset == getMemoryOpOffset(BI->second[i])) {
1657 BI->second.push_back(MI);
1659 SmallVector<MachineInstr*, 4> MIs;
1661 Base2StsMap[Base] = MIs;
1662 StBases.push_back(Base);
1667 // Found a duplicate (a base+offset combination that's seen earlier).
1674 // Re-schedule loads.
1675 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1676 unsigned Base = LdBases[i];
1677 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1679 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1682 // Re-schedule stores.
1683 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1684 unsigned Base = StBases[i];
1685 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1687 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1691 Base2LdsMap.clear();
1692 Base2StsMap.clear();
1702 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1703 /// optimization pass.
1704 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1706 return new ARMPreAllocLoadStoreOpt();
1707 return new ARMLoadStoreOpt();