1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
128 char ARMLoadStoreOpt::ID = 0;
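/// getLoadStoreMultipleOpcode - Map a single load / store opcode to its
/// load / store multiple counterpart (e.g. ARM::LDR -> ARM::LDM,
/// ARM::VSTRD -> ARM::VSTMD).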
131 static int getLoadStoreMultipleOpcode(int Opcode) {
159 default: llvm_unreachable("Unhandled opcode!");
164 static bool isT2i32Load(unsigned Opc) {
165 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
168 static bool isi32Load(unsigned Opc) {
169 return Opc == ARM::LDR || isT2i32Load(Opc);
172 static bool isT2i32Store(unsigned Opc) {
173 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
176 static bool isi32Store(unsigned Opc) {
177 return Opc == ARM::STR || isT2i32Store(Opc);
180 /// MergeOps - Create and insert a LDM or STM with Base as base register and
181 /// registers in Regs as the register operands that would be loaded / stored.
182 /// It returns true if the transformation is done.
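///
/// For example (illustrative only; registers, offsets, and addressing mode
/// are arbitrary), three consecutive loads from the same base can become one
/// ldm:
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// =>
///   ldmia r0, {r4, r5, r6}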
184 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
185 MachineBasicBlock::iterator MBBI,
186 int Offset, unsigned Base, bool BaseKill,
187 int Opcode, ARMCC::CondCodes Pred,
188 unsigned PredReg, unsigned Scratch, DebugLoc dl,
189 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
190 // Only a single register to load / store. Don't bother.
191 unsigned NumRegs = Regs.size();
195 ARM_AM::AMSubMode Mode = ARM_AM::ia;
196 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
197 if (isAM4 && Offset == 4) {
199 // Thumb2 does not support ldmib / stmib.
202 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
204 // Thumb2 does not support ldmda / stmda.
207 } else if (isAM4 && Offset == -4 * (int)NumRegs) {
209 } else if (Offset != 0) {
// If the starting offset isn't zero, insert an instruction to materialize a
// new base. But only do so if it is cost effective, i.e. merging more than
// two loads / stores.
217 if (isi32Load(Opcode))
// If it is a load, then just use one of the destination registers
// as the new base.
220 NewBase = Regs[NumRegs-1].first;
// Use the scratch register as the new base.
227 int BaseOpc = !isThumb2
229 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
233 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
236 int ImmedOffset = isThumb2
237 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
238 if (ImmedOffset == -1)
239 // FIXME: Try t2ADDri12 or t2SUBri12?
240 return false; // Probably not worth it then.
242 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
243 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
244 .addImm(Pred).addReg(PredReg).addReg(0);
BaseKill = true;  // New base is always killed right after its use.
249 bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
250 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
251 Opcode == ARM::VLDRD);
252 Opcode = getLoadStoreMultipleOpcode(Opcode);
253 MachineInstrBuilder MIB = (isAM4)
254 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
255 .addReg(Base, getKillRegState(BaseKill))
256 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
257 : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
258 .addReg(Base, getKillRegState(BaseKill))
259 .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
260 .addImm(Pred).addReg(PredReg);
261 for (unsigned i = 0; i != NumRegs; ++i)
262 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
263 | getKillRegState(Regs[i].second));
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
270 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
272 unsigned memOpsBegin, unsigned memOpsEnd,
273 unsigned insertAfter, int Offset,
274 unsigned Base, bool BaseKill,
276 ARMCC::CondCodes Pred, unsigned PredReg,
279 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
282 const unsigned insertPos = memOps[insertAfter].Position;
284 SmallSet<unsigned, 4> UnavailRegs;
285 SmallSet<unsigned, 4> KilledRegs;
286 DenseMap<unsigned, unsigned> Killer;
287 for (unsigned i = 0; i < memOpsBegin; ++i) {
288 if (memOps[i].Position < insertPos && memOps[i].isKill) {
289 unsigned Reg = memOps[i].Reg;
290 if (memOps[i].Merged)
291 UnavailRegs.insert(Reg);
293 KilledRegs.insert(Reg);
298 for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
299 if (memOps[i].Position < insertPos && memOps[i].isKill) {
300 unsigned Reg = memOps[i].Reg;
301 KilledRegs.insert(Reg);
306 SmallVector<std::pair<unsigned, bool>, 8> Regs;
307 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
308 unsigned Reg = memOps[i].Reg;
309 if (UnavailRegs.count(Reg))
310 // Register is killed before and it's not easy / possible to update the
311 // kill marker on already merged instructions. Abort.
314 // If we are inserting the merged operation after an unmerged operation that
315 // uses the same register, make sure to transfer any kill flag.
316 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
317 Regs.push_back(std::make_pair(Reg, isKill));
320 // Try to do the merge.
321 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
323 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
324 Pred, PredReg, Scratch, dl, Regs))
327 // Merge succeeded, update records.
328 Merges.push_back(prior(Loc));
329 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
330 // Remove kill flags from any unmerged memops that come before insertPos.
331 if (Regs[i-memOpsBegin].second) {
332 unsigned Reg = Regs[i-memOpsBegin].first;
333 if (KilledRegs.count(Reg)) {
334 unsigned j = Killer[Reg];
335 memOps[j].MBBI->getOperand(0).setIsKill(false);
338 MBB.erase(memOps[i].MBBI);
339 memOps[i].Merged = true;
343 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
344 /// load / store multiple instructions.
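/// Roughly speaking, the queue is scanned in offset order; a run of entries
/// with consecutive offsets and ascending register numbers is merged via
/// MergeOpsUpdate, and merging then restarts at the first entry that broke
/// the run.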
346 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
347 unsigned Base, int Opcode, unsigned Size,
348 ARMCC::CondCodes Pred, unsigned PredReg,
349 unsigned Scratch, MemOpQueue &MemOps,
350 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
351 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
352 int Offset = MemOps[SIndex].Offset;
353 int SOffset = Offset;
354 unsigned insertAfter = SIndex;
355 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
356 DebugLoc dl = Loc->getDebugLoc();
357 const MachineOperand &PMO = Loc->getOperand(0);
358 unsigned PReg = PMO.getReg();
359 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
360 : ARMRegisterInfo::getRegisterNumbering(PReg);
363 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
364 int NewOffset = MemOps[i].Offset;
365 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
366 unsigned Reg = MO.getReg();
367 unsigned RegNum = MO.isUndef() ? UINT_MAX
368 : ARMRegisterInfo::getRegisterNumbering(Reg);
369 // AM4 - register numbers in ascending order.
370 // AM5 - consecutive register numbers in ascending order.
371 // Can only do up to 16 double-word registers per insn.
372 if (Reg != ARM::SP &&
373 NewOffset == Offset + (int)Size &&
374 ((isAM4 && RegNum > PRegNum)
375 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
// Can't merge this in. Try merging the earlier ones first.
381 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
382 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
383 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
388 if (MemOps[i].Position > MemOps[insertAfter].Position)
392 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
393 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
394 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
398 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
399 unsigned Bytes, unsigned Limit,
400 ARMCC::CondCodes Pred, unsigned PredReg){
401 unsigned MyPredReg = 0;
404 if (MI->getOpcode() != ARM::t2SUBri &&
405 MI->getOpcode() != ARM::t2SUBrSPi &&
406 MI->getOpcode() != ARM::t2SUBrSPi12 &&
407 MI->getOpcode() != ARM::tSUBspi &&
408 MI->getOpcode() != ARM::SUBri)
411 // Make sure the offset fits in 8 bits.
412 if (Bytes <= 0 || (Limit && Bytes >= Limit))
415 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
416 return (MI->getOperand(0).getReg() == Base &&
417 MI->getOperand(1).getReg() == Base &&
418 (MI->getOperand(2).getImm()*Scale) == Bytes &&
419 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
420 MyPredReg == PredReg);
423 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
424 unsigned Bytes, unsigned Limit,
425 ARMCC::CondCodes Pred, unsigned PredReg){
426 unsigned MyPredReg = 0;
429 if (MI->getOpcode() != ARM::t2ADDri &&
430 MI->getOpcode() != ARM::t2ADDrSPi &&
431 MI->getOpcode() != ARM::t2ADDrSPi12 &&
432 MI->getOpcode() != ARM::tADDspi &&
433 MI->getOpcode() != ARM::ADDri)
436 if (Bytes <= 0 || (Limit && Bytes >= Limit))
437 // Make sure the offset fits in 8 bits.
440 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
441 return (MI->getOperand(0).getReg() == Base &&
442 MI->getOperand(1).getReg() == Base &&
443 (MI->getOperand(2).getImm()*Scale) == Bytes &&
444 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
445 MyPredReg == PredReg);
448 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
449 switch (MI->getOpcode()) {
467 return (MI->getNumOperands() - 4) * 4;
472 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
476 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
478 case ARM::LDM: return ARM::LDM_UPD;
479 case ARM::STM: return ARM::STM_UPD;
480 case ARM::t2LDM: return ARM::t2LDM_UPD;
481 case ARM::t2STM: return ARM::t2STM_UPD;
482 case ARM::VLDMS: return ARM::VLDMS_UPD;
483 case ARM::VLDMD: return ARM::VLDMD_UPD;
484 case ARM::VSTMS: return ARM::VSTMS_UPD;
485 case ARM::VSTMD: return ARM::VSTMD_UPD;
486 default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
503 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
504 MachineBasicBlock::iterator MBBI,
506 MachineBasicBlock::iterator &I) {
507 MachineInstr *MI = MBBI;
508 unsigned Base = MI->getOperand(0).getReg();
509 bool BaseKill = MI->getOperand(0).isKill();
510 unsigned Bytes = getLSMultipleTransferSize(MI);
511 unsigned PredReg = 0;
512 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
513 int Opcode = MI->getOpcode();
514 DebugLoc dl = MI->getDebugLoc();
515 bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
516 Opcode == ARM::STM || Opcode == ARM::t2STM);
518 bool DoMerge = false;
519 ARM_AM::AMSubMode Mode = ARM_AM::ia;
523 // Can't use an updating ld/st if the base register is also a dest
524 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
525 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
526 if (MI->getOperand(i).getReg() == Base)
529 Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
531 // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
532 Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
533 Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
536 // Try merging with the previous instruction.
537 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
538 if (MBBI != BeginMBBI) {
539 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
540 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
543 if (Mode == ARM_AM::ia &&
544 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
547 } else if (isAM4 && Mode == ARM_AM::ib &&
548 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
553 if (Mode == ARM_AM::ia &&
554 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
563 // Try merging with the next instruction.
564 MachineBasicBlock::iterator EndMBBI = MBB.end();
565 if (!DoMerge && MBBI != EndMBBI) {
566 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
567 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
570 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
571 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
573 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
574 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
578 if (Mode == ARM_AM::ia &&
579 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
595 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
596 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
597 .addReg(Base, getDefRegState(true)) // WB base register
598 .addReg(Base, getKillRegState(BaseKill));
600 // [t2]LDM_UPD, [t2]STM_UPD
601 MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
602 .addImm(Pred).addReg(PredReg);
// VLDM[SD]_UPD, VSTM[SD]_UPD
605 MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
606 .addImm(Pred).addReg(PredReg);
// Transfer the rest of the operands.
609 for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
610 MIB.addOperand(MI->getOperand(OpNum));
611 // Transfer memoperands.
612 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
618 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
620 case ARM::LDR: return ARM::LDR_PRE;
621 case ARM::STR: return ARM::STR_PRE;
622 case ARM::VLDRS: return ARM::VLDMS_UPD;
623 case ARM::VLDRD: return ARM::VLDMD_UPD;
624 case ARM::VSTRS: return ARM::VSTMS_UPD;
625 case ARM::VSTRD: return ARM::VSTMD_UPD;
628 return ARM::t2LDR_PRE;
631 return ARM::t2STR_PRE;
632 default: llvm_unreachable("Unhandled opcode!");
637 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
639 case ARM::LDR: return ARM::LDR_POST;
640 case ARM::STR: return ARM::STR_POST;
641 case ARM::VLDRS: return ARM::VLDMS_UPD;
642 case ARM::VLDRD: return ARM::VLDMD_UPD;
643 case ARM::VSTRS: return ARM::VSTMS_UPD;
644 case ARM::VSTRD: return ARM::VSTMD_UPD;
647 return ARM::t2LDR_POST;
650 return ARM::t2STR_POST;
651 default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
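///
/// For example (illustrative only):
///   ldr r1, [r0]
///   r0 := r0 + 4
/// =>
///   ldr r1, [r0], #4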
658 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
659 MachineBasicBlock::iterator MBBI,
660 const TargetInstrInfo *TII,
662 MachineBasicBlock::iterator &I) {
663 MachineInstr *MI = MBBI;
664 unsigned Base = MI->getOperand(1).getReg();
665 bool BaseKill = MI->getOperand(1).isKill();
666 unsigned Bytes = getLSMultipleTransferSize(MI);
667 int Opcode = MI->getOpcode();
668 DebugLoc dl = MI->getDebugLoc();
669 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
670 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
671 bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
672 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
674 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
676 if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
677 if (MI->getOperand(2).getImm() != 0)
680 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
681 // Can't do the merge if the destination register is the same as the would-be
682 // writeback register.
683 if (isLd && MI->getOperand(0).getReg() == Base)
686 unsigned PredReg = 0;
687 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
688 bool DoMerge = false;
689 ARM_AM::AddrOpc AddSub = ARM_AM::add;
691 // AM2 - 12 bits, thumb2 - 8 bits.
692 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
694 // Try merging with the previous instruction.
695 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
696 if (MBBI != BeginMBBI) {
697 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
698 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
700 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
702 AddSub = ARM_AM::sub;
704 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
708 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
713 // Try merging with the next instruction.
714 MachineBasicBlock::iterator EndMBBI = MBB.end();
715 if (!DoMerge && MBBI != EndMBBI) {
716 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
717 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
720 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
722 AddSub = ARM_AM::sub;
723 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
727 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
739 bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
742 Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
745 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
747 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// VLDM[SD]_UPD, VSTM[SD]_UPD
751 MachineOperand &MO = MI->getOperand(0);
752 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
753 .addReg(Base, getDefRegState(true)) // WB base register
754 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
756 .addImm(Pred).addReg(PredReg)
757 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
758 getKillRegState(MO.isKill())));
761 // LDR_PRE, LDR_POST,
762 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
763 .addReg(Base, RegState::Define)
764 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
766 // t2LDR_PRE, t2LDR_POST
767 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
768 .addReg(Base, RegState::Define)
769 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
771 MachineOperand &MO = MI->getOperand(0);
774 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
775 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
776 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
778 // t2STR_PRE, t2STR_POST
779 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
780 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
781 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if the instruction is a memory operation that
/// this pass is capable of operating on.
790 static bool isMemoryOp(const MachineInstr *MI) {
791 if (MI->hasOneMemOperand()) {
792 const MachineMemOperand *MMO = *MI->memoperands_begin();
794 // Don't touch volatile memory accesses - we may be changing their order.
795 if (MMO->isVolatile())
// Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not.
800 if (MMO->getAlignment() < 4)
804 // str <undef> could probably be eliminated entirely, but for now we just want
805 // to avoid making a mess of it.
806 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
807 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
808 MI->getOperand(0).isUndef())
811 // Likewise don't mess with references to undefined addresses.
812 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
813 MI->getOperand(1).isUndef())
816 int Opcode = MI->getOpcode();
821 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
824 return MI->getOperand(1).isReg();
827 return MI->getOperand(1).isReg();
832 return MI->getOperand(1).isReg();
837 /// AdvanceRS - Advance register scavenger to just before the earliest memory
838 /// op that is being merged.
839 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
840 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
841 unsigned Position = MemOps[0].Position;
842 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
843 if (MemOps[i].Position < Position) {
844 Position = MemOps[i].Position;
845 Loc = MemOps[i].MBBI;
849 if (Loc != MBB.begin())
850 RS->forward(prior(Loc));
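/// getMemoryOpOffset - Return the byte offset encoded in a load / store's
/// immediate operand, negated when the addressing mode subtracts (so e.g.
/// an "ldr r0, [r1, #-4]" yields -4).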
853 static int getMemoryOpOffset(const MachineInstr *MI) {
854 int Opcode = MI->getOpcode();
855 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
856 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
857 unsigned NumOperands = MI->getDesc().getNumOperands();
858 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
860 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
861 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
862 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
866 ? ARM_AM::getAM2Offset(OffField)
867 : (isAM3 ? ARM_AM::getAM3Offset(OffField)
868 : ARM_AM::getAM5Offset(OffField) * 4);
870 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
873 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
876 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
882 static void InsertLDR_STR(MachineBasicBlock &MBB,
883 MachineBasicBlock::iterator &MBBI,
884 int OffImm, bool isDef,
885 DebugLoc dl, unsigned NewOpc,
886 unsigned Reg, bool RegDeadKill, bool RegUndef,
887 unsigned BaseReg, bool BaseKill, bool BaseUndef,
888 unsigned OffReg, bool OffKill, bool OffUndef,
889 ARMCC::CondCodes Pred, unsigned PredReg,
890 const TargetInstrInfo *TII, bool isT2) {
894 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
896 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
899 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
901 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
902 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
904 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
905 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
907 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
909 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
910 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
912 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
913 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
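/// FixInvalidRegPairOp - An ldrd / strd needs an even / odd register pair
/// (e.g. r0/r1). When the registers do not form such a pair, turn the
/// instruction back into an ldm / stm (ascending registers, zero offset) or
/// into two separate ldr / str instructions.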
917 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
918 MachineBasicBlock::iterator &MBBI) {
919 MachineInstr *MI = &*MBBI;
920 unsigned Opcode = MI->getOpcode();
921 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
922 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
923 unsigned EvenReg = MI->getOperand(0).getReg();
924 unsigned OddReg = MI->getOperand(1).getReg();
925 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
926 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
927 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
930 MachineBasicBlock::iterator NewBBI = MBBI;
931 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
932 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
933 bool EvenDeadKill = isLd ?
934 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
935 bool EvenUndef = MI->getOperand(0).isUndef();
936 bool OddDeadKill = isLd ?
937 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
938 bool OddUndef = MI->getOperand(1).isUndef();
939 const MachineOperand &BaseOp = MI->getOperand(2);
940 unsigned BaseReg = BaseOp.getReg();
941 bool BaseKill = BaseOp.isKill();
942 bool BaseUndef = BaseOp.isUndef();
943 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
944 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
945 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
946 int OffImm = getMemoryOpOffset(MI);
947 unsigned PredReg = 0;
948 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
950 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to an
// ldm / stm.
953 unsigned NewOpc = (isLd)
954 ? (isT2 ? ARM::t2LDM : ARM::LDM)
955 : (isT2 ? ARM::t2STM : ARM::STM);
957 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
958 .addReg(BaseReg, getKillRegState(BaseKill))
959 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
960 .addImm(Pred).addReg(PredReg)
961 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
962 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
965 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
966 .addReg(BaseReg, getKillRegState(BaseKill))
967 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
968 .addImm(Pred).addReg(PredReg)
970 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
972 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
975 NewBBI = llvm::prior(MBBI);
977 // Split into two instructions.
978 assert((!isT2 || !OffReg) &&
979 "Thumb2 ldrd / strd does not encode offset register!");
980 unsigned NewOpc = (isLd)
981 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
982 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
983 DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and the base register is killed, it may have been
// re-defed by the load; make sure the first load does not clobber it.
987 (BaseKill || OffKill) &&
988 (TRI->regsOverlap(EvenReg, BaseReg) ||
989 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
990 assert(!TRI->regsOverlap(OddReg, BaseReg) &&
991 (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
992 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
993 OddReg, OddDeadKill, false,
994 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
995 Pred, PredReg, TII, isT2);
996 NewBBI = llvm::prior(MBBI);
997 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
998 EvenReg, EvenDeadKill, false,
999 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
1000 Pred, PredReg, TII, isT2);
1002 if (OddReg == EvenReg && EvenDeadKill) {
1003 // If the two source operands are the same, the kill marker is
1004 // probably on the first one. e.g.
1005 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1006 EvenDeadKill = false;
1009 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1010 EvenReg, EvenDeadKill, EvenUndef,
1011 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
1012 Pred, PredReg, TII, isT2);
1013 NewBBI = llvm::prior(MBBI);
1014 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1015 OddReg, OddDeadKill, OddUndef,
1016 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
1017 Pred, PredReg, TII, isT2);
1032 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1033 /// ops of the same base and incrementing offset into LDM / STM ops.
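/// Roughly: scan the block queueing memory ops that share an opcode, base
/// register, and predicate; when the chain is broken (base clobbered, an
/// unmergeable instruction, or the end of the block), merge the queued ops
/// and then try to fold any neighbouring base increment / decrement into the
/// result.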
1034 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1035 unsigned NumMerges = 0;
1036 unsigned NumMemOps = 0;
1038 unsigned CurrBase = 0;
1040 unsigned CurrSize = 0;
1041 ARMCC::CondCodes CurrPred = ARMCC::AL;
1042 unsigned CurrPredReg = 0;
1043 unsigned Position = 0;
1044 SmallVector<MachineBasicBlock::iterator,4> Merges;
1046 RS->enterBasicBlock(&MBB);
1047 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1049 if (FixInvalidRegPairOp(MBB, MBBI))
1052 bool Advance = false;
1053 bool TryMerge = false;
1054 bool Clobber = false;
1056 bool isMemOp = isMemoryOp(MBBI);
1058 int Opcode = MBBI->getOpcode();
1059 unsigned Size = getLSMultipleTransferSize(MBBI);
1060 const MachineOperand &MO = MBBI->getOperand(0);
1061 unsigned Reg = MO.getReg();
1062 bool isKill = MO.isDef() ? false : MO.isKill();
1063 unsigned Base = MBBI->getOperand(1).getReg();
1064 unsigned PredReg = 0;
1065 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1066 int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
// r5 := ldr [r5, #4]
1070 // r6 := ldr [r5, #8]
1072 // The second ldr has effectively broken the chain even though it
1073 // looks like the later ldr(s) use the same base register. Try to
1074 // merge the ldr's so far, including this one. But don't try to
1075 // combine the following ldr(s).
1076 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1077 if (CurrBase == 0 && !Clobber) {
1078 // Start of a new chain.
1083 CurrPredReg = PredReg;
1084 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1093 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1094 // No need to match PredReg.
1095 // Continue adding to the queue.
1096 if (Offset > MemOps.back().Offset) {
1097 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1102 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1104 if (Offset < I->Offset) {
1105 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1110 } else if (Offset == I->Offset) {
1111 // Collision! This can't be merged!
1120 if (MBBI->isDebugValue()) {
// Reached the end of the block; try merging the memory instructions.
1125 } else if (Advance) {
// Reached the end of the block; try merging the memory instructions.
1135 if (NumMemOps > 1) {
1136 // Try to find a free register to use as a new base in case it's needed.
1137 // First advance to the instruction just before the start of the chain.
1138 AdvanceRS(MBB, MemOps);
1139 // Find a scratch register.
1140 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1141 // Process the load / store instructions.
1142 RS->forward(prior(MBBI));
1146 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1147 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
// Try folding preceding/trailing base inc/dec into the generated LDM/STM ops.
1151 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1152 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1154 NumMerges += Merges.size();
// Try folding preceding/trailing base inc/dec into those load / store ops
// that were not merged to form LDM / STM ops.
1158 for (unsigned i = 0; i != NumMemOps; ++i)
1159 if (!MemOps[i].Merged)
1160 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1163 // RS may be pointing to an instruction that's deleted.
1164 RS->skipTo(prior(MBBI));
1165 } else if (NumMemOps == 1) {
// Try folding preceding/trailing base inc/dec into the single load / store.
1168 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1170 RS->forward(prior(MBBI));
1177 CurrPred = ARMCC::AL;
1184 // If iterator hasn't been advanced and this is not a memory op, skip it.
1185 // It can't start a new chain anyway.
1186 if (!Advance && !isMemOp && MBBI != E) {
1192 return NumMerges > 0;
1196 struct OffsetCompare {
1197 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1198 int LOffset = getMemoryOpOffset(LHS);
1199 int ROffset = getMemoryOpOffset(RHS);
1200 assert(LHS == RHS || LOffset != ROffset);
1201 return LOffset > ROffset;
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
1216 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1217 if (MBB.empty()) return false;
1219 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1220 if (MBBI != MBB.begin() &&
1221 (MBBI->getOpcode() == ARM::BX_RET ||
1222 MBBI->getOpcode() == ARM::tBX_RET ||
1223 MBBI->getOpcode() == ARM::MOVPCLR)) {
1224 MachineInstr *PrevMI = prior(MBBI);
1225 if (PrevMI->getOpcode() == ARM::LDM_UPD ||
1226 PrevMI->getOpcode() == ARM::t2LDM_UPD) {
1227 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1228 if (MO.getReg() != ARM::LR)
1230 unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1231 PrevMI->setDesc(TII->get(NewOpc));
1240 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1241 const TargetMachine &TM = Fn.getTarget();
1242 AFI = Fn.getInfo<ARMFunctionInfo>();
1243 TII = TM.getInstrInfo();
1244 TRI = TM.getRegisterInfo();
1245 RS = new RegScavenger();
1246 isThumb2 = AFI->isThumb2Function();
1248 bool Modified = false;
1249 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1251 MachineBasicBlock &MBB = *MFI;
1252 Modified |= LoadStoreMultipleOpti(MBB);
1253 Modified |= MergeReturnIntoLDM(MBB);
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
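///
/// For example (illustrative only), moving the second load in
///   ldr r2, [r0]
///   add r7, r7, #1
///   ldr r3, [r0, #4]
/// next to the first one makes it possible to form an ldm later (or for
/// RescheduleOps to form an ldrd when the target allows it).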
1266 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1268 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
1270 const TargetData *TD;
1271 const TargetInstrInfo *TII;
1272 const TargetRegisterInfo *TRI;
1273 const ARMSubtarget *STI;
1274 MachineRegisterInfo *MRI;
1275 MachineFunction *MF;
1277 virtual bool runOnMachineFunction(MachineFunction &Fn);
1279 virtual const char *getPassName() const {
1280 return "ARM pre- register allocation load / store optimization pass";
1284 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1285 unsigned &NewOpc, unsigned &EvenReg,
1286 unsigned &OddReg, unsigned &BaseReg,
1287 unsigned &OffReg, int &Offset,
1288 unsigned &PredReg, ARMCC::CondCodes &Pred,
1290 bool RescheduleOps(MachineBasicBlock *MBB,
1291 SmallVector<MachineInstr*, 4> &Ops,
1292 unsigned Base, bool isLd,
1293 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1294 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1296 char ARMPreAllocLoadStoreOpt::ID = 0;
1299 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1300 TD = Fn.getTarget().getTargetData();
1301 TII = Fn.getTarget().getInstrInfo();
1302 TRI = Fn.getTarget().getRegisterInfo();
1303 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1304 MRI = &Fn.getRegInfo();
1307 bool Modified = false;
1308 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1310 Modified |= RescheduleLoadStoreInstrs(MFI);
1315 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1316 MachineBasicBlock::iterator I,
1317 MachineBasicBlock::iterator E,
1318 SmallPtrSet<MachineInstr*, 4> &MemOps,
1319 SmallSet<unsigned, 4> &MemRegs,
1320 const TargetRegisterInfo *TRI) {
1321 // Are there stores / loads / calls between them?
// FIXME: This is overly conservative. We should make use of alias information
// if possible.
1324 SmallSet<unsigned, 4> AddedRegPressure;
1326 if (I->isDebugValue() || MemOps.count(&*I))
1328 const TargetInstrDesc &TID = I->getDesc();
1329 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
1331 if (isLd && TID.mayStore())
1336 // It's not safe to move the first 'str' down.
1339 // str r4, [r0, #+4]
1343 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1344 MachineOperand &MO = I->getOperand(j);
1347 unsigned Reg = MO.getReg();
1348 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1350 if (Reg != Base && !MemRegs.count(Reg))
1351 AddedRegPressure.insert(Reg);
1355 // Estimate register pressure increase due to the transformation.
1356 if (MemRegs.size() <= 4)
  // Ok if we are moving a small number of instructions.
  return true;
return AddedRegPressure.size() <= MemRegs.size() * 2;
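/// CanFormLdStDWord - Decide whether two loads / stores can be combined into
/// a single ldrd / strd: the target must support the instruction, the two
/// destination / source registers must differ, the base address must satisfy
/// the i64 alignment requirement, and the immediate offset must be encodable.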
1363 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1365 unsigned &NewOpc, unsigned &EvenReg,
1366 unsigned &OddReg, unsigned &BaseReg,
1367 unsigned &OffReg, int &Offset,
1369 ARMCC::CondCodes &Pred,
1371 // Make sure we're allowed to generate LDRD/STRD.
1372 if (!STI->hasV5TEOps())
1375 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1377 unsigned Opcode = Op0->getOpcode();
1378 if (Opcode == ARM::LDR)
1380 else if (Opcode == ARM::STR)
1382 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1383 NewOpc = ARM::t2LDRDi8;
1386 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1387 NewOpc = ARM::t2STRDi8;
1393 // Make sure the offset registers match.
1395 (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
// Make sure the base address satisfies the i64 ld / st alignment requirement.
1399 if (!Op0->hasOneMemOperand() ||
1400 !(*Op0->memoperands_begin())->getValue() ||
1401 (*Op0->memoperands_begin())->isVolatile())
1404 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1405 const Function *Func = MF->getFunction();
1406 unsigned ReqAlign = STI->hasV6Ops()
1407 ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
: 8;  // Pre-v6 needs 8-byte alignment.
1409 if (Align < ReqAlign)
1412 // Then make sure the immediate offset fits.
1413 int OffImm = getMemoryOpOffset(Op0);
1417 // Can't fall back to t2LDRi8 / t2STRi8.
1420 int Limit = (1 << 8) * Scale;
1421 if (OffImm >= Limit || (OffImm & (Scale-1)))
1426 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1428 AddSub = ARM_AM::sub;
1431 int Limit = (1 << 8) * Scale;
1432 if (OffImm >= Limit || (OffImm & (Scale-1)))
1434 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1436 EvenReg = Op0->getOperand(0).getReg();
1437 OddReg = Op1->getOperand(0).getReg();
1438 if (EvenReg == OddReg)
1440 BaseReg = Op0->getOperand(1).getReg();
1442 OffReg = Op0->getOperand(2).getReg();
1443 Pred = llvm::getInstrPredicate(Op0, PredReg);
1444 dl = Op0->getDebugLoc();
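/// RescheduleOps - Move loads / stores with the same base register next to
/// each other so that later passes can merge them; when exactly two adjacent
/// ops qualify, emit an ldrd / strd directly and add register allocation
/// hints for an even / odd pair.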
1448 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1449 SmallVector<MachineInstr*, 4> &Ops,
1450 unsigned Base, bool isLd,
1451 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1452 bool RetVal = false;
1454 // Sort by offset (in reverse order).
1455 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1457 // The loads / stores of the same base are in order. Scan them from first to
1458 // last and check for the following:
// 1. Any def of base.
// 2. Any gaps.
1461 while (Ops.size() > 1) {
1462 unsigned FirstLoc = ~0U;
1463 unsigned LastLoc = 0;
1464 MachineInstr *FirstOp = 0;
1465 MachineInstr *LastOp = 0;
1467 unsigned LastOpcode = 0;
1468 unsigned LastBytes = 0;
1469 unsigned NumMove = 0;
1470 for (int i = Ops.size() - 1; i >= 0; --i) {
1471 MachineInstr *Op = Ops[i];
1472 unsigned Loc = MI2LocMap[Op];
1473 if (Loc <= FirstLoc) {
1477 if (Loc >= LastLoc) {
1482 unsigned Opcode = Op->getOpcode();
1483 if (LastOpcode && Opcode != LastOpcode)
1486 int Offset = getMemoryOpOffset(Op);
1487 unsigned Bytes = getLSMultipleTransferSize(Op);
1489 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1492 LastOffset = Offset;
1494 LastOpcode = Opcode;
1495 if (++NumMove == 8) // FIXME: Tune this limit.
1502 SmallPtrSet<MachineInstr*, 4> MemOps;
1503 SmallSet<unsigned, 4> MemRegs;
1504 for (int i = NumMove-1; i >= 0; --i) {
1505 MemOps.insert(Ops[i]);
1506 MemRegs.insert(Ops[i]->getOperand(0).getReg());
// Be conservative: if the instructions are too far apart, don't move them.
// We want to limit the increase of register pressure.
1511 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1513 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1514 MemOps, MemRegs, TRI);
1516 for (unsigned i = 0; i != NumMove; ++i)
1519 // This is the new location for the loads / stores.
1520 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1521 while (InsertPos != MBB->end()
1522 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1525 // If we are moving a pair of loads / stores, see if it makes sense
1526 // to try to allocate a pair of registers that can form register pairs.
1527 MachineInstr *Op0 = Ops.back();
1528 MachineInstr *Op1 = Ops[Ops.size()-2];
1529 unsigned EvenReg = 0, OddReg = 0;
1530 unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
1531 ARMCC::CondCodes Pred = ARMCC::AL;
1533 unsigned NewOpc = 0;
1536 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1537 EvenReg, OddReg, BaseReg, OffReg,
1538 Offset, PredReg, Pred, isT2)) {
1542 // Form the pair instruction.
1544 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1545 dl, TII->get(NewOpc))
1546 .addReg(EvenReg, RegState::Define)
1547 .addReg(OddReg, RegState::Define)
1551 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1554 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1555 dl, TII->get(NewOpc))
1561 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1567 // Add register allocation hints to form register pairs.
1568 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1569 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
1571 for (unsigned i = 0; i != NumMove; ++i) {
1572 MachineInstr *Op = Ops.back();
1574 MBB->splice(InsertPos, MBB, Op);
1578 NumLdStMoved += NumMove;
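/// RescheduleLoadStoreInstrs - Collect the loads and stores of a basic block
/// by base register, stopping at barriers (calls and terminators), and hand
/// each group to RescheduleOps.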
1588 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1589 bool RetVal = false;
1591 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1592 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1593 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1594 SmallVector<unsigned, 4> LdBases;
1595 SmallVector<unsigned, 4> StBases;
1598 MachineBasicBlock::iterator MBBI = MBB->begin();
1599 MachineBasicBlock::iterator E = MBB->end();
1601 for (; MBBI != E; ++MBBI) {
1602 MachineInstr *MI = MBBI;
1603 const TargetInstrDesc &TID = MI->getDesc();
1604 if (TID.isCall() || TID.isTerminator()) {
1605 // Stop at barriers.
1610 if (!MI->isDebugValue())
1611 MI2LocMap[MI] = ++Loc;
1613 if (!isMemoryOp(MI))
1615 unsigned PredReg = 0;
1616 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1619 int Opc = MI->getOpcode();
1620 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1621 unsigned Base = MI->getOperand(1).getReg();
1622 int Offset = getMemoryOpOffset(MI);
1624 bool StopHere = false;
1626 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1627 Base2LdsMap.find(Base);
1628 if (BI != Base2LdsMap.end()) {
1629 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1630 if (Offset == getMemoryOpOffset(BI->second[i])) {
1636 BI->second.push_back(MI);
1638 SmallVector<MachineInstr*, 4> MIs;
1640 Base2LdsMap[Base] = MIs;
1641 LdBases.push_back(Base);
1644 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1645 Base2StsMap.find(Base);
1646 if (BI != Base2StsMap.end()) {
1647 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1648 if (Offset == getMemoryOpOffset(BI->second[i])) {
1654 BI->second.push_back(MI);
1656 SmallVector<MachineInstr*, 4> MIs;
1658 Base2StsMap[Base] = MIs;
1659 StBases.push_back(Base);
1664 // Found a duplicate (a base+offset combination that's seen earlier).
1671 // Re-schedule loads.
1672 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1673 unsigned Base = LdBases[i];
1674 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1676 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1679 // Re-schedule stores.
1680 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1681 unsigned Base = StBases[i];
1682 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1684 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1688 Base2LdsMap.clear();
1689 Base2StsMap.clear();
1699 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1700 /// optimization pass.
1701 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1703 return new ARMPreAllocLoadStoreOpt();
1704 return new ARMLoadStoreOpt();