1 //=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #include "AArch64InstrInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Target/TargetInstrInfo.h"
30 #include "llvm/Target/TargetMachine.h"
31 #include "llvm/Target/TargetRegisterInfo.h"
34 #define DEBUG_TYPE "aarch64-ldst-opt"
36 /// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine
37 /// load / store instructions to form ldp / stp instructions.
39 STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
40 STATISTIC(NumPostFolded, "Number of post-index updates folded");
41 STATISTIC(NumPreFolded, "Number of pre-index updates folded");
42 STATISTIC(NumUnscaledPairCreated,
43 "Number of load/store from unscaled generated");
45 static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
46 cl::init(20), cl::Hidden);
48 // Place holder while testing unscaled load/store combining
49 static cl::opt<bool> EnableAArch64UnscaledMemOp(
50 "aarch64-unscaled-mem-op", cl::Hidden,
51 cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
54 struct AArch64LoadStoreOpt : public MachineFunctionPass {
56 AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
58 const AArch64InstrInfo *TII;
59 const TargetRegisterInfo *TRI;
61 // Scan the instructions looking for a load/store that can be combined
62 // with the current instruction into a load/store pair.
63 // Return the matching instruction if one is found, else MBB->end().
64 // If a matching instruction is found, MergeForward is set to true if the
65 // merge is to remove the first instruction and replace the second with
66 // a pair-wise insn, and false if the reverse is true.
67 // \p SExtIdx[out] gives the index of the result of the load pair that
68 // must be extended. The value of SExtIdx assumes that the paired load
69 // produces the value in this order: (I, returned iterator), i.e.,
70 // -1 means no value has to be extended, 0 means I, and 1 means the
72 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
73 bool &MergeForward, int &SExtIdx,
75 // Merge the two instructions indicated into a single pair-wise instruction.
76 // If MergeForward is true, erase the first instruction and fold its
77 // operation into the second. If false, the reverse. Return the instruction
78 // following the first instruction (which may change during processing).
79 // \p SExtIdx index of the result that must be extended for a paired load.
80 // -1 means none, 0 means I, and 1 means Paired.
81 MachineBasicBlock::iterator
82 mergePairedInsns(MachineBasicBlock::iterator I,
83 MachineBasicBlock::iterator Paired, bool MergeForward,
86 // Scan the instruction list to find a base register update that can
87 // be combined with the current instruction (a load or store) using
88 // pre or post indexed addressing with writeback. Scan forwards.
89 MachineBasicBlock::iterator
90 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
93 // Scan the instruction list to find a base register update that can
94 // be combined with the current instruction (a load or store) using
95 // pre or post indexed addressing with writeback. Scan backwards.
96 MachineBasicBlock::iterator
97 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
99 // Merge a pre-index base register update into a ld/st instruction.
100 MachineBasicBlock::iterator
101 mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
102 MachineBasicBlock::iterator Update);
104 // Merge a post-index base register update into a ld/st instruction.
105 MachineBasicBlock::iterator
106 mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
107 MachineBasicBlock::iterator Update);
109 bool optimizeBlock(MachineBasicBlock &MBB);
111 bool runOnMachineFunction(MachineFunction &Fn) override;
113 const char *getPassName() const override {
114 return "AArch64 load / store optimization pass";
118 int getMemSize(MachineInstr *MemMI);
120 char AArch64LoadStoreOpt::ID = 0;
123 static bool isUnscaledLdst(unsigned Opc) {
127 case AArch64::STURSi:
129 case AArch64::STURDi:
131 case AArch64::STURQi:
133 case AArch64::STURWi:
135 case AArch64::STURXi:
137 case AArch64::LDURSi:
139 case AArch64::LDURDi:
141 case AArch64::LDURQi:
143 case AArch64::LDURWi:
145 case AArch64::LDURXi:
147 case AArch64::LDURSWi:
152 // Size in bytes of the data moved by an unscaled load or store
153 int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
154 switch (MemMI->getOpcode()) {
156 llvm_unreachable("Opcode has unknown size!");
157 case AArch64::STRSui:
158 case AArch64::STURSi:
160 case AArch64::STRDui:
161 case AArch64::STURDi:
163 case AArch64::STRQui:
164 case AArch64::STURQi:
166 case AArch64::STRWui:
167 case AArch64::STURWi:
169 case AArch64::STRXui:
170 case AArch64::STURXi:
172 case AArch64::LDRSui:
173 case AArch64::LDURSi:
175 case AArch64::LDRDui:
176 case AArch64::LDURDi:
178 case AArch64::LDRQui:
179 case AArch64::LDURQi:
181 case AArch64::LDRWui:
182 case AArch64::LDURWi:
184 case AArch64::LDRXui:
185 case AArch64::LDURXi:
187 case AArch64::LDRSWui:
188 case AArch64::LDURSWi:
193 static unsigned getMatchingNonSExtOpcode(unsigned Opc,
194 bool *IsValidLdStrOpc = nullptr) {
196 *IsValidLdStrOpc = true;
200 *IsValidLdStrOpc = false;
202 case AArch64::STRDui:
203 case AArch64::STURDi:
204 case AArch64::STRQui:
205 case AArch64::STURQi:
206 case AArch64::STRWui:
207 case AArch64::STURWi:
208 case AArch64::STRXui:
209 case AArch64::STURXi:
210 case AArch64::LDRDui:
211 case AArch64::LDURDi:
212 case AArch64::LDRQui:
213 case AArch64::LDURQi:
214 case AArch64::LDRWui:
215 case AArch64::LDURWi:
216 case AArch64::LDRXui:
217 case AArch64::LDURXi:
218 case AArch64::STRSui:
219 case AArch64::STURSi:
220 case AArch64::LDRSui:
221 case AArch64::LDURSi:
223 case AArch64::LDRSWui:
224 return AArch64::LDRWui;
225 case AArch64::LDURSWi:
226 return AArch64::LDURWi;
230 static unsigned getMatchingPairOpcode(unsigned Opc) {
233 llvm_unreachable("Opcode has no pairwise equivalent!");
234 case AArch64::STRSui:
235 case AArch64::STURSi:
236 return AArch64::STPSi;
237 case AArch64::STRDui:
238 case AArch64::STURDi:
239 return AArch64::STPDi;
240 case AArch64::STRQui:
241 case AArch64::STURQi:
242 return AArch64::STPQi;
243 case AArch64::STRWui:
244 case AArch64::STURWi:
245 return AArch64::STPWi;
246 case AArch64::STRXui:
247 case AArch64::STURXi:
248 return AArch64::STPXi;
249 case AArch64::LDRSui:
250 case AArch64::LDURSi:
251 return AArch64::LDPSi;
252 case AArch64::LDRDui:
253 case AArch64::LDURDi:
254 return AArch64::LDPDi;
255 case AArch64::LDRQui:
256 case AArch64::LDURQi:
257 return AArch64::LDPQi;
258 case AArch64::LDRWui:
259 case AArch64::LDURWi:
260 return AArch64::LDPWi;
261 case AArch64::LDRXui:
262 case AArch64::LDURXi:
263 return AArch64::LDPXi;
264 case AArch64::LDRSWui:
265 case AArch64::LDURSWi:
266 return AArch64::LDPSWi;
270 static unsigned getPreIndexedOpcode(unsigned Opc) {
273 llvm_unreachable("Opcode has no pre-indexed equivalent!");
274 case AArch64::STRSui:
275 return AArch64::STRSpre;
276 case AArch64::STRDui:
277 return AArch64::STRDpre;
278 case AArch64::STRQui:
279 return AArch64::STRQpre;
280 case AArch64::STRWui:
281 return AArch64::STRWpre;
282 case AArch64::STRXui:
283 return AArch64::STRXpre;
284 case AArch64::LDRSui:
285 return AArch64::LDRSpre;
286 case AArch64::LDRDui:
287 return AArch64::LDRDpre;
288 case AArch64::LDRQui:
289 return AArch64::LDRQpre;
290 case AArch64::LDRWui:
291 return AArch64::LDRWpre;
292 case AArch64::LDRXui:
293 return AArch64::LDRXpre;
294 case AArch64::LDRSWui:
295 return AArch64::LDRSWpre;
299 static unsigned getPostIndexedOpcode(unsigned Opc) {
302 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
303 case AArch64::STRSui:
304 return AArch64::STRSpost;
305 case AArch64::STRDui:
306 return AArch64::STRDpost;
307 case AArch64::STRQui:
308 return AArch64::STRQpost;
309 case AArch64::STRWui:
310 return AArch64::STRWpost;
311 case AArch64::STRXui:
312 return AArch64::STRXpost;
313 case AArch64::LDRSui:
314 return AArch64::LDRSpost;
315 case AArch64::LDRDui:
316 return AArch64::LDRDpost;
317 case AArch64::LDRQui:
318 return AArch64::LDRQpost;
319 case AArch64::LDRWui:
320 return AArch64::LDRWpost;
321 case AArch64::LDRXui:
322 return AArch64::LDRXpost;
323 case AArch64::LDRSWui:
324 return AArch64::LDRSWpost;
328 MachineBasicBlock::iterator
329 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
330 MachineBasicBlock::iterator Paired,
331 bool MergeForward, int SExtIdx) {
332 MachineBasicBlock::iterator NextI = I;
334 // If NextI is the second of the two instructions to be merged, we need
335 // to skip one further. Either way we merge will invalidate the iterator,
336 // and we don't need to scan the new instruction, as it's a pairwise
337 // instruction, which we're not considering for further action anyway.
342 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
343 bool IsUnscaled = isUnscaledLdst(Opc);
345 IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
347 unsigned NewOpc = getMatchingPairOpcode(Opc);
348 // Insert our new paired instruction after whichever of the paired
349 // instructions MergeForward indicates.
350 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
351 // Also based on MergeForward is from where we copy the base register operand
352 // so we get the flags compatible with the input code.
353 MachineOperand &BaseRegOp =
354 MergeForward ? Paired->getOperand(1) : I->getOperand(1);
356 // Which register is Rt and which is Rt2 depends on the offset order.
357 MachineInstr *RtMI, *Rt2MI;
358 if (I->getOperand(2).getImm() ==
359 Paired->getOperand(2).getImm() + OffsetStride) {
362 // Here we swapped the assumption made for SExtIdx.
363 // I.e., we turn ldp I, Paired into ldp Paired, I.
364 // Update the index accordingly.
366 SExtIdx = (SExtIdx + 1) % 2;
372 int OffsetImm = RtMI->getOperand(2).getImm();
373 if (IsUnscaled && EnableAArch64UnscaledMemOp)
374 OffsetImm /= OffsetStride;
376 // Construct the new instruction.
377 MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
378 I->getDebugLoc(), TII->get(NewOpc))
379 .addOperand(RtMI->getOperand(0))
380 .addOperand(Rt2MI->getOperand(0))
381 .addOperand(BaseRegOp)
385 // FIXME: Do we need/want to copy the mem operands from the source
386 // instructions? Probably. What uses them after this?
388 DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
389 DEBUG(I->print(dbgs()));
390 DEBUG(dbgs() << " ");
391 DEBUG(Paired->print(dbgs()));
392 DEBUG(dbgs() << " with instruction:\n ");
395 // Generate the sign extension for the proper result of the ldp.
396 // I.e., with X1, that would be:
397 // %W1<def> = KILL %W1, %X1<imp-def>
398 // %X1<def> = SBFMXri %X1<kill>, 0, 31
399 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
400 // Right now, DstMO has the extended register, since it comes from an
402 unsigned DstRegX = DstMO.getReg();
403 // Get the W variant of that register.
404 unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
405 // Update the result of LDP to use the W instead of the X variant.
406 DstMO.setReg(DstRegW);
407 DEBUG(((MachineInstr *)MIB)->print(dbgs()));
408 DEBUG(dbgs() << "\n");
409 // Make the machine verifier happy by providing a definition for
411 // Insert this definition right after the generated LDP, i.e., before
413 MachineInstrBuilder MIBKill =
414 BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
415 TII->get(TargetOpcode::KILL), DstRegW)
417 .addReg(DstRegX, RegState::Define);
418 MIBKill->getOperand(2).setImplicit();
419 // Create the sign extension.
420 MachineInstrBuilder MIBSXTW =
421 BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
422 TII->get(AArch64::SBFMXri), DstRegX)
427 DEBUG(dbgs() << " Extend operand:\n ");
428 DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
429 DEBUG(dbgs() << "\n");
431 DEBUG(((MachineInstr *)MIB)->print(dbgs()));
432 DEBUG(dbgs() << "\n");
435 // Erase the old instructions.
436 I->eraseFromParent();
437 Paired->eraseFromParent();
442 /// trackRegDefsUses - Remember what registers the specified instruction uses
444 static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
446 const TargetRegisterInfo *TRI) {
447 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
448 MachineOperand &MO = MI->getOperand(i);
450 ModifiedRegs.setBitsNotInMask(MO.getRegMask());
454 unsigned Reg = MO.getReg();
456 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
457 ModifiedRegs.set(*AI);
459 assert(MO.isUse() && "Reg operand not a def and not a use?!?");
460 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
466 static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
467 if (!IsUnscaled && (Offset > 63 || Offset < -64))
470 // Convert the byte-offset used by unscaled into an "element" offset used
471 // by the scaled pair load/store instructions.
472 int ElemOffset = Offset / OffsetStride;
473 if (ElemOffset > 63 || ElemOffset < -64)
479 // Do alignment, specialized to power of 2 and for signed ints,
480 // avoiding having to do a C-style cast from uint_64t to int when
481 // using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
482 // FIXME: Move this function to include/MathExtras.h?
483 static int alignTo(int Num, int PowOf2) {
484 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
487 static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb,
488 const AArch64InstrInfo *TII) {
489 // One of the instructions must modify memory.
490 if (!MIa->mayStore() && !MIb->mayStore())
493 // Both instructions must be memory operations.
494 if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore())
497 return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
500 static bool mayAlias(MachineInstr *MIa,
501 SmallVectorImpl<MachineInstr *> &MemInsns,
502 const AArch64InstrInfo *TII) {
503 for (auto &MIb : MemInsns)
504 if (mayAlias(MIa, MIb, TII))
510 /// findMatchingInsn - Scan the instructions looking for a load/store that can
511 /// be combined with the current instruction into a load/store pair.
512 MachineBasicBlock::iterator
513 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
514 bool &MergeForward, int &SExtIdx,
516 MachineBasicBlock::iterator E = I->getParent()->end();
517 MachineBasicBlock::iterator MBBI = I;
518 MachineInstr *FirstMI = I;
521 unsigned Opc = FirstMI->getOpcode();
522 bool MayLoad = FirstMI->mayLoad();
523 bool IsUnscaled = isUnscaledLdst(Opc);
524 unsigned Reg = FirstMI->getOperand(0).getReg();
525 unsigned BaseReg = FirstMI->getOperand(1).getReg();
526 int Offset = FirstMI->getOperand(2).getImm();
528 // Early exit if the first instruction modifies the base register.
529 // e.g., ldr x0, [x0]
530 // Early exit if the offset if not possible to match. (6 bits of positive
531 // range, plus allow an extra one in case we find a later insn that matches
533 if (FirstMI->modifiesRegister(BaseReg, TRI))
536 IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
537 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
540 // Track which registers have been modified and used between the first insn
541 // (inclusive) and the second insn.
542 BitVector ModifiedRegs, UsedRegs;
543 ModifiedRegs.resize(TRI->getNumRegs());
544 UsedRegs.resize(TRI->getNumRegs());
546 // Remember any instructions that read/write memory between FirstMI and MI.
547 SmallVector<MachineInstr *, 4> MemInsns;
549 for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
550 MachineInstr *MI = MBBI;
551 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
552 // optimization by changing how far we scan.
553 if (MI->isDebugValue())
556 // Now that we know this is a real instruction, count it.
559 bool CanMergeOpc = Opc == MI->getOpcode();
562 bool IsValidLdStrOpc;
563 unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
564 if (!IsValidLdStrOpc)
566 // Opc will be the first instruction in the pair.
567 SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
568 CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
571 if (CanMergeOpc && MI->getOperand(2).isImm()) {
572 // If we've found another instruction with the same opcode, check to see
573 // if the base and offset are compatible with our starting instruction.
574 // These instructions all have scaled immediate operands, so we just
575 // check for +1/-1. Make sure to check the new instruction offset is
576 // actually an immediate and not a symbolic reference destined for
579 // Pairwise instructions have a 7-bit signed offset field. Single insns
580 // have a 12-bit unsigned offset field. To be a valid combine, the
581 // final offset must be in range.
582 unsigned MIBaseReg = MI->getOperand(1).getReg();
583 int MIOffset = MI->getOperand(2).getImm();
584 if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
585 (Offset + OffsetStride == MIOffset))) {
586 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
587 // If this is a volatile load/store that otherwise matched, stop looking
588 // as something is going on that we don't have enough information to
589 // safely transform. Similarly, stop if we see a hint to avoid pairs.
590 if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
592 // If the resultant immediate offset of merging these instructions
593 // is out of range for a pairwise instruction, bail and keep looking.
594 bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
595 if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
596 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
597 if (MI->mayLoadOrStore())
598 MemInsns.push_back(MI);
601 // If the alignment requirements of the paired (scaled) instruction
602 // can't express the offset of the unscaled input, bail and keep
604 if (IsUnscaled && EnableAArch64UnscaledMemOp &&
605 (alignTo(MinOffset, OffsetStride) != MinOffset)) {
606 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
607 if (MI->mayLoadOrStore())
608 MemInsns.push_back(MI);
611 // If the destination register of the loads is the same register, bail
612 // and keep looking. A load-pair instruction with both destination
613 // registers the same is UNPREDICTABLE and will result in an exception.
614 if (MayLoad && Reg == MI->getOperand(0).getReg()) {
615 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
616 if (MI->mayLoadOrStore())
617 MemInsns.push_back(MI);
621 // If the Rt of the second instruction was not modified or used between
622 // the two instructions and none of the instructions between the second
623 // and first alias with the second, we can combine the second into the
625 if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
626 !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
627 !mayAlias(MI, MemInsns, TII)) {
628 MergeForward = false;
632 // Likewise, if the Rt of the first instruction is not modified or used
633 // between the two instructions and none of the instructions between the
634 // first and the second alias with the first, we can combine the first
636 if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
637 !(FirstMI->mayLoad() &&
638 UsedRegs[FirstMI->getOperand(0).getReg()]) &&
639 !mayAlias(FirstMI, MemInsns, TII)) {
643 // Unable to combine these instructions due to interference in between.
648 // If the instruction wasn't a matching load or store. Stop searching if we
649 // encounter a call instruction that might modify memory.
653 // Update modified / uses register lists.
654 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
656 // Otherwise, if the base register is modified, we have no match, so
658 if (ModifiedRegs[BaseReg])
661 // Update list of instructions that read/write memory.
662 if (MI->mayLoadOrStore())
663 MemInsns.push_back(MI);
668 MachineBasicBlock::iterator
669 AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
670 MachineBasicBlock::iterator Update) {
671 assert((Update->getOpcode() == AArch64::ADDXri ||
672 Update->getOpcode() == AArch64::SUBXri) &&
673 "Unexpected base register update instruction to merge!");
674 MachineBasicBlock::iterator NextI = I;
675 // Return the instruction following the merged instruction, which is
676 // the instruction following our unmerged load. Unless that's the add/sub
677 // instruction we're merging, in which case it's the one after that.
678 if (++NextI == Update)
681 int Value = Update->getOperand(2).getImm();
682 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
683 "Can't merge 1 << 12 offset into pre-indexed load / store");
684 if (Update->getOpcode() == AArch64::SUBXri)
687 unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
688 MachineInstrBuilder MIB =
689 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
690 .addOperand(Update->getOperand(0))
691 .addOperand(I->getOperand(0))
692 .addOperand(I->getOperand(1))
696 DEBUG(dbgs() << "Creating pre-indexed load/store.");
697 DEBUG(dbgs() << " Replacing instructions:\n ");
698 DEBUG(I->print(dbgs()));
699 DEBUG(dbgs() << " ");
700 DEBUG(Update->print(dbgs()));
701 DEBUG(dbgs() << " with instruction:\n ");
702 DEBUG(((MachineInstr *)MIB)->print(dbgs()));
703 DEBUG(dbgs() << "\n");
705 // Erase the old instructions for the block.
706 I->eraseFromParent();
707 Update->eraseFromParent();
712 MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
713 MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) {
714 assert((Update->getOpcode() == AArch64::ADDXri ||
715 Update->getOpcode() == AArch64::SUBXri) &&
716 "Unexpected base register update instruction to merge!");
717 MachineBasicBlock::iterator NextI = I;
718 // Return the instruction following the merged instruction, which is
719 // the instruction following our unmerged load. Unless that's the add/sub
720 // instruction we're merging, in which case it's the one after that.
721 if (++NextI == Update)
724 int Value = Update->getOperand(2).getImm();
725 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
726 "Can't merge 1 << 12 offset into post-indexed load / store");
727 if (Update->getOpcode() == AArch64::SUBXri)
730 unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
731 MachineInstrBuilder MIB =
732 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
733 .addOperand(Update->getOperand(0))
734 .addOperand(I->getOperand(0))
735 .addOperand(I->getOperand(1))
739 DEBUG(dbgs() << "Creating post-indexed load/store.");
740 DEBUG(dbgs() << " Replacing instructions:\n ");
741 DEBUG(I->print(dbgs()));
742 DEBUG(dbgs() << " ");
743 DEBUG(Update->print(dbgs()));
744 DEBUG(dbgs() << " with instruction:\n ");
745 DEBUG(((MachineInstr *)MIB)->print(dbgs()));
746 DEBUG(dbgs() << "\n");
748 // Erase the old instructions for the block.
749 I->eraseFromParent();
750 Update->eraseFromParent();
755 static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
757 switch (MI->getOpcode()) {
760 case AArch64::SUBXri:
761 // Negate the offset for a SUB instruction.
764 case AArch64::ADDXri:
765 // Make sure it's a vanilla immediate operand, not a relocation or
766 // anything else we can't handle.
767 if (!MI->getOperand(2).isImm())
769 // Watch out for 1 << 12 shifted value.
770 if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
772 // If the instruction has the base register as source and dest and the
773 // immediate will fit in a signed 9-bit integer, then we have a match.
774 if (MI->getOperand(0).getReg() == BaseReg &&
775 MI->getOperand(1).getReg() == BaseReg &&
776 MI->getOperand(2).getImm() <= 255 &&
777 MI->getOperand(2).getImm() >= -256) {
778 // If we have a non-zero Offset, we check that it matches the amount
779 // we're adding to the register.
780 if (!Offset || Offset == MI->getOperand(2).getImm())
788 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
789 MachineBasicBlock::iterator I, unsigned Limit, int Value) {
790 MachineBasicBlock::iterator E = I->getParent()->end();
791 MachineInstr *MemMI = I;
792 MachineBasicBlock::iterator MBBI = I;
793 const MachineFunction &MF = *MemMI->getParent()->getParent();
795 unsigned DestReg = MemMI->getOperand(0).getReg();
796 unsigned BaseReg = MemMI->getOperand(1).getReg();
797 int Offset = MemMI->getOperand(2).getImm() *
798 TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
800 // If the base register overlaps the destination register, we can't
802 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
805 // Scan forward looking for post-index opportunities.
806 // Updating instructions can't be formed if the memory insn already
807 // has an offset other than the value we're looking for.
811 // Track which registers have been modified and used between the first insn
812 // (inclusive) and the second insn.
813 BitVector ModifiedRegs, UsedRegs;
814 ModifiedRegs.resize(TRI->getNumRegs());
815 UsedRegs.resize(TRI->getNumRegs());
817 for (unsigned Count = 0; MBBI != E; ++MBBI) {
818 MachineInstr *MI = MBBI;
819 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
820 // optimization by changing how far we scan.
821 if (MI->isDebugValue())
824 // Now that we know this is a real instruction, count it.
827 // If we found a match, return it.
828 if (isMatchingUpdateInsn(MI, BaseReg, Value))
831 // Update the status of what the instruction clobbered and used.
832 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
834 // Otherwise, if the base register is used or modified, we have no match, so
836 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
842 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
843 MachineBasicBlock::iterator I, unsigned Limit) {
844 MachineBasicBlock::iterator B = I->getParent()->begin();
845 MachineBasicBlock::iterator E = I->getParent()->end();
846 MachineInstr *MemMI = I;
847 MachineBasicBlock::iterator MBBI = I;
848 const MachineFunction &MF = *MemMI->getParent()->getParent();
850 unsigned DestReg = MemMI->getOperand(0).getReg();
851 unsigned BaseReg = MemMI->getOperand(1).getReg();
852 int Offset = MemMI->getOperand(2).getImm();
853 unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
855 // If the load/store is the first instruction in the block, there's obviously
856 // not any matching update. Ditto if the memory offset isn't zero.
857 if (MBBI == B || Offset != 0)
859 // If the base register overlaps the destination register, we can't
861 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
864 // Track which registers have been modified and used between the first insn
865 // (inclusive) and the second insn.
866 BitVector ModifiedRegs, UsedRegs;
867 ModifiedRegs.resize(TRI->getNumRegs());
868 UsedRegs.resize(TRI->getNumRegs());
870 for (unsigned Count = 0; MBBI != B; --MBBI) {
871 MachineInstr *MI = MBBI;
872 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
873 // optimization by changing how far we scan.
874 if (MI->isDebugValue())
877 // Now that we know this is a real instruction, count it.
880 // If we found a match, return it.
881 if (isMatchingUpdateInsn(MI, BaseReg, RegSize))
884 // Update the status of what the instruction clobbered and used.
885 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
887 // Otherwise, if the base register is used or modified, we have no match, so
889 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
895 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
896 bool Modified = false;
897 // Two tranformations to do here:
898 // 1) Find loads and stores that can be merged into a single load or store
905 // 2) Find base register updates that can be merged into the load or store
906 // as a base-reg writeback.
913 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
915 MachineInstr *MI = MBBI;
916 switch (MI->getOpcode()) {
918 // Just move on to the next instruction.
921 case AArch64::STRSui:
922 case AArch64::STRDui:
923 case AArch64::STRQui:
924 case AArch64::STRXui:
925 case AArch64::STRWui:
926 case AArch64::LDRSui:
927 case AArch64::LDRDui:
928 case AArch64::LDRQui:
929 case AArch64::LDRXui:
930 case AArch64::LDRWui:
931 case AArch64::LDRSWui:
932 // do the unscaled versions as well
933 case AArch64::STURSi:
934 case AArch64::STURDi:
935 case AArch64::STURQi:
936 case AArch64::STURWi:
937 case AArch64::STURXi:
938 case AArch64::LDURSi:
939 case AArch64::LDURDi:
940 case AArch64::LDURQi:
941 case AArch64::LDURWi:
942 case AArch64::LDURXi:
943 case AArch64::LDURSWi: {
944 // If this is a volatile load/store, don't mess with it.
945 if (MI->hasOrderedMemoryRef()) {
949 // Make sure this is a reg+imm (as opposed to an address reloc).
950 if (!MI->getOperand(2).isImm()) {
954 // Check if this load/store has a hint to avoid pair formation.
955 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
956 if (TII->isLdStPairSuppressed(MI)) {
960 // Look ahead up to ScanLimit instructions for a pairable instruction.
961 bool MergeForward = false;
963 MachineBasicBlock::iterator Paired =
964 findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
966 // Merge the loads into a pair. Keeping the iterator straight is a
967 // pain, so we let the merge routine tell us what the next instruction
968 // is after it's done mucking about.
969 MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
973 if (isUnscaledLdst(MI->getOpcode()))
974 ++NumUnscaledPairCreated;
980 // FIXME: Do the other instructions.
984 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
986 MachineInstr *MI = MBBI;
987 // Do update merging. It's simpler to keep this separate from the above
988 // switch, though not strictly necessary.
989 unsigned Opc = MI->getOpcode();
992 // Just move on to the next instruction.
995 case AArch64::STRSui:
996 case AArch64::STRDui:
997 case AArch64::STRQui:
998 case AArch64::STRXui:
999 case AArch64::STRWui:
1000 case AArch64::LDRSui:
1001 case AArch64::LDRDui:
1002 case AArch64::LDRQui:
1003 case AArch64::LDRXui:
1004 case AArch64::LDRWui:
1005 // do the unscaled versions as well
1006 case AArch64::STURSi:
1007 case AArch64::STURDi:
1008 case AArch64::STURQi:
1009 case AArch64::STURWi:
1010 case AArch64::STURXi:
1011 case AArch64::LDURSi:
1012 case AArch64::LDURDi:
1013 case AArch64::LDURQi:
1014 case AArch64::LDURWi:
1015 case AArch64::LDURXi: {
1016 // Make sure this is a reg+imm (as opposed to an address reloc).
1017 if (!MI->getOperand(2).isImm()) {
1021 // Look ahead up to ScanLimit instructions for a mergable instruction.
1022 MachineBasicBlock::iterator Update =
1023 findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
1025 // Merge the update into the ld/st.
1026 MBBI = mergePostIdxUpdateInsn(MBBI, Update);
1031 // Don't know how to handle pre/post-index versions, so move to the next
1033 if (isUnscaledLdst(Opc)) {
1038 // Look back to try to find a pre-index instruction. For example,
1042 // ldr x1, [x0, #8]!
1043 Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
1045 // Merge the update into the ld/st.
1046 MBBI = mergePreIdxUpdateInsn(MBBI, Update);
1052 // Look forward to try to find a post-index instruction. For example,
1053 // ldr x1, [x0, #64]
1056 // ldr x1, [x0, #64]!
1058 // The immediate in the load/store is scaled by the size of the register
1059 // being loaded. The immediate in the add we're looking for,
1060 // however, is not, so adjust here.
1061 int Value = MI->getOperand(2).getImm() *
1062 TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
1064 Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
1066 // Merge the update into the ld/st.
1067 MBBI = mergePreIdxUpdateInsn(MBBI, Update);
1073 // Nothing found. Just move to the next instruction.
1077 // FIXME: Do the other instructions.
1084 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1085 TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
1086 TRI = Fn.getSubtarget().getRegisterInfo();
1088 bool Modified = false;
1089 for (auto &MBB : Fn)
1090 Modified |= optimizeBlock(MBB);
1095 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
1096 // loads and stores near one another?
1098 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1099 /// optimization pass.
1100 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
1101 return new AArch64LoadStoreOpt();