1 //===-------------- ARM64CollectLOH.cpp - ARM64 collect LOH pass --*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that collect the Linker Optimization Hint (LOH).
11 // This pass should be run at the very end of the compilation flow, just before
13 // To be useful for the linker, the LOH must be printed into the assembly file.
14 // Currently supported LOH are:
15 // * So called non-ADRP-related:
16 // - .loh AdrpAddLdr L1, L2, L3:
17 // L1: adrp xA, sym@PAGE
18 // L2: add xB, xA, sym@PAGEOFF
19 // L3: ldr xC, [xB, #imm]
20 // - .loh AdrpLdrGotLdr L1, L2, L3:
21 // L1: adrp xA, sym@GOTPAGE
22 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
23 // L3: ldr xC, [xB, #imm]
24 // - .loh AdrpLdr L1, L3:
25 // L1: adrp xA, sym@PAGE
26 // L3: ldr xC, [xA, sym@PAGEOFF]
27 // - .loh AdrpAddStr L1, L2, L3:
28 // L1: adrp xA, sym@PAGE
29 // L2: add xB, xA, sym@PAGEOFF
30 // L3: str xC, [xB, #imm]
31 // - .loh AdrpLdrGotStr L1, L2, L3:
32 // L1: adrp xA, sym@GOTPAGE
33 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
34 // L3: str xC, [xB, #imm]
35 // - .loh AdrpAdd L1, L2:
36 // L1: adrp xA, sym@PAGE
37 // L2: add xB, xA, sym@PAGEOFF
38 // For all these LOHs, L1, L2, L3 form a simple chain:
39 // L1 result is used only by L2 and L2 result by L3.
40 // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
43 // * So called ADRP-related:
44 // - .loh AdrpAdrp L2, L1:
45 // L2: ADRP xA, sym1@PAGE
46 // L1: ADRP xA, sym2@PAGE
47 // L2 dominates L1 and xA is not redifined between L2 and L1
49 // More information are available in the design document attached to
51 //===----------------------------------------------------------------------===//
53 #define DEBUG_TYPE "arm64-collect-loh"
55 #include "ARM64InstrInfo.h"
56 #include "ARM64MachineFunctionInfo.h"
57 #include "MCTargetDesc/ARM64AddressingModes.h"
58 #include "llvm/ADT/BitVector.h"
59 #include "llvm/ADT/DenseMap.h"
60 #include "llvm/ADT/MapVector.h"
61 #include "llvm/ADT/SetVector.h"
62 #include "llvm/ADT/SmallVector.h"
63 #include "llvm/CodeGen/MachineBasicBlock.h"
64 #include "llvm/CodeGen/MachineDominators.h"
65 #include "llvm/CodeGen/MachineFunctionPass.h"
66 #include "llvm/CodeGen/MachineInstr.h"
67 #include "llvm/CodeGen/MachineInstrBuilder.h"
68 #include "llvm/Target/TargetInstrInfo.h"
69 #include "llvm/Target/TargetMachine.h"
70 #include "llvm/Target/TargetRegisterInfo.h"
71 #include "llvm/Support/CommandLine.h"
72 #include "llvm/Support/Debug.h"
73 #include "llvm/Support/ErrorHandling.h"
74 #include "llvm/Support/raw_ostream.h"
75 #include "llvm/ADT/Statistic.h"
79 PreCollectRegister("arm64-collect-loh-pre-collect-register", cl::Hidden,
80 cl::desc("Restrict analysis to registers invovled"
85 BasicBlockScopeOnly("arm64-collect-loh-bb-only", cl::Hidden,
86 cl::desc("Restrict analysis at basic block scope"),
89 STATISTIC(NumADRPSimpleCandidate,
90 "Number of simplifiable ADRP dominate by another");
91 STATISTIC(NumADRPComplexCandidate2,
92 "Number of simplifiable ADRP reachable by 2 defs");
93 STATISTIC(NumADRPComplexCandidate3,
94 "Number of simplifiable ADRP reachable by 3 defs");
95 STATISTIC(NumADRPComplexCandidateOther,
96 "Number of simplifiable ADRP reachable by 4 or more defs");
97 STATISTIC(NumADDToSTRWithImm,
98 "Number of simplifiable STR with imm reachable by ADD");
99 STATISTIC(NumLDRToSTRWithImm,
100 "Number of simplifiable STR with imm reachable by LDR");
101 STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
102 STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
103 STATISTIC(NumADDToLDRWithImm,
104 "Number of simplifiable LDR with imm reachable by ADD");
105 STATISTIC(NumLDRToLDRWithImm,
106 "Number of simplifiable LDR with imm reachable by LDR");
107 STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
108 STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
109 STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
110 STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
111 STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
112 STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
113 STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
114 STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
115 STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
118 void initializeARM64CollectLOHPass(PassRegistry &);
122 struct ARM64CollectLOH : public MachineFunctionPass {
124 ARM64CollectLOH() : MachineFunctionPass(ID) {
125 initializeARM64CollectLOHPass(*PassRegistry::getPassRegistry());
128 virtual bool runOnMachineFunction(MachineFunction &Fn);
130 virtual const char *getPassName() const {
131 return "ARM64 Collect Linker Optimization Hint (LOH)";
134 void getAnalysisUsage(AnalysisUsage &AU) const {
135 AU.setPreservesAll();
136 MachineFunctionPass::getAnalysisUsage(AU);
137 AU.addRequired<MachineDominatorTree>();
143 /// A set of MachineInstruction.
144 typedef SetVector<const MachineInstr *> SetOfMachineInstr;
145 /// Map a basic block to a set of instructions per register.
146 /// This is used to represent the exposed uses of a basic block
148 typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *>
149 BlockToSetOfInstrsPerColor;
150 /// Map a basic block to an instruction per register.
151 /// This is used to represent the live-out definitions of a basic block
153 typedef MapVector<const MachineBasicBlock *, const MachineInstr **>
154 BlockToInstrPerColor;
155 /// Map an instruction to a set of instructions. Used to represent the
156 /// mapping def to reachable uses or use to definitions.
157 typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
158 /// Map a basic block to a BitVector.
159 /// This is used to record the kill registers per basic block.
160 typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
162 /// Map a register to a dense id.
163 typedef DenseMap<unsigned, unsigned> MapRegToId;
164 /// Map a dense id to a register. Used for debug purposes.
165 typedef SmallVector<unsigned, 32> MapIdToReg;
166 } // end anonymous namespace.
168 char ARM64CollectLOH::ID = 0;
170 INITIALIZE_PASS_BEGIN(ARM64CollectLOH, "arm64-collect-loh",
171 "ARM64 Collect Linker Optimization Hint (LOH)", false,
173 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
174 INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh",
175 "ARM64 Collect Linker Optimization Hint (LOH)", false,
178 /// Given a couple (MBB, reg) get the corresponding set of instruction from
179 /// the given "sets".
180 /// If this couple does not reference any set, an empty set is added to "sets"
181 /// for this couple and returned.
182 /// \param nbRegs is used internally allocate some memory. It must be consistent
183 /// with the way sets is used.
184 static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
185 const MachineBasicBlock *MBB, unsigned reg,
187 SetOfMachineInstr *result;
188 BlockToSetOfInstrsPerColor::iterator it = sets.find(MBB);
189 if (it != sets.end()) {
192 result = sets[MBB] = new SetOfMachineInstr[nbRegs];
198 /// Given a couple (reg, MI) get the corresponding set of instructions from the
199 /// the given "sets".
200 /// This is used to get the uses record in sets of a definition identified by
201 /// MI and reg, i.e., MI defines reg.
202 /// If the couple does not reference anything, an empty set is added to
204 /// \pre set[reg] is valid.
205 static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
206 const MachineInstr *MI) {
207 return sets[reg][MI];
210 /// Same as getUses but does not modify the input map: sets.
211 /// \return NULL if the couple (reg, MI) is not in sets.
212 static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
213 const MachineInstr *MI) {
214 InstrToInstrs::const_iterator Res = sets[reg].find(MI);
215 if (Res != sets[reg].end())
216 return &(Res->second);
220 /// Initialize the reaching definition algorithm:
221 /// For each basic block BB in MF, record:
223 /// - its reachable uses (uses that are exposed to BB's predecessors).
224 /// - its the generated definitions.
225 /// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
226 /// the list of uses of exposed defintions.
227 /// \param ADRPMode specifies to only consider ADRP instructions for generated
228 /// definition. It also consider definitions of ADRP instructions as uses and
229 /// ignore other uses. The ADRPMode is used to collect the information for LHO
230 /// that involve ADRP operation only.
231 static void initReachingDef(MachineFunction *MF,
232 InstrToInstrs *ColorOpToReachedUses,
233 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
234 BlockToSetOfInstrsPerColor &ReachableUses,
235 const MapRegToId &RegToId,
236 const MachineInstr *DummyOp, bool ADRPMode) {
237 const TargetMachine &TM = MF->getTarget();
238 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
240 unsigned NbReg = RegToId.size();
242 for (MachineFunction::const_iterator IMBB = MF->begin(), IMBBEnd = MF->end();
243 IMBB != IMBBEnd; ++IMBB) {
244 const MachineBasicBlock *MBB = &(*IMBB);
245 const MachineInstr **&BBGen = Gen[MBB];
246 BBGen = new const MachineInstr *[NbReg];
247 memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg);
249 BitVector &BBKillSet = Kill[MBB];
250 BBKillSet.resize(NbReg);
251 for (MachineBasicBlock::const_iterator II = MBB->begin(), IEnd = MBB->end();
253 bool IsADRP = II->getOpcode() == ARM64::ADRP;
255 // Process uses first.
256 if (IsADRP || !ADRPMode)
257 for (MachineInstr::const_mop_iterator IO = II->operands_begin(),
258 IOEnd = II->operands_end();
260 // Treat ADRP def as use, as the goal of the analysis is to find
261 // ADRP defs reached by other ADRP defs.
262 if (!IO->isReg() || (!ADRPMode && !IO->isUse()) ||
263 (ADRPMode && (!IsADRP || !IO->isDef())))
265 unsigned CurReg = IO->getReg();
266 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
267 if (ItCurRegId == RegToId.end())
269 CurReg = ItCurRegId->second;
271 // if CurReg has not been defined, this use is reachable.
272 if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
273 getSet(ReachableUses, MBB, CurReg, NbReg).insert(&(*II));
274 // current basic block definition for this color, if any, is in Gen.
276 getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(&(*II));
280 for (MachineInstr::const_mop_iterator IO = II->operands_begin(),
281 IOEnd = II->operands_end();
283 if (!IO->isRegMask())
285 // Clobbers kill the related colors.
286 const uint32_t *PreservedRegs = IO->getRegMask();
288 // Set generated regs.
289 for (MapRegToId::const_iterator ItRegId = RegToId.begin(),
290 EndIt = RegToId.end();
291 ItRegId != EndIt; ++ItRegId) {
292 unsigned Reg = ItRegId->second;
293 // Use the global register ID when querying APIs external to this
295 if (MachineOperand::clobbersPhysReg(PreservedRegs, ItRegId->first)) {
296 // Do not register clobbered definition for no ADRP.
297 // This definition is not used anyway (otherwise register
298 // allocation is wrong).
299 BBGen[Reg] = ADRPMode ? II : NULL;
306 for (MachineInstr::const_mop_iterator IO = II->operands_begin(),
307 IOEnd = II->operands_end();
309 if (!IO->isReg() || !IO->isDef())
311 unsigned CurReg = IO->getReg();
312 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
313 if (ItCurRegId == RegToId.end())
316 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
317 MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
318 assert(ItRegId != RegToId.end() &&
319 "Sub-register of an "
320 "involved register, not recorded as involved!");
321 BBKillSet.set(ItRegId->second);
322 BBGen[ItRegId->second] = &(*II);
324 BBGen[ItCurRegId->second] = &(*II);
328 // If we restrict our analysis to basic block scope, conservatively add a
330 // use for each generated value.
331 if (!ADRPMode && DummyOp && !MBB->succ_empty())
332 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
334 getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(DummyOp);
338 /// Reaching def core algorithm:
339 /// while an Out has changed
342 /// In[bb][color] = U Out[bb.predecessors][color]
343 /// insert reachableUses[bb][color] in each in[bb][color]
346 /// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
347 static void reachingDefAlgorithm(MachineFunction *MF,
348 InstrToInstrs *ColorOpToReachedUses,
349 BlockToSetOfInstrsPerColor &In,
350 BlockToSetOfInstrsPerColor &Out,
351 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
352 BlockToSetOfInstrsPerColor &ReachableUses,
357 for (MachineFunction::const_iterator IMBB = MF->begin(),
359 IMBB != IMBBEnd; ++IMBB) {
360 const MachineBasicBlock *MBB = &(*IMBB);
362 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
363 SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
364 SetOfMachineInstr &BBReachableUses =
365 getSet(ReachableUses, MBB, CurReg, NbReg);
366 SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
367 unsigned Size = BBOutSet.size();
368 // In[bb][color] = U Out[bb.predecessors][color]
369 for (MachineBasicBlock::const_pred_iterator
370 PredMBB = MBB->pred_begin(),
371 EndPredMBB = MBB->pred_end();
372 PredMBB != EndPredMBB; ++PredMBB) {
373 SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
374 BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
376 // insert reachableUses[bb][color] in each in[bb][color] op.reachedses
377 for (SetOfMachineInstr::const_iterator InstrIt = BBInSet.begin(),
378 EndInstrIt = BBInSet.end();
379 InstrIt != EndInstrIt; ++InstrIt) {
380 SetOfMachineInstr &OpReachedUses =
381 getUses(ColorOpToReachedUses, CurReg, *InstrIt);
382 OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
384 // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
385 if (!Kill[MBB].test(CurReg))
386 BBOutSet.insert(BBInSet.begin(), BBInSet.end());
387 if (Gen[MBB][CurReg])
388 BBOutSet.insert(Gen[MBB][CurReg]);
389 HasChanged |= BBOutSet.size() != Size;
392 } while (HasChanged);
395 /// Release all memory dynamically allocated during the reaching
396 /// definition algorithm.
397 static void finitReachingDef(BlockToSetOfInstrsPerColor &In,
398 BlockToSetOfInstrsPerColor &Out,
399 BlockToInstrPerColor &Gen,
400 BlockToSetOfInstrsPerColor &ReachableUses) {
401 for (BlockToSetOfInstrsPerColor::const_iterator IT = Out.begin(),
405 for (BlockToSetOfInstrsPerColor::const_iterator IT = In.begin(),
409 for (BlockToSetOfInstrsPerColor::const_iterator IT = ReachableUses.begin(),
410 End = ReachableUses.end();
413 for (BlockToInstrPerColor::const_iterator IT = Gen.begin(), End = Gen.end();
418 /// Reaching definiton algorithm.
419 /// \param MF function on which the algorithm will operate.
420 /// \param[out] ColorOpToReachedUses will contain the result of the reaching
422 /// \param ADRPMode specify whether the reaching def algorithm should be tuned
423 /// for ADRP optimization. \see initReachingDef for more details.
424 /// \param DummyOp if not NULL, the algorithm will work at
425 /// basic block scope and will set for every exposed defintion a use to
427 /// \pre ColorOpToReachedUses is an array of at least number of registers of
429 static void reachingDef(MachineFunction *MF,
430 InstrToInstrs *ColorOpToReachedUses,
431 const MapRegToId &RegToId, bool ADRPMode = false,
432 const MachineInstr *DummyOp = NULL) {
434 // For each basic block.
435 // Out: a set per color of definitions that reach the
436 // out boundary of this block.
437 // In: Same as Out but for in boundary.
438 // Gen: generated color in this block (one operation per color).
439 // Kill: register set of killed color in this block.
440 // ReachableUses: a set per color of uses (operation) reachable
441 // for "In" definitions.
442 BlockToSetOfInstrsPerColor Out, In, ReachableUses;
443 BlockToInstrPerColor Gen;
446 // Initialize Gen, kill and reachableUses.
447 initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
452 reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
453 ReachableUses, RegToId.size());
456 finitReachingDef(In, Out, Gen, ReachableUses);
460 /// print the result of the reaching definition algorithm.
461 static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
462 unsigned NbReg, const TargetRegisterInfo *TRI,
463 const MapIdToReg &IdToReg) {
465 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
466 if (ColorOpToReachedUses[CurReg].empty())
468 DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
470 InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin();
471 InstrToInstrs::const_iterator DefsItEnd =
472 ColorOpToReachedUses[CurReg].end();
473 for (; DefsIt != DefsItEnd; ++DefsIt) {
474 DEBUG(dbgs() << "Def:\n");
475 DEBUG(DefsIt->first->print(dbgs()));
476 DEBUG(dbgs() << "Reachable uses:\n");
477 for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(),
478 UsesItEnd = DefsIt->second.end();
479 UsesIt != UsesItEnd; ++UsesIt) {
480 DEBUG((*UsesIt)->print(dbgs()));
487 /// Answer the following question: Can Def be one of the definition
488 /// involved in a part of a LOH?
489 static bool canDefBePartOfLOH(const MachineInstr *Def) {
490 unsigned Opc = Def->getOpcode();
491 // Accept ADRP, ADDLow and LOADGot.
498 // Check immediate to see if the immediate is an address.
499 switch (Def->getOperand(2).getType()) {
502 case MachineOperand::MO_GlobalAddress:
503 case MachineOperand::MO_JumpTableIndex:
504 case MachineOperand::MO_ConstantPoolIndex:
505 case MachineOperand::MO_BlockAddress:
509 // Check immediate to see if the immediate is an address.
510 switch (Def->getOperand(2).getType()) {
513 case MachineOperand::MO_GlobalAddress:
521 /// Check whether the given instruction can the end of a LOH chain involving a
523 static bool isCandidateStore(const MachineInstr *Instr) {
524 switch (Instr->getOpcode()) {
534 // In case we have str xA, [xA, #imm], this is two different uses
535 // of xA and we cannot fold, otherwise the xA stored may be wrong,
536 // even if #imm == 0.
537 if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
543 /// Given the result of a reaching defintion algorithm in ColorOpToReachedUses,
544 /// Build the Use to Defs information and filter out obvious non-LOH candidates.
545 /// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
546 /// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
547 /// i.e., no simple chain.
548 /// \param ADRPMode -- \see initReachingDef.
549 static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
550 const InstrToInstrs *ColorOpToReachedUses,
551 const MapRegToId &RegToId,
552 bool ADRPMode = false) {
554 SetOfMachineInstr NotCandidate;
555 unsigned NbReg = RegToId.size();
556 MapRegToId::const_iterator EndIt = RegToId.end();
557 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
558 // If this color is never defined, continue.
559 if (ColorOpToReachedUses[CurReg].empty())
562 InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin();
563 InstrToInstrs::const_iterator DefsItEnd =
564 ColorOpToReachedUses[CurReg].end();
565 for (; DefsIt != DefsItEnd; ++DefsIt) {
566 for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(),
567 UsesItEnd = DefsIt->second.end();
568 UsesIt != UsesItEnd; ++UsesIt) {
569 const MachineInstr *Def = DefsIt->first;
570 MapRegToId::const_iterator It;
571 // if all the reaching defs are not adrp, this use will not be
573 if ((ADRPMode && Def->getOpcode() != ARM64::ADRP) ||
574 (!ADRPMode && !canDefBePartOfLOH(Def)) ||
575 (!ADRPMode && isCandidateStore(*UsesIt) &&
576 // store are LOH candidate iff the end of the chain is used as
578 ((It = RegToId.find((*UsesIt)->getOperand(1).getReg())) == EndIt ||
579 It->second != CurReg))) {
580 NotCandidate.insert(*UsesIt);
583 // Do not consider self reaching as a simplifiable case for ADRP.
584 if (!ADRPMode || *UsesIt != DefsIt->first) {
585 UseToReachingDefs[*UsesIt].insert(DefsIt->first);
586 // If UsesIt has several reaching definitions, it is not
587 // candidate for simplificaton in non-ADRPMode.
588 if (!ADRPMode && UseToReachingDefs[*UsesIt].size() > 1)
589 NotCandidate.insert(*UsesIt);
594 for (SetOfMachineInstr::const_iterator NotCandidateIt = NotCandidate.begin(),
595 NotCandidateItEnd = NotCandidate.end();
596 NotCandidateIt != NotCandidateItEnd; ++NotCandidateIt) {
597 DEBUG(dbgs() << "Too many reaching defs: " << **NotCandidateIt << "\n");
598 // It would have been better if we could just remove the entry
599 // from the map. Because of that, we have to filter the garbage
600 // (second.empty) in the subsequence analysis.
601 UseToReachingDefs[*NotCandidateIt].clear();
605 /// Based on the use to defs information (in ADRPMode), compute the
606 /// opportunities of LOH ADRP-related.
607 static void computeADRP(const InstrToInstrs &UseToDefs,
608 ARM64FunctionInfo &ARM64FI,
609 const MachineDominatorTree *MDT) {
610 DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
611 for (InstrToInstrs::const_iterator UseIt = UseToDefs.begin(),
612 EndUseIt = UseToDefs.end();
613 UseIt != EndUseIt; ++UseIt) {
614 unsigned Size = UseIt->second.size();
618 const MachineInstr *L2 = *UseIt->second.begin();
619 const MachineInstr *L1 = UseIt->first;
620 if (!MDT->dominates(L2, L1)) {
621 DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
625 DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
626 SmallVector<const MachineInstr *, 2> Args;
629 ARM64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
630 ++NumADRPSimpleCandidate;
634 ++NumADRPComplexCandidate2;
636 ++NumADRPComplexCandidate3;
638 ++NumADRPComplexCandidateOther;
640 // if Size < 1, the use should have been removed from the candidates
641 assert(Size >= 1 && "No reaching defs for that use!");
645 /// Check whether the given instruction can be the end of a LOH chain
646 /// involving a load.
647 static bool isCandidateLoad(const MachineInstr *Instr) {
648 switch (Instr->getOpcode()) {
651 case ARM64::LDRSBWui:
652 case ARM64::LDRSBXui:
653 case ARM64::LDRSHWui:
654 case ARM64::LDRSHXui:
663 if (Instr->getOperand(2).getTargetFlags() & ARM64II::MO_GOT)
671 /// Check whether the given instruction can load a litteral.
672 static bool supportLoadFromLiteral(const MachineInstr *Instr) {
673 switch (Instr->getOpcode()) {
688 /// Check whether the given instruction is a LOH candidate.
689 /// \param UseToDefs is used to check that Instr is at the end of LOH supported
691 /// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
692 /// already been filtered out.
693 static bool isCandidate(const MachineInstr *Instr,
694 const InstrToInstrs &UseToDefs,
695 const MachineDominatorTree *MDT) {
696 if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
699 const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
700 if (Def->getOpcode() != ARM64::ADRP) {
701 // At this point, Def is ADDXri or LDRXui of the right type of
702 // symbol, because we filtered out the uses that were not defined
703 // by these kind of instructions (+ ADRP).
705 // Check if this forms a simple chain: each intermediate node must
706 // dominates the next one.
707 if (!MDT->dominates(Def, Instr))
709 // Move one node up in the simple chain.
710 if (UseToDefs.find(Def) == UseToDefs.end()
711 // The map may contain garbage we have to ignore.
713 UseToDefs.find(Def)->second.empty())
716 Def = *UseToDefs.find(Def)->second.begin();
718 // Check if we reached the top of the simple chain:
720 // - check the simple chain property: each intermediate node must
721 // dominates the next one.
722 if (Def->getOpcode() == ARM64::ADRP)
723 return MDT->dominates(Def, Instr);
727 static bool registerADRCandidate(const MachineInstr *Use,
728 const InstrToInstrs &UseToDefs,
729 const InstrToInstrs *DefsPerColorToUses,
730 ARM64FunctionInfo &ARM64FI,
731 SetOfMachineInstr *InvolvedInLOHs,
732 const MapRegToId &RegToId) {
733 // Look for opportunities to turn ADRP -> ADD or
734 // ADRP -> LDR GOTPAGEOFF into ADR.
735 // If ADRP has more than one use. Give up.
736 if (Use->getOpcode() != ARM64::ADDXri &&
737 (Use->getOpcode() != ARM64::LDRXui ||
738 !(Use->getOperand(2).getTargetFlags() & ARM64II::MO_GOT)))
740 InstrToInstrs::const_iterator It = UseToDefs.find(Use);
741 // The map may contain garbage that we need to ignore.
742 if (It == UseToDefs.end() || It->second.empty())
744 const MachineInstr *Def = *It->second.begin();
745 if (Def->getOpcode() != ARM64::ADRP)
747 // Check the number of users of ADRP.
748 const SetOfMachineInstr *Users =
749 getUses(DefsPerColorToUses,
750 RegToId.find(Def->getOperand(0).getReg())->second, Def);
751 if (Users->size() > 1) {
752 ++NumADRComplexCandidate;
755 ++NumADRSimpleCandidate;
756 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Def)) &&
757 "ADRP already involved in LOH.");
758 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Use)) &&
759 "ADD already involved in LOH.");
760 DEBUG(dbgs() << "Record AdrpAdd\n" << *Def << '\n' << *Use << '\n');
762 SmallVector<const MachineInstr *, 2> Args;
766 ARM64FI.addLOHDirective(Use->getOpcode() == ARM64::ADDXri ? MCLOH_AdrpAdd
772 /// Based on the use to defs information (in non-ADRPMode), compute the
773 /// opportunities of LOH non-ADRP-related
774 static void computeOthers(const InstrToInstrs &UseToDefs,
775 const InstrToInstrs *DefsPerColorToUses,
776 ARM64FunctionInfo &ARM64FI, const MapRegToId &RegToId,
777 const MachineDominatorTree *MDT) {
778 SetOfMachineInstr *InvolvedInLOHs = NULL;
780 SetOfMachineInstr InvolvedInLOHsStorage;
781 InvolvedInLOHs = &InvolvedInLOHsStorage;
783 DEBUG(dbgs() << "*** Compute LOH for Others\n");
784 // ADRP -> ADD/LDR -> LDR/STR pattern.
785 // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
787 // FIXME: When the statistics are not important,
788 // This initial filtering loop can be merged into the next loop.
789 // Currently, we didn't do it to have the same code for both DEBUG and
790 // NDEBUG builds. Indeed, the iterator of the second loop would need
792 SetOfMachineInstr PotentialCandidates;
793 SetOfMachineInstr PotentialADROpportunities;
794 for (InstrToInstrs::const_iterator UseIt = UseToDefs.begin(),
795 EndUseIt = UseToDefs.end();
796 UseIt != EndUseIt; ++UseIt) {
797 // If no definition is available, this is a non candidate.
798 if (UseIt->second.empty())
800 // Keep only instructions that are load or store and at the end of
801 // a ADRP -> ADD/LDR/Nothing chain.
802 // We already filtered out the no-chain cases.
803 if (!isCandidate(UseIt->first, UseToDefs, MDT)) {
804 PotentialADROpportunities.insert(UseIt->first);
807 PotentialCandidates.insert(UseIt->first);
810 // Make the following distinctions for statistics as the linker does
811 // know how to decode instructions:
812 // - ADD/LDR/Nothing make there different patterns.
813 // - LDR/STR make two different patterns.
814 // Hence, 6 - 1 base patterns.
815 // (because ADRP-> Nothing -> STR is not simplifiable)
817 // The linker is only able to have a simple semantic, i.e., if pattern A
819 // However, we want to see the opportunity we may miss if we were able to
820 // catch more complex cases.
822 // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
823 // A potential candidate becomes a candidate, if its current immediate
824 // operand is zero and all nodes of the chain have respectively only one user
825 SetOfMachineInstr::const_iterator CandidateIt, EndCandidateIt;
827 SetOfMachineInstr DefsOfPotentialCandidates;
829 for (CandidateIt = PotentialCandidates.begin(),
830 EndCandidateIt = PotentialCandidates.end();
831 CandidateIt != EndCandidateIt; ++CandidateIt) {
832 const MachineInstr *Candidate = *CandidateIt;
833 // Get the definition of the candidate i.e., ADD or LDR.
834 const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
835 // Record the elements of the chain.
836 const MachineInstr *L1 = Def;
837 const MachineInstr *L2 = NULL;
838 unsigned ImmediateDefOpc = Def->getOpcode();
839 if (Def->getOpcode() != ARM64::ADRP) {
840 // Check the number of users of this node.
841 const SetOfMachineInstr *Users =
842 getUses(DefsPerColorToUses,
843 RegToId.find(Def->getOperand(0).getReg())->second, Def);
844 if (Users->size() > 1) {
846 // if all the uses of this def are in potential candidate, this is
847 // a complex candidate of level 2.
848 SetOfMachineInstr::const_iterator UseIt = Users->begin();
849 SetOfMachineInstr::const_iterator EndUseIt = Users->end();
850 for (; UseIt != EndUseIt; ++UseIt) {
851 if (!PotentialCandidates.count(*UseIt)) {
856 if (UseIt == EndUseIt)
859 PotentialADROpportunities.insert(Def);
863 Def = *UseToDefs.find(Def)->second.begin();
865 } // else the element in the middle of the chain is nothing, thus
866 // Def already contains the first element of the chain.
868 // Check the number of users of the first node in the chain, i.e., ADRP
869 const SetOfMachineInstr *Users =
870 getUses(DefsPerColorToUses,
871 RegToId.find(Def->getOperand(0).getReg())->second, Def);
872 if (Users->size() > 1) {
874 // if all the uses of this def are in the defs of the potential candidate,
875 // this is a complex candidate of level 1
876 if (DefsOfPotentialCandidates.empty()) {
878 DefsOfPotentialCandidates = PotentialCandidates;
879 for (SetOfMachineInstr::const_iterator
880 It = PotentialCandidates.begin(),
881 EndIt = PotentialCandidates.end();
883 if (!UseToDefs.find(Candidate)->second.empty())
884 DefsOfPotentialCandidates.insert(
885 *UseToDefs.find(Candidate)->second.begin());
887 SetOfMachineInstr::const_iterator UseIt = Users->begin();
888 SetOfMachineInstr::const_iterator EndUseIt = Users->end();
889 for (; UseIt != EndUseIt; ++UseIt) {
890 if (!DefsOfPotentialCandidates.count(*UseIt)) {
895 if (UseIt == EndUseIt)
901 bool IsL2Add = (ImmediateDefOpc == ARM64::ADDXri);
902 // If the chain is three instructions long and ldr is the second element,
903 // then this ldr must load form GOT, otherwise this is not a correct chain.
904 if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != ARM64II::MO_GOT)
906 SmallVector<const MachineInstr *, 3> Args;
908 if (isCandidateLoad(Candidate)) {
910 // At this point, the candidate LOH indicates that the ldr instruction
911 // may use a direct access to the symbol. There is not such encoding
912 // for loads of byte and half.
913 if (!supportLoadFromLiteral(Candidate))
916 DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
918 Kind = MCLOH_AdrpLdr;
920 Args.push_back(Candidate);
921 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
922 "L1 already involved in LOH.");
923 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
924 "Candidate already involved in LOH.");
927 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
928 << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
931 Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
934 Args.push_back(Candidate);
936 PotentialADROpportunities.remove(L2);
937 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
938 "L1 already involved in LOH.");
939 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
940 "L2 already involved in LOH.");
941 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
942 "Candidate already involved in LOH.");
944 // get the immediate of the load
945 if (Candidate->getOperand(2).getImm() == 0)
946 if (ImmediateDefOpc == ARM64::ADDXri)
950 else if (ImmediateDefOpc == ARM64::ADDXri)
951 ++NumADDToLDRWithImm;
953 ++NumLDRToLDRWithImm;
957 if (ImmediateDefOpc == ARM64::ADRP)
961 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
962 << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
965 Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
968 Args.push_back(Candidate);
970 PotentialADROpportunities.remove(L2);
971 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
972 "L1 already involved in LOH.");
973 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
974 "L2 already involved in LOH.");
975 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
976 "Candidate already involved in LOH.");
978 // get the immediate of the store
979 if (Candidate->getOperand(2).getImm() == 0)
980 if (ImmediateDefOpc == ARM64::ADDXri)
984 else if (ImmediateDefOpc == ARM64::ADDXri)
985 ++NumADDToSTRWithImm;
987 ++NumLDRToSTRWithImm;
991 ARM64FI.addLOHDirective(Kind, Args);
994 // Now, we grabbed all the big patterns, check ADR opportunities.
995 for (SetOfMachineInstr::const_iterator
996 CandidateIt = PotentialADROpportunities.begin(),
997 EndCandidateIt = PotentialADROpportunities.end();
998 CandidateIt != EndCandidateIt; ++CandidateIt)
999 registerADRCandidate(*CandidateIt, UseToDefs, DefsPerColorToUses, ARM64FI,
1000 InvolvedInLOHs, RegToId);
1003 /// Look for every register defined by potential LOHs candidates.
1004 /// Map these registers with dense id in @p RegToId and vice-versa in
1005 /// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
1006 static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
1007 MapIdToReg &IdToReg,
1008 const TargetRegisterInfo *TRI) {
1009 unsigned CurRegId = 0;
1010 if (!PreCollectRegister) {
1011 unsigned NbReg = TRI->getNumRegs();
1012 for (; CurRegId < NbReg; ++CurRegId) {
1013 RegToId[CurRegId] = CurRegId;
1014 DEBUG(IdToReg.push_back(CurRegId));
1015 DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
1020 DEBUG(dbgs() << "** Collect Involved Register\n");
1021 for (MachineFunction::const_iterator IMBB = MF.begin(), IMBBEnd = MF.end();
1022 IMBB != IMBBEnd; ++IMBB)
1023 for (MachineBasicBlock::const_iterator II = IMBB->begin(),
1027 if (!canDefBePartOfLOH(II))
1031 for (MachineInstr::const_mop_iterator IO = II->operands_begin(),
1032 IOEnd = II->operands_end();
1033 IO != IOEnd; ++IO) {
1034 if (!IO->isReg() || !IO->isDef())
1036 unsigned CurReg = IO->getReg();
1037 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
1038 if (RegToId.find(*AI) == RegToId.end()) {
1039 DEBUG(IdToReg.push_back(*AI);
1040 assert(IdToReg[CurRegId] == *AI &&
1041 "Reg index mismatches insertion index."));
1042 RegToId[*AI] = CurRegId++;
1043 DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
1049 bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) {
1050 const TargetMachine &TM = Fn.getTarget();
1051 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1052 const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
1056 ARM64FunctionInfo *ARM64FI = Fn.getInfo<ARM64FunctionInfo>();
1057 assert(ARM64FI && "No MachineFunctionInfo for this function!");
1059 DEBUG(dbgs() << "Looking for LOH in " << Fn.getName() << '\n');
1061 collectInvolvedReg(Fn, RegToId, IdToReg, TRI);
1062 if (RegToId.empty())
1065 MachineInstr *DummyOp = NULL;
1066 if (BasicBlockScopeOnly) {
1067 const ARM64InstrInfo *TII =
1068 static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
1069 // For local analysis, create a dummy operation to record uses that are not
1071 DummyOp = Fn.CreateMachineInstr(TII->get(ARM64::COPY), DebugLoc());
1074 unsigned NbReg = RegToId.size();
1075 bool Modified = false;
1078 InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
1080 // Compute the reaching def in ADRP mode, meaning ADRP definitions
1081 // are first considered as uses.
1082 reachingDef(&Fn, ColorOpToReachedUses, RegToId, true, DummyOp);
1083 DEBUG(dbgs() << "ADRP reaching defs\n");
1084 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1086 // Translate the definition to uses map into a use to definitions map to ease
1087 // statistic computation.
1088 InstrToInstrs ADRPToReachingDefs;
1089 reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
1091 // Compute LOH for ADRP.
1092 computeADRP(ADRPToReachingDefs, *ARM64FI, MDT);
1093 delete[] ColorOpToReachedUses;
1095 // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
1096 ColorOpToReachedUses = new InstrToInstrs[NbReg];
1098 // first perform a regular reaching def analysis.
1099 reachingDef(&Fn, ColorOpToReachedUses, RegToId, false, DummyOp);
1100 DEBUG(dbgs() << "All reaching defs\n");
1101 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1103 // Turn that into a use to defs to ease statistic computation.
1104 InstrToInstrs UsesToReachingDefs;
1105 reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
1107 // Compute other than AdrpAdrp LOH.
1108 computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *ARM64FI, RegToId,
1110 delete[] ColorOpToReachedUses;
1112 if (BasicBlockScopeOnly)
1113 Fn.DeleteMachineInstr(DummyOp);
1118 /// createARM64CollectLOHPass - returns an instance of the Statistic for
1119 /// linker optimization pass.
1120 FunctionPass *llvm::createARM64CollectLOHPass() {
1121 return new ARM64CollectLOH();