+ /// Allocate and return a hazard recognizer to use for this target when
+ /// scheduling the machine instructions before register allocation.
+ virtual ScheduleHazardRecognizer*
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const;
+
+ /// Allocate and return a hazard recognizer to use for this target when
+ /// scheduling the machine instructions before register allocation.
+ virtual ScheduleHazardRecognizer*
+ CreateTargetMIHazardRecognizer(const InstrItineraryData*,
+ const ScheduleDAG *DAG) const;
+
+ /// Allocate and return a hazard recognizer to use for this target when
+ /// scheduling the machine instructions after register allocation.
+ virtual ScheduleHazardRecognizer*
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+ const ScheduleDAG *DAG) const;
+
+ /// Provide a global flag for disabling the PreRA hazard recognizer that
+ /// targets may choose to honor.
+ bool usePreRAHazardRecognizer() const;
+
+ /// For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ virtual bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ return false;
+ }
+
+ /// See if the comparison instruction can be converted
+ /// into something more efficient. E.g., on ARM most instructions can set the
+ /// flags register, obviating the need for a separate CMP.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const {
+ return false;
+ }
+ virtual bool optimizeCondBranch(MachineInstr *MI) const { return false; }
+
+ /// Try to remove the load by folding it to a register operand at the use.
+ /// We fold the load instructions if and only if the
+ /// def and use are in the same BB. We only look at one load and see
+ /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register
+ /// defined by the load we are trying to fold. DefMI returns the machine
+ /// instruction that defines FoldAsLoadDefReg, and the function returns
+ /// the machine instruction generated due to folding.
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
+ return nullptr;
+ }
+
+ /// 'Reg' is known to be defined by a move immediate instruction,
+ /// try to fold the immediate into the use instruction.
+ /// If MRI->hasOneNonDBGUse(Reg) is true, and this function returns true,
+ /// then the caller may assume that DefMI has been erased from its parent
+ /// block. The caller may assume that it will not be erased by this
+ /// function otherwise.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ return false;
+ }
+
+ /// Return the number of u-operations the given machine
+ /// instruction will be decoded to on the target cpu. The itinerary's
+ /// IssueWidth is the number of microops that can be dispatched each
+ /// cycle. An instruction with zero microops takes no dispatch resources.
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+
+ /// Return true for pseudo instructions that don't consume any
+ /// machine resources in their current form. These are common cases that the
+ /// scheduler should consider free, rather than conservatively handling them
+ /// as instructions with no itinerary.
+ bool isZeroCost(unsigned Opcode) const {
+ return Opcode <= TargetOpcode::COPY;
+ }
+
+ virtual int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+
+ /// Compute and return the use operand latency of a given pair of def and use.
+ /// In most cases, the static scheduling itinerary was enough to determine the
+ /// operand latency. But it may not be possible for instructions with variable
+ /// number of defs / uses.
+ ///
+ /// This is a raw interface to the itinerary that may be directly overridden
+ /// by a target. Use computeOperandLatency to get the best estimate of
+ /// latency.
+ virtual int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const;
+
+ /// Compute and return the latency of the given data
+ /// dependent def and use when the operand indices are already known.
+ unsigned computeOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx)
+ const;
+
+ /// Compute the instruction latency of a given instruction.
+ /// If the instruction has higher cost when predicated, it's returned via
+ /// PredCost.
+ virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = nullptr) const;
+
+ virtual unsigned getPredicationCost(const MachineInstr *MI) const;
+
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
+
+ /// Return the default expected latency for a def based on it's opcode.
+ unsigned defaultDefLatency(const MCSchedModel &SchedModel,
+ const MachineInstr *DefMI) const;
+
+ int computeDefOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI) const;
+
+ /// Return true if this opcode has high latency to its result.
+ virtual bool isHighLatencyDef(int opc) const { return false; }
+
+ /// Compute operand latency between a def of 'Reg'
+ /// and a use in the current loop. Return true if the target considered
+ /// it 'high'. This is used by optimization passes such as machine LICM to
+ /// determine whether it makes sense to hoist an instruction out even in a
+ /// high register pressure situation.
+ virtual
+ bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return false;
+ }
+
+ /// Compute operand latency of a def of 'Reg'. Return true
+ /// if the target considered it 'low'.
+ virtual
+ bool hasLowDefLatency(const TargetSchedModel &SchedModel,
+ const MachineInstr *DefMI, unsigned DefIdx) const;
+
+ /// Perform target-specific instruction verification.
+ virtual
+ bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const {
+ return true;
+ }
+
+ /// Return the current execution domain and bit mask of
+ /// possible domains for instruction.
+ ///
+ /// Some micro-architectures have multiple execution domains, and multiple
+ /// opcodes that perform the same operation in different domains. For
+ /// example, the x86 architecture provides the por, orps, and orpd
+ /// instructions that all do the same thing. There is a latency penalty if a
+ /// register is written in one domain and read in another.
+ ///
+ /// This function returns a pair (domain, mask) containing the execution
+ /// domain of MI, and a bit mask of possible domains. The setExecutionDomain
+ /// function can be used to change the opcode to one of the domains in the
+ /// bit mask. Instructions whose execution domain can't be changed should
+ /// return a 0 mask.
+ ///
+ /// The execution domain numbers don't have any special meaning except domain
+ /// 0 is used for instructions that are not associated with any interesting
+ /// execution domain.
+ ///
+ virtual std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const {
+ return std::make_pair(0, 0);
+ }
+
+ /// Change the opcode of MI to execute in Domain.
+ ///
+ /// The bit (1 << Domain) must be set in the mask returned from
+ /// getExecutionDomain(MI).
+ virtual void setExecutionDomain(MachineInstr *MI, unsigned Domain) const {}
+
+
+ /// Returns the preferred minimum clearance
+ /// before an instruction with an unwanted partial register update.
+ ///
+ /// Some instructions only write part of a register, and implicitly need to
+ /// read the other parts of the register. This may cause unwanted stalls
+ /// preventing otherwise unrelated instructions from executing in parallel in
+ /// an out-of-order CPU.
+ ///
+ /// For example, the x86 instruction cvtsi2ss writes its result to bits
+ /// [31:0] of the destination xmm register. Bits [127:32] are unaffected, so
+ /// the instruction needs to wait for the old value of the register to become
+ /// available:
+ ///
+ /// addps %xmm1, %xmm0
+ /// movaps %xmm0, (%rax)
+ /// cvtsi2ss %rbx, %xmm0
+ ///
+ /// In the code above, the cvtsi2ss instruction needs to wait for the addps
+ /// instruction before it can issue, even though the high bits of %xmm0
+ /// probably aren't needed.
+ ///
+ /// This hook returns the preferred clearance before MI, measured in
+ /// instructions. Other defs of MI's operand OpNum are avoided in the last N
+ /// instructions before MI. It should only return a positive value for
+ /// unwanted dependencies. If the old bits of the defined register have
+ /// useful values, or if MI is determined to otherwise read the dependency,
+ /// the hook should return 0.
+ ///
+ /// The unwanted dependency may be handled by:
+ ///
+ /// 1. Allocating the same register for an MI def and use. That makes the
+ /// unwanted dependency identical to a required dependency.
+ ///
+ /// 2. Allocating a register for the def that has no defs in the previous N
+ /// instructions.
+ ///
+ /// 3. Calling breakPartialRegDependency() with the same arguments. This
+ /// allows the target to insert a dependency breaking instruction.
+ ///
+ virtual unsigned
+ getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ // The default implementation returns 0 for no partial register dependency.
+ return 0;
+ }
+
+ /// \brief Return the minimum clearance before an instruction that reads an
+ /// unused register.
+ ///
+ /// For example, AVX instructions may copy part of a register operand into
+ /// the unused high bits of the destination register.
+ ///
+ /// vcvtsi2sdq %rax, %xmm0<undef>, %xmm14
+ ///
+ /// In the code above, vcvtsi2sdq copies %xmm0[127:64] into %xmm14 creating a
+ /// false dependence on any previous write to %xmm0.
+ ///
+ /// This hook works similarly to getPartialRegUpdateClearance, except that it
+ /// does not take an operand index. Instead sets \p OpNum to the index of the
+ /// unused register.
+ virtual unsigned getUndefRegClearance(const MachineInstr *MI, unsigned &OpNum,
+ const TargetRegisterInfo *TRI) const {
+ // The default implementation returns 0 for no undef register dependency.
+ return 0;
+ }
+
+ /// Insert a dependency-breaking instruction
+ /// before MI to eliminate an unwanted dependency on OpNum.
+ ///
+ /// If it wasn't possible to avoid a def in the last N instructions before MI
+ /// (see getPartialRegUpdateClearance), this hook will be called to break the
+ /// unwanted dependency.
+ ///
+ /// On x86, an xorps instruction can be used as a dependency breaker:
+ ///
+ /// addps %xmm1, %xmm0
+ /// movaps %xmm0, (%rax)
+ /// xorps %xmm0, %xmm0
+ /// cvtsi2ss %rbx, %xmm0
+ ///
+ /// An <imp-kill> operand should be added to MI if an instruction was
+ /// inserted. This ties the instructions together in the post-ra scheduler.
+ ///
+ virtual void
+ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {}
+
+ /// Create machine specific model for scheduling.
+ virtual DFAPacketizer *
+ CreateTargetScheduleState(const TargetSubtargetInfo &) const {
+ return nullptr;
+ }
+
+ // Sometimes, it is possible for the target
+ // to tell, even without aliasing information, that two MIs access different
+ // memory addresses. This function returns true if two MIs access different
+ // memory addresses and false otherwise.
+ virtual bool
+ areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr) const {
+ assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
+ "MIa must load from or modify a memory location");
+ assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
+ "MIb must load from or modify a memory location");
+ return false;
+ }
+
+ /// \brief Return the value to use for the MachineCSE's LookAheadLimit,
+ /// which is a heuristic used for CSE'ing phys reg defs.
+ virtual unsigned getMachineCSELookAheadLimit () const {
+ // The default lookahead is small to prevent unprofitable quadratic
+ // behavior.
+ return 5;
+ }
+
+private:
+ unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;