X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=include%2Fllvm%2FTarget%2FTargetInstrInfo.h;h=9392f7c6c616d4c0db03aced1cbbb13c59bbb1bb;hp=bbe58978210a46fc0c1d31251a9fa87e57dc4d62;hb=07d392bdf2a4149f53318defc9513c2be6d14513;hpb=c291e2f5780c3a8470113a2a58c1fa680cd54b20 diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index bbe58978210..9392f7c6c61 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -14,8 +14,12 @@ #ifndef LLVM_TARGET_TARGETINSTRINFO_H #define LLVM_TARGET_TARGETINSTRINFO_H -#include "llvm/MC/MCInstrInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineCombinerPattern.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { @@ -26,6 +30,8 @@ class MachineMemOperand; class MachineRegisterInfo; class MDNode; class MCInst; +struct MCSchedModel; +class MCSymbolRefExpr; class SDNode; class ScheduleHazardRecognizer; class SelectionDAG; @@ -33,6 +39,8 @@ class ScheduleDAG; class TargetRegisterClass; class TargetRegisterInfo; class BranchProbability; +class TargetSubtargetInfo; +class DFAPacketizer; template class SmallVectorImpl; @@ -42,8 +50,8 @@ template class SmallVectorImpl; /// TargetInstrInfo - Interface to description of machine instruction set /// class TargetInstrInfo : public MCInstrInfo { - TargetInstrInfo(const TargetInstrInfo &); // DO NOT IMPLEMENT - void operator=(const TargetInstrInfo &); // DO NOT IMPLEMENT + TargetInstrInfo(const TargetInstrInfo &) LLVM_DELETED_FUNCTION; + void operator=(const TargetInstrInfo &) LLVM_DELETED_FUNCTION; public: TargetInstrInfo(int CFSetupOpcode = -1, int CFDestroyOpcode = -1) : CallFrameSetupOpcode(CFSetupOpcode), @@ -56,13 +64,14 @@ public: /// class constraint for OpNum, or NULL. const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const; /// isTriviallyReMaterializable - Return true if the instruction is trivially /// rematerializable, meaning it has no side effects and requires no operands /// that aren't always available. bool isTriviallyReMaterializable(const MachineInstr *MI, - AliasAnalysis *AA = 0) const { + AliasAnalysis *AA = nullptr) const { return MI->getOpcode() == TargetOpcode::IMPLICIT_DEF || (MI->getDesc().isRematerializable() && (isReallyTriviallyReMaterializable(MI, AA) || @@ -139,9 +148,7 @@ public: /// missed. virtual bool hasLoadFromStackSlot(const MachineInstr *MI, const MachineMemOperand *&MMO, - int &FrameIndex) const { - return 0; - } + int &FrameIndex) const; /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of @@ -169,8 +176,40 @@ public: /// stack. This is just a hint, as some cases may be missed. virtual bool hasStoreToStackSlot(const MachineInstr *MI, const MachineMemOperand *&MMO, - int &FrameIndex) const { - return 0; + int &FrameIndex) const; + + /// isStackSlotCopy - Return true if the specified machine instruction + /// is a copy of one stack slot to another and has no other effect. + /// Provide the identity of the two frame indices. + virtual bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex, + int &SrcFrameIndex) const { + return false; + } + + /// Compute the size in bytes and offset within a stack slot of a spilled + /// register or subregister. + /// + /// \param [out] Size in bytes of the spilled value. + /// \param [out] Offset in bytes within the stack slot. + /// \returns true if both Size and Offset are successfully computed. + /// + /// Not all subregisters have computable spill slots. For example, + /// subregisters registers may not be byte-sized, and a pair of discontiguous + /// subregisters has no single offset. + /// + /// Targets with nontrivial bigendian implementations may need to override + /// this, particularly to support spilled vector registers. + virtual bool getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, + unsigned &Size, unsigned &Offset, + const TargetMachine *TM) const; + + /// isAsCheapAsAMove - Return true if the instruction is as cheap as a move + /// instruction. + /// + /// Targets for different archs need to override this, and different + /// micro-architectures can also be finely tuned inside. + virtual bool isAsCheapAsAMove(const MachineInstr *MI) const { + return MI->isAsCheapAsAMove(); } /// reMaterialize - Re-issue the specified 'original' instruction at the @@ -182,15 +221,7 @@ public: MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo &TRI) const = 0; - - /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the - /// two-addrss instruction inserted by two-address pass. - virtual void scheduleTwoAddrSource(MachineInstr *SrcMI, - MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const { - // Do nothing. - } + const TargetRegisterInfo &TRI) const; /// duplicate - Create a duplicate of the Orig instruction in MF. This is like /// MachineFunction::CloneMachineInstr(), but the target may update operands @@ -198,7 +229,7 @@ public: /// /// The instruction must be duplicable as indicated by isNotDuplicable(). virtual MachineInstr *duplicate(MachineInstr *Orig, - MachineFunction &MF) const = 0; + MachineFunction &MF) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target @@ -213,7 +244,7 @@ public: virtual MachineInstr * convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const { - return 0; + return nullptr; } /// commuteInstruction - If a target has any instructions that are @@ -225,13 +256,92 @@ public: /// method for a non-commutable instruction, but there may be some cases /// where this method fails and returns null. virtual MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI = false) const = 0; + bool NewMI = false) const; /// findCommutedOpIndices - If specified MI is commutable, return the two /// operand indices that would swap value. Return false if the instruction /// is not in a form which this routine understands. virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const = 0; + unsigned &SrcOpIdx2) const; + + /// A pair composed of a register and a sub-register index. + /// Used to give some type checking when modeling Reg:SubReg. + struct RegSubRegPair { + unsigned Reg; + unsigned SubReg; + RegSubRegPair(unsigned Reg = 0, unsigned SubReg = 0) + : Reg(Reg), SubReg(SubReg) {} + }; + /// A pair composed of a pair of a register and a sub-register index, + /// and another sub-register index. + /// Used to give some type checking when modeling Reg:SubReg1, SubReg2. + struct RegSubRegPairAndIdx : RegSubRegPair { + unsigned SubIdx; + RegSubRegPairAndIdx(unsigned Reg = 0, unsigned SubReg = 0, + unsigned SubIdx = 0) + : RegSubRegPair(Reg, SubReg), SubIdx(SubIdx) {} + }; + + /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI + /// and \p DefIdx. + /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of + /// the list is modeled as . + /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce + /// two elements: + /// - vreg1:sub1, sub0 + /// - vreg2<:0>, sub1 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequence() or MI.isRegSequenceLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isRegSequenceLike(). In other words, one has to override + /// getRegSequenceLikeInputs for target specific instructions. + bool + getRegSequenceInputs(const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl &InputRegs) const; + + /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. + /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce: + /// - vreg1:sub1, sub0 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isExtractSubregLike(). In other words, one has to override + /// getExtractSubregLikeInputs for target specific instructions. + bool + getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const; + + /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] BaseReg and \p [out] InsertedReg contain + /// the equivalent inputs of INSERT_SUBREG. + /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce: + /// - BaseReg: vreg0:sub0 + /// - InsertedReg: vreg1:sub1, sub3 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isInsertSubregLike(). In other words, one has to override + /// getInsertSubregLikeInputs for target specific instructions. + bool + getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const; + /// produceSameValue - Return true if two machine instructions would produce /// identical values. By default, this is only true when the two instructions @@ -240,7 +350,7 @@ public: /// aggressive checks. virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, - const MachineRegisterInfo *MRI = 0) const = 0; + const MachineRegisterInfo *MRI = nullptr) const; /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't @@ -278,8 +388,7 @@ public: /// This is only invoked in cases where AnalyzeBranch returns success. It /// returns the number of instructions that were removed. virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const { - assert(0 && "Target didn't implement TargetInstrInfo::RemoveBranch!"); - return 0; + llvm_unreachable("Target didn't implement TargetInstrInfo::RemoveBranch!"); } /// InsertBranch - Insert branch code into the end of the specified @@ -296,15 +405,28 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const { - assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!"); - return 0; + llvm_unreachable("Target didn't implement TargetInstrInfo::InsertBranch!"); } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything /// after it, replacing it with an unconditional branch to NewDest. This is /// used by the tail merging pass. virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, - MachineBasicBlock *NewDest) const = 0; + MachineBasicBlock *NewDest) const; + + /// getUnconditionalBranch - Get an instruction that performs an unconditional + /// branch to the given symbol. + virtual void + getUnconditionalBranch(MCInst &MI, + const MCSymbolRefExpr *BranchTarget) const { + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::getUnconditionalBranch!"); + } + + /// getTrap - Get a machine trap instruction + virtual void getTrap(MCInst &MI) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::getTrap!"); + } /// isLegalToSplitMBBAt - Return true if it's legal to split the given basic /// block at the specified instruction (i.e. instruction would be the start @@ -320,7 +442,7 @@ public: /// being executed is given by Probability, and Confidence is a measure /// of our confidence that it will be properly predicted. virtual - bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, const BranchProbability &Probability) const { return false; @@ -348,17 +470,134 @@ public: /// Probability, and Confidence is a measure of our confidence that it /// will be properly predicted. virtual bool - isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, const BranchProbability &Probability) const { return false; } + /// isProfitableToUnpredicate - Return true if it's profitable to unpredicate + /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually + /// exclusive predicates. + /// e.g. + /// subeq r0, r1, #1 + /// addne r0, r1, #1 + /// => + /// sub r0, r1, #1 + /// addne r0, r1, #1 + /// + /// This may be profitable is conditional instructions are always executed. + virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + return false; + } + + /// canInsertSelect - Return true if it is possible to insert a select + /// instruction that chooses between TrueReg and FalseReg based on the + /// condition code in Cond. + /// + /// When successful, also return the latency in cycles from TrueReg, + /// FalseReg, and Cond to the destination register. In most cases, a select + /// instruction will be 1 cycle, so CondCycles = TrueCycles = FalseCycles = 1 + /// + /// Some x86 implementations have 2-cycle cmov instructions. + /// + /// @param MBB Block where select instruction would be inserted. + /// @param Cond Condition returned by AnalyzeBranch. + /// @param TrueReg Virtual register to select when Cond is true. + /// @param FalseReg Virtual register to select when Cond is false. + /// @param CondCycles Latency from Cond+Branch to select output. + /// @param TrueCycles Latency from TrueReg to select output. + /// @param FalseCycles Latency from FalseReg to select output. + virtual bool canInsertSelect(const MachineBasicBlock &MBB, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, + int &TrueCycles, int &FalseCycles) const { + return false; + } + + /// insertSelect - Insert a select instruction into MBB before I that will + /// copy TrueReg to DstReg when Cond is true, and FalseReg to DstReg when + /// Cond is false. + /// + /// This function can only be called after canInsertSelect() returned true. + /// The condition in Cond comes from AnalyzeBranch, and it can be assumed + /// that the same flags or registers required by Cond are available at the + /// insertion point. + /// + /// @param MBB Block where select instruction should be inserted. + /// @param I Insertion point. + /// @param DL Source location for debugging. + /// @param DstReg Virtual register to be defined by select instruction. + /// @param Cond Condition as computed by AnalyzeBranch. + /// @param TrueReg Virtual register to copy when Cond is true. + /// @param FalseReg Virtual register to copy when Cons is false. + virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!"); + } + + /// analyzeSelect - Analyze the given select instruction, returning true if + /// it cannot be understood. It is assumed that MI->isSelect() is true. + /// + /// When successful, return the controlling condition and the operands that + /// determine the true and false result values. + /// + /// Result = SELECT Cond, TrueOp, FalseOp + /// + /// Some targets can optimize select instructions, for example by predicating + /// the instruction defining one of the operands. Such targets should set + /// Optimizable. + /// + /// @param MI Select instruction to analyze. + /// @param Cond Condition controlling the select. + /// @param TrueOp Operand number of the value selected when Cond is true. + /// @param FalseOp Operand number of the value selected when Cond is false. + /// @param Optimizable Returned as true if MI is optimizable. + /// @returns False on success. + virtual bool analyzeSelect(const MachineInstr *MI, + SmallVectorImpl &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const { + assert(MI && MI->getDesc().isSelect() && "MI must be a select instruction"); + return true; + } + + /// optimizeSelect - Given a select instruction that was understood by + /// analyzeSelect and returned Optimizable = true, attempt to optimize MI by + /// merging it with one of its operands. Returns NULL on failure. + /// + /// When successful, returns the new select instruction. The client is + /// responsible for deleting MI. + /// + /// If both sides of the select can be optimized, PreferFalse is used to pick + /// a side. + /// + /// @param MI Optimizable select instruction. + /// @param PreferFalse Try to optimize FalseOp instead of TrueOp. + /// @returns Optimized instruction or NULL. + virtual MachineInstr *optimizeSelect(MachineInstr *MI, + bool PreferFalse = false) const { + // This function must be implemented if Optimizable is ever set. + llvm_unreachable("Target must implement TargetInstrInfo::optimizeSelect!"); + } + /// copyPhysReg - Emit instructions to copy a pair of physical registers. + /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. See for example the ARM target. virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - assert(0 && "Target didn't implement TargetInstrInfo::copyPhysReg!"); + llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!"); } /// storeRegToStackSlot - Store the specified register of the given register @@ -371,7 +610,8 @@ public: unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!"); + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::storeRegToStackSlot!"); } /// loadRegFromStackSlot - Load the specified register of the given register @@ -383,7 +623,8 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!"); + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::loadRegFromStackSlot!"); } /// expandPostRAPseudo - This function is called for all pseudo instructions @@ -392,26 +633,10 @@ public: /// into real instructions. The target can edit MI in place, or it can insert /// new instructions and erase MI. The function should return true if /// anything was changed. - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return false; } - /// emitFrameIndexDebugValue - Emit a target-dependent form of - /// DBG_VALUE encoding the address of a frame index. Addresses would - /// normally be lowered the same way as other addresses on the target, - /// e.g. in load instructions. For targets that do not support this - /// the debug info is simply lost. - /// If you add this for a target you should handle this DBG_VALUE in the - /// target-specific AsmPrinter code as well; you will probably get invalid - /// assembly output if you don't. - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc dl) const { - return 0; - } - /// foldMemoryOperand - Attempt to fold a load or store of the specified stack /// slot into the specified machine instruction for the specified operand(s). /// If this is possible, a new instruction is returned with the specified @@ -429,6 +654,42 @@ public: const SmallVectorImpl &Ops, MachineInstr* LoadMI) const; + /// hasPattern - return true when there is potentially a faster code sequence + /// for an instruction chain ending in \p Root. All potential pattern are + /// returned in the \p Pattern vector. Pattern should be sorted in priority + /// order since the pattern evaluator stops checking as soon as it finds a + /// faster sequence. + /// \param Root - Instruction that could be combined with one of its operands + /// \param Pattern - Vector of possible combination pattern + + virtual bool hasPattern( + MachineInstr &Root, + SmallVectorImpl &Pattern) const { + return false; + } + + /// genAlternativeCodeSequence - when hasPattern() finds a pattern this + /// function generates the instructions that could replace the original code + /// sequence. The client has to decide whether the actual replacementment is + /// beneficial or not. + /// \param Root - Instruction that could be combined with one of its operands + /// \param P - Combination pattern for Root + /// \param InsInstrs - Vector of new instructions that implement P + /// \param DelInstrs - Old instructions, including Root, that could be replaced + /// by InsInstr + /// \param InstrIdxForVirtReg - map of virtual register to instruction in + /// InsInstr that defines it + virtual void genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { + return; + } + + /// useMachineCombiner - return true when a target supports MachineCombiner + virtual bool useMachineCombiner() const { return false; } + protected: /// foldMemoryOperandImpl - Target-dependent implementation for /// foldMemoryOperand. Target-independent code in foldMemoryOperand will @@ -437,7 +698,7 @@ protected: MachineInstr* MI, const SmallVectorImpl &Ops, int FrameIndex) const { - return 0; + return nullptr; } /// foldMemoryOperandImpl - Target-dependent implementation for @@ -447,7 +708,50 @@ protected: MachineInstr* MI, const SmallVectorImpl &Ops, MachineInstr* LoadMI) const { - return 0; + return nullptr; + } + + /// \brief Target-dependent implementation of getRegSequenceInputs. + /// + /// \returns true if it is possible to build the equivalent + /// REG_SEQUENCE inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequenceLike(). + /// + /// \see TargetInstrInfo::getRegSequenceInputs. + virtual bool getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl &InputRegs) const { + return false; + } + + /// \brief Target-dependent implementation of getExtractSubregInputs. + /// + /// \returns true if it is possible to build the equivalent + /// EXTRACT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubregLike(). + /// + /// \see TargetInstrInfo::getExtractSubregInputs. + virtual bool getExtractSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + return false; + } + + /// \brief Target-dependent implementation of getInsertSubregInputs. + /// + /// \returns true if it is possible to build the equivalent + /// INSERT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubregLike(). + /// + /// \see TargetInstrInfo::getInsertSubregInputs. + virtual bool + getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const { + return false; } public: @@ -455,7 +759,7 @@ public: /// folding is possible. virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const =0; + const SmallVectorImpl &Ops) const; /// unfoldMemoryOperand - Separate a single instruction which folded a load or /// a store or a load and a store into two or more instruction. If this is @@ -479,7 +783,7 @@ public: /// value. virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, - unsigned *LoadRegIndex = 0) const { + unsigned *LoadRegIndex = nullptr) const { return 0; } @@ -507,6 +811,28 @@ public: return false; } + /// \brief Get the base register and byte offset of a load/store instr. + virtual bool getLdStBaseRegImmOfs(MachineInstr *LdSt, + unsigned &BaseReg, unsigned &Offset, + const TargetRegisterInfo *TRI) const { + return false; + } + + virtual bool enableClusterLoads() const { return false; } + + virtual bool shouldClusterLoads(MachineInstr *FirstLdSt, + MachineInstr *SecondLdSt, + unsigned NumLoads) const { + return false; + } + + /// \brief Can this target fuse the given instructions if they are scheduled + /// adjacent. + virtual bool shouldScheduleAdjacent(MachineInstr* First, + MachineInstr *Second) const { + return false; + } + /// ReverseBranchCondition - Reverses the branch condition of the specified /// condition list, returning false on success and true if it cannot be /// reversed. @@ -521,10 +847,8 @@ public: MachineBasicBlock::iterator MI) const; - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - virtual void getNoopForMachoTarget(MCInst &NopInst) const { - // Default to just using 'nop' string. - } + /// Return the noop instruction to use for a noop. + virtual void getNoopForMachoTarget(MCInst &NopInst) const; /// isPredicated - Returns true if the instruction is already predicated. @@ -541,7 +865,7 @@ public: /// instruction. It returns true if the operation was successful. virtual bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const = 0; + const SmallVectorImpl &Pred) const; /// SubsumesPredicate - Returns true if the first specified predicate /// subsumes the second, e.g. GE subsumes GT. @@ -577,7 +901,7 @@ public: /// terminators. virtual bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, - const MachineFunction &MF) const = 0; + const MachineFunction &MF) const; /// Measure the specified inline asm to determine an approximation of its /// length. @@ -588,42 +912,76 @@ public: /// use for this target when scheduling the machine instructions before /// register allocation. virtual ScheduleHazardRecognizer* - CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const = 0; + CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const; + + /// CreateTargetMIHazardRecognizer - Allocate and return a hazard recognizer + /// to use for this target when scheduling the machine instructions before + /// register allocation. + virtual ScheduleHazardRecognizer* + CreateTargetMIHazardRecognizer(const InstrItineraryData*, + const ScheduleDAG *DAG) const; /// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard /// recognizer to use for this target when scheduling the machine instructions /// after register allocation. virtual ScheduleHazardRecognizer* CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, - const ScheduleDAG *DAG) const = 0; + const ScheduleDAG *DAG) const; - /// AnalyzeCompare - For a comparison instruction, return the source register - /// in SrcReg and the value it compares against in CmpValue. Return true if - /// the comparison instruction can be analyzed. - virtual bool AnalyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, int &Mask, int &Value) const { + /// Provide a global flag for disabling the PreRA hazard recognizer that + /// targets may choose to honor. + bool usePreRAHazardRecognizer() const; + + /// analyzeCompare - For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + virtual bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { return false; } - /// OptimizeCompareInstr - See if the comparison instruction can be converted + /// optimizeCompareInstr - See if the comparison instruction can be converted /// into something more efficient. E.g., on ARM most instructions can set the /// flags register, obviating the need for a separate CMP. - virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, - unsigned SrcReg, int Mask, int Value, + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, const MachineRegisterInfo *MRI) const { return false; } + /// optimizeLoadInstr - Try to remove the load by folding it to a register + /// operand at the use. We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. + virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + return nullptr; + } + /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. + /// If MRI->hasOneNonDBGUse(Reg) is true, and this function returns true, + /// then the caller may assume that DefMI has been erased from its parent + /// block. The caller may assume that it will not be erased by this + /// function otherwise. virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, unsigned Reg, MachineRegisterInfo *MRI) const { return false; } /// getNumMicroOps - Return the number of u-operations the given machine - /// instruction will be decoded to on the target cpu. + /// instruction will be decoded to on the target cpu. The itinerary's + /// IssueWidth is the number of microops that can be dispatched each + /// cycle. An instruction with zero microops takes no dispatch resources. virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const; @@ -635,29 +993,49 @@ public: return Opcode <= TargetOpcode::COPY; } + virtual int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const; + /// getOperandLatency - Compute and return the use operand latency of a given /// pair of def and use. /// In most cases, the static scheduling itinerary was enough to determine the /// operand latency. But it may not be possible for instructions with variable /// number of defs / uses. + /// + /// This is a raw interface to the itinerary that may be directly overriden by + /// a target. Use computeOperandLatency to get the best estimate of latency. virtual int getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const; + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, + unsigned UseIdx) const; - virtual int getOperandLatency(const InstrItineraryData *ItinData, - SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const; + /// computeOperandLatency - Compute and return the latency of the given data + /// dependent def and use when the operand indices are already known. + unsigned computeOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) + const; /// getInstrLatency - Compute the instruction latency of a given instruction. /// If the instruction has higher cost when predicated, it's returned via /// PredCost. - virtual int getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, - unsigned *PredCost = 0) const; + virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = nullptr) const; + + virtual unsigned getPredicationCost(const MachineInstr *MI) const; virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const; + /// Return the default expected latency for a def based on it's opcode. + unsigned defaultDefLatency(const MCSchedModel &SchedModel, + const MachineInstr *DefMI) const; + + int computeDefOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI) const; + /// isHighLatencyDef - Return true if this opcode has high latency to its /// result. virtual bool isHighLatencyDef(int opc) const { return false; } @@ -687,58 +1065,147 @@ public: return true; } -private: - int CallFrameSetupOpcode, CallFrameDestroyOpcode; -}; + /// getExecutionDomain - Return the current execution domain and bit mask of + /// possible domains for instruction. + /// + /// Some micro-architectures have multiple execution domains, and multiple + /// opcodes that perform the same operation in different domains. For + /// example, the x86 architecture provides the por, orps, and orpd + /// instructions that all do the same thing. There is a latency penalty if a + /// register is written in one domain and read in another. + /// + /// This function returns a pair (domain, mask) containing the execution + /// domain of MI, and a bit mask of possible domains. The setExecutionDomain + /// function can be used to change the opcode to one of the domains in the + /// bit mask. Instructions whose execution domain can't be changed should + /// return a 0 mask. + /// + /// The execution domain numbers don't have any special meaning except domain + /// 0 is used for instructions that are not associated with any interesting + /// execution domain. + /// + virtual std::pair + getExecutionDomain(const MachineInstr *MI) const { + return std::make_pair(0, 0); + } -/// TargetInstrInfoImpl - This is the default implementation of -/// TargetInstrInfo, which just provides a couple of default implementations -/// for various methods. This separated out because it is implemented in -/// libcodegen, not in libtarget. -class TargetInstrInfoImpl : public TargetInstrInfo { -protected: - TargetInstrInfoImpl(int CallFrameSetupOpcode = -1, - int CallFrameDestroyOpcode = -1) - : TargetInstrInfo(CallFrameSetupOpcode, CallFrameDestroyOpcode) {} -public: - virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock *NewDest) const; - virtual MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI = false) const; - virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const; - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const; - virtual bool hasLoadFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const; - virtual bool hasStoreToStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const; - virtual bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const; - virtual void reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SubReg, - const MachineInstr *Orig, - const TargetRegisterInfo &TRI) const; - virtual MachineInstr *duplicate(MachineInstr *Orig, - MachineFunction &MF) const; - virtual bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1, - const MachineRegisterInfo *MRI) const; - virtual bool isSchedulingBoundary(const MachineInstr *MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const; + /// setExecutionDomain - Change the opcode of MI to execute in Domain. + /// + /// The bit (1 << Domain) must be set in the mask returned from + /// getExecutionDomain(MI). + /// + virtual void setExecutionDomain(MachineInstr *MI, unsigned Domain) const {} - bool usePreRAHazardRecognizer() const; - virtual ScheduleHazardRecognizer * - CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const; + /// getPartialRegUpdateClearance - Returns the preferred minimum clearance + /// before an instruction with an unwanted partial register update. + /// + /// Some instructions only write part of a register, and implicitly need to + /// read the other parts of the register. This may cause unwanted stalls + /// preventing otherwise unrelated instructions from executing in parallel in + /// an out-of-order CPU. + /// + /// For example, the x86 instruction cvtsi2ss writes its result to bits + /// [31:0] of the destination xmm register. Bits [127:32] are unaffected, so + /// the instruction needs to wait for the old value of the register to become + /// available: + /// + /// addps %xmm1, %xmm0 + /// movaps %xmm0, (%rax) + /// cvtsi2ss %rbx, %xmm0 + /// + /// In the code above, the cvtsi2ss instruction needs to wait for the addps + /// instruction before it can issue, even though the high bits of %xmm0 + /// probably aren't needed. + /// + /// This hook returns the preferred clearance before MI, measured in + /// instructions. Other defs of MI's operand OpNum are avoided in the last N + /// instructions before MI. It should only return a positive value for + /// unwanted dependencies. If the old bits of the defined register have + /// useful values, or if MI is determined to otherwise read the dependency, + /// the hook should return 0. + /// + /// The unwanted dependency may be handled by: + /// + /// 1. Allocating the same register for an MI def and use. That makes the + /// unwanted dependency identical to a required dependency. + /// + /// 2. Allocating a register for the def that has no defs in the previous N + /// instructions. + /// + /// 3. Calling breakPartialRegDependency() with the same arguments. This + /// allows the target to insert a dependency breaking instruction. + /// + virtual unsigned + getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + // The default implementation returns 0 for no partial register dependency. + return 0; + } + + /// \brief Return the minimum clearance before an instruction that reads an + /// unused register. + /// + /// For example, AVX instructions may copy part of an register operand into + /// the unused high bits of the destination register. + /// + /// vcvtsi2sdq %rax, %xmm0, %xmm14 + /// + /// In the code above, vcvtsi2sdq copies %xmm0[127:64] into %xmm14 creating a + /// false dependence on any previous write to %xmm0. + /// + /// This hook works similarly to getPartialRegUpdateClearance, except that it + /// does not take an operand index. Instead sets \p OpNum to the index of the + /// unused register. + virtual unsigned getUndefRegClearance(const MachineInstr *MI, unsigned &OpNum, + const TargetRegisterInfo *TRI) const { + // The default implementation returns 0 for no undef register dependency. + return 0; + } - virtual ScheduleHazardRecognizer * - CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, - const ScheduleDAG*) const; + /// breakPartialRegDependency - Insert a dependency-breaking instruction + /// before MI to eliminate an unwanted dependency on OpNum. + /// + /// If it wasn't possible to avoid a def in the last N instructions before MI + /// (see getPartialRegUpdateClearance), this hook will be called to break the + /// unwanted dependency. + /// + /// On x86, an xorps instruction can be used as a dependency breaker: + /// + /// addps %xmm1, %xmm0 + /// movaps %xmm0, (%rax) + /// xorps %xmm0, %xmm0 + /// cvtsi2ss %rbx, %xmm0 + /// + /// An operand should be added to MI if an instruction was + /// inserted. This ties the instructions together in the post-ra scheduler. + /// + virtual void + breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const {} + + /// Create machine specific model for scheduling. + virtual DFAPacketizer * + CreateTargetScheduleState(const TargetSubtargetInfo &) const { + return nullptr; + } + + // areMemAccessesTriviallyDisjoint - Sometimes, it is possible for the target + // to tell, even without aliasing information, that two MIs access different + // memory addresses. This function returns true if two MIs access different + // memory addresses, and false otherwise. + virtual bool + areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, + AliasAnalysis *AA = nullptr) const { + assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && + "MIa must load from or modify a memory location"); + assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && + "MIb must load from or modify a memory location"); + return false; + } + +private: + int CallFrameSetupOpcode, CallFrameDestroyOpcode; }; } // End llvm namespace