X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FNVPTX%2FNVPTXISelLowering.h;h=60914c1d09b472dd713d8c82a72fc65b3d2b179f;hb=5f506f370b194cbc3e0ab8bf4274c9ee08aaf524;hp=3cd49d38af76a845d81df22242d55c11e686e6af;hpb=3639ce2575660a0e6938d2e84e8bd9a738fd7051;p=oota-llvm.git diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 3cd49d38af7..60914c1d09b 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -12,34 +12,27 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXISELLOWERING_H -#define NVPTXISELLOWERING_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H #include "NVPTX.h" -#include "NVPTXSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" namespace llvm { namespace NVPTXISD { -enum NodeType { +enum NodeType : unsigned { // Start the numbering from where ISD NodeType finishes. FIRST_NUMBER = ISD::BUILTIN_OP_END, Wrapper, CALL, RET_FLAG, LOAD_PARAM, - NVBuiltin, DeclareParam, DeclareScalarParam, DeclareRetParam, DeclareRet, DeclareScalarRet, - LoadParam, - StoreParam, - StoreParamS32, // to sext and store a <32bit value, not used currently - StoreParamU32, // to zext and store a <32bit value, not used currently - MoveToParam, PrintCall, PrintCallUni, CallArgBegin, @@ -51,13 +44,16 @@ enum NodeType { CallSymbol, Prototype, MoveParam, - MoveRetval, - MoveToRetval, - StoreRetval, PseudoUseParam, RETURN, CallSeqBegin, CallSeqEnd, + CallPrototype, + FUN_SHFL_CLAMP, + FUN_SHFR_CLAMP, + MUL_WIDE_SIGNED, + MUL_WIDE_UNSIGNED, + IMAD, Dummy, LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -67,85 +63,460 @@ enum NodeType { LDUV2, // LDU.v2 LDUV4, // LDU.v4 StoreV2, - StoreV4 + StoreV4, + LoadParam, + LoadParamV2, + LoadParamV4, + StoreParam, + StoreParamV2, + StoreParamV4, + StoreParamS32, // to sext and store a <32bit value, not used currently + StoreParamU32, // to zext and store a <32bit value, not used currently + StoreRetval, + StoreRetvalV2, + StoreRetvalV4, + + // Texture intrinsics + Tex1DFloatS32, + Tex1DFloatFloat, + Tex1DFloatFloatLevel, + Tex1DFloatFloatGrad, + Tex1DS32S32, + Tex1DS32Float, + Tex1DS32FloatLevel, + Tex1DS32FloatGrad, + Tex1DU32S32, + Tex1DU32Float, + Tex1DU32FloatLevel, + Tex1DU32FloatGrad, + Tex1DArrayFloatS32, + Tex1DArrayFloatFloat, + Tex1DArrayFloatFloatLevel, + Tex1DArrayFloatFloatGrad, + Tex1DArrayS32S32, + Tex1DArrayS32Float, + Tex1DArrayS32FloatLevel, + Tex1DArrayS32FloatGrad, + Tex1DArrayU32S32, + Tex1DArrayU32Float, + Tex1DArrayU32FloatLevel, + Tex1DArrayU32FloatGrad, + Tex2DFloatS32, + Tex2DFloatFloat, + Tex2DFloatFloatLevel, + Tex2DFloatFloatGrad, + Tex2DS32S32, + Tex2DS32Float, + Tex2DS32FloatLevel, + Tex2DS32FloatGrad, + Tex2DU32S32, + Tex2DU32Float, + Tex2DU32FloatLevel, + Tex2DU32FloatGrad, + Tex2DArrayFloatS32, + Tex2DArrayFloatFloat, + Tex2DArrayFloatFloatLevel, + Tex2DArrayFloatFloatGrad, + Tex2DArrayS32S32, + Tex2DArrayS32Float, + Tex2DArrayS32FloatLevel, + Tex2DArrayS32FloatGrad, + Tex2DArrayU32S32, + Tex2DArrayU32Float, + Tex2DArrayU32FloatLevel, + Tex2DArrayU32FloatGrad, + Tex3DFloatS32, + Tex3DFloatFloat, + Tex3DFloatFloatLevel, + Tex3DFloatFloatGrad, + Tex3DS32S32, + Tex3DS32Float, + Tex3DS32FloatLevel, + Tex3DS32FloatGrad, + Tex3DU32S32, + Tex3DU32Float, + Tex3DU32FloatLevel, + Tex3DU32FloatGrad, + TexCubeFloatFloat, + TexCubeFloatFloatLevel, + TexCubeS32Float, + TexCubeS32FloatLevel, + TexCubeU32Float, + TexCubeU32FloatLevel, + TexCubeArrayFloatFloat, + TexCubeArrayFloatFloatLevel, + TexCubeArrayS32Float, + TexCubeArrayS32FloatLevel, + TexCubeArrayU32Float, + TexCubeArrayU32FloatLevel, + Tld4R2DFloatFloat, + Tld4G2DFloatFloat, + Tld4B2DFloatFloat, + Tld4A2DFloatFloat, + Tld4R2DS64Float, + Tld4G2DS64Float, + Tld4B2DS64Float, + Tld4A2DS64Float, + Tld4R2DU64Float, + Tld4G2DU64Float, + Tld4B2DU64Float, + Tld4A2DU64Float, + TexUnified1DFloatS32, + TexUnified1DFloatFloat, + TexUnified1DFloatFloatLevel, + TexUnified1DFloatFloatGrad, + TexUnified1DS32S32, + TexUnified1DS32Float, + TexUnified1DS32FloatLevel, + TexUnified1DS32FloatGrad, + TexUnified1DU32S32, + TexUnified1DU32Float, + TexUnified1DU32FloatLevel, + TexUnified1DU32FloatGrad, + TexUnified1DArrayFloatS32, + TexUnified1DArrayFloatFloat, + TexUnified1DArrayFloatFloatLevel, + TexUnified1DArrayFloatFloatGrad, + TexUnified1DArrayS32S32, + TexUnified1DArrayS32Float, + TexUnified1DArrayS32FloatLevel, + TexUnified1DArrayS32FloatGrad, + TexUnified1DArrayU32S32, + TexUnified1DArrayU32Float, + TexUnified1DArrayU32FloatLevel, + TexUnified1DArrayU32FloatGrad, + TexUnified2DFloatS32, + TexUnified2DFloatFloat, + TexUnified2DFloatFloatLevel, + TexUnified2DFloatFloatGrad, + TexUnified2DS32S32, + TexUnified2DS32Float, + TexUnified2DS32FloatLevel, + TexUnified2DS32FloatGrad, + TexUnified2DU32S32, + TexUnified2DU32Float, + TexUnified2DU32FloatLevel, + TexUnified2DU32FloatGrad, + TexUnified2DArrayFloatS32, + TexUnified2DArrayFloatFloat, + TexUnified2DArrayFloatFloatLevel, + TexUnified2DArrayFloatFloatGrad, + TexUnified2DArrayS32S32, + TexUnified2DArrayS32Float, + TexUnified2DArrayS32FloatLevel, + TexUnified2DArrayS32FloatGrad, + TexUnified2DArrayU32S32, + TexUnified2DArrayU32Float, + TexUnified2DArrayU32FloatLevel, + TexUnified2DArrayU32FloatGrad, + TexUnified3DFloatS32, + TexUnified3DFloatFloat, + TexUnified3DFloatFloatLevel, + TexUnified3DFloatFloatGrad, + TexUnified3DS32S32, + TexUnified3DS32Float, + TexUnified3DS32FloatLevel, + TexUnified3DS32FloatGrad, + TexUnified3DU32S32, + TexUnified3DU32Float, + TexUnified3DU32FloatLevel, + TexUnified3DU32FloatGrad, + TexUnifiedCubeFloatFloat, + TexUnifiedCubeFloatFloatLevel, + TexUnifiedCubeS32Float, + TexUnifiedCubeS32FloatLevel, + TexUnifiedCubeU32Float, + TexUnifiedCubeU32FloatLevel, + TexUnifiedCubeArrayFloatFloat, + TexUnifiedCubeArrayFloatFloatLevel, + TexUnifiedCubeArrayS32Float, + TexUnifiedCubeArrayS32FloatLevel, + TexUnifiedCubeArrayU32Float, + TexUnifiedCubeArrayU32FloatLevel, + Tld4UnifiedR2DFloatFloat, + Tld4UnifiedG2DFloatFloat, + Tld4UnifiedB2DFloatFloat, + Tld4UnifiedA2DFloatFloat, + Tld4UnifiedR2DS64Float, + Tld4UnifiedG2DS64Float, + Tld4UnifiedB2DS64Float, + Tld4UnifiedA2DS64Float, + Tld4UnifiedR2DU64Float, + Tld4UnifiedG2DU64Float, + Tld4UnifiedB2DU64Float, + Tld4UnifiedA2DU64Float, + + // Surface intrinsics + Suld1DI8Clamp, + Suld1DI16Clamp, + Suld1DI32Clamp, + Suld1DI64Clamp, + Suld1DV2I8Clamp, + Suld1DV2I16Clamp, + Suld1DV2I32Clamp, + Suld1DV2I64Clamp, + Suld1DV4I8Clamp, + Suld1DV4I16Clamp, + Suld1DV4I32Clamp, + + Suld1DArrayI8Clamp, + Suld1DArrayI16Clamp, + Suld1DArrayI32Clamp, + Suld1DArrayI64Clamp, + Suld1DArrayV2I8Clamp, + Suld1DArrayV2I16Clamp, + Suld1DArrayV2I32Clamp, + Suld1DArrayV2I64Clamp, + Suld1DArrayV4I8Clamp, + Suld1DArrayV4I16Clamp, + Suld1DArrayV4I32Clamp, + + Suld2DI8Clamp, + Suld2DI16Clamp, + Suld2DI32Clamp, + Suld2DI64Clamp, + Suld2DV2I8Clamp, + Suld2DV2I16Clamp, + Suld2DV2I32Clamp, + Suld2DV2I64Clamp, + Suld2DV4I8Clamp, + Suld2DV4I16Clamp, + Suld2DV4I32Clamp, + + Suld2DArrayI8Clamp, + Suld2DArrayI16Clamp, + Suld2DArrayI32Clamp, + Suld2DArrayI64Clamp, + Suld2DArrayV2I8Clamp, + Suld2DArrayV2I16Clamp, + Suld2DArrayV2I32Clamp, + Suld2DArrayV2I64Clamp, + Suld2DArrayV4I8Clamp, + Suld2DArrayV4I16Clamp, + Suld2DArrayV4I32Clamp, + + Suld3DI8Clamp, + Suld3DI16Clamp, + Suld3DI32Clamp, + Suld3DI64Clamp, + Suld3DV2I8Clamp, + Suld3DV2I16Clamp, + Suld3DV2I32Clamp, + Suld3DV2I64Clamp, + Suld3DV4I8Clamp, + Suld3DV4I16Clamp, + Suld3DV4I32Clamp, + + Suld1DI8Trap, + Suld1DI16Trap, + Suld1DI32Trap, + Suld1DI64Trap, + Suld1DV2I8Trap, + Suld1DV2I16Trap, + Suld1DV2I32Trap, + Suld1DV2I64Trap, + Suld1DV4I8Trap, + Suld1DV4I16Trap, + Suld1DV4I32Trap, + + Suld1DArrayI8Trap, + Suld1DArrayI16Trap, + Suld1DArrayI32Trap, + Suld1DArrayI64Trap, + Suld1DArrayV2I8Trap, + Suld1DArrayV2I16Trap, + Suld1DArrayV2I32Trap, + Suld1DArrayV2I64Trap, + Suld1DArrayV4I8Trap, + Suld1DArrayV4I16Trap, + Suld1DArrayV4I32Trap, + + Suld2DI8Trap, + Suld2DI16Trap, + Suld2DI32Trap, + Suld2DI64Trap, + Suld2DV2I8Trap, + Suld2DV2I16Trap, + Suld2DV2I32Trap, + Suld2DV2I64Trap, + Suld2DV4I8Trap, + Suld2DV4I16Trap, + Suld2DV4I32Trap, + + Suld2DArrayI8Trap, + Suld2DArrayI16Trap, + Suld2DArrayI32Trap, + Suld2DArrayI64Trap, + Suld2DArrayV2I8Trap, + Suld2DArrayV2I16Trap, + Suld2DArrayV2I32Trap, + Suld2DArrayV2I64Trap, + Suld2DArrayV4I8Trap, + Suld2DArrayV4I16Trap, + Suld2DArrayV4I32Trap, + + Suld3DI8Trap, + Suld3DI16Trap, + Suld3DI32Trap, + Suld3DI64Trap, + Suld3DV2I8Trap, + Suld3DV2I16Trap, + Suld3DV2I32Trap, + Suld3DV2I64Trap, + Suld3DV4I8Trap, + Suld3DV4I16Trap, + Suld3DV4I32Trap, + + Suld1DI8Zero, + Suld1DI16Zero, + Suld1DI32Zero, + Suld1DI64Zero, + Suld1DV2I8Zero, + Suld1DV2I16Zero, + Suld1DV2I32Zero, + Suld1DV2I64Zero, + Suld1DV4I8Zero, + Suld1DV4I16Zero, + Suld1DV4I32Zero, + + Suld1DArrayI8Zero, + Suld1DArrayI16Zero, + Suld1DArrayI32Zero, + Suld1DArrayI64Zero, + Suld1DArrayV2I8Zero, + Suld1DArrayV2I16Zero, + Suld1DArrayV2I32Zero, + Suld1DArrayV2I64Zero, + Suld1DArrayV4I8Zero, + Suld1DArrayV4I16Zero, + Suld1DArrayV4I32Zero, + + Suld2DI8Zero, + Suld2DI16Zero, + Suld2DI32Zero, + Suld2DI64Zero, + Suld2DV2I8Zero, + Suld2DV2I16Zero, + Suld2DV2I32Zero, + Suld2DV2I64Zero, + Suld2DV4I8Zero, + Suld2DV4I16Zero, + Suld2DV4I32Zero, + + Suld2DArrayI8Zero, + Suld2DArrayI16Zero, + Suld2DArrayI32Zero, + Suld2DArrayI64Zero, + Suld2DArrayV2I8Zero, + Suld2DArrayV2I16Zero, + Suld2DArrayV2I32Zero, + Suld2DArrayV2I64Zero, + Suld2DArrayV4I8Zero, + Suld2DArrayV4I16Zero, + Suld2DArrayV4I32Zero, + + Suld3DI8Zero, + Suld3DI16Zero, + Suld3DI32Zero, + Suld3DI64Zero, + Suld3DV2I8Zero, + Suld3DV2I16Zero, + Suld3DV2I32Zero, + Suld3DV2I64Zero, + Suld3DV4I8Zero, + Suld3DV4I16Zero, + Suld3DV4I32Zero }; } +class NVPTXSubtarget; + //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// class NVPTXTargetLowering : public TargetLowering { public: - explicit NVPTXTargetLowering(NVPTXTargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, + const NVPTXSubtarget &STI); + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset, - SelectionDAG &DAG) const; - virtual const char *getTargetNodeName(unsigned Opcode) const; - - bool isTypeSupportedInIntrinsic(MVT VT) const; + const char *getTargetNodeName(unsigned Opcode) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - unsigned Intrinsic) const; + unsigned Intrinsic) const override; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; - /// getFunctionAlignment - Return the Log2 alignment of this function. - virtual unsigned getFunctionAlignment(const Function *F) const; + bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { + // Truncating 64-bit to 32-bit is free in SASS. + if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) + return false; + return SrcTy->getPrimitiveSizeInBits() == 64 && + DstTy->getPrimitiveSizeInBits() == 32; + } - virtual EVT getSetCCResultType(EVT VT) const { + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, + EVT VT) const override { if (VT.isVector()) - return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); return MVT::i1; } - ConstraintType getConstraintType(const std::string &Constraint) const; + ConstraintType getConstraintType(StringRef Constraint) const override; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; - virtual SDValue LowerFormalArguments( + SDValue LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; - virtual SDValue - LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; - std::string getPrototype(Type *, const ArgListTy &, + std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl &, - unsigned retAlignment) const; + unsigned retAlignment, + const ImmutableCallSite *CS) const; - virtual SDValue + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, DebugLoc dl, - SelectionDAG &DAG) const; + const SmallVectorImpl &OutVals, SDLoc dl, + SelectionDAG &DAG) const override; - virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const; + void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; - NVPTXTargetMachine *nvTM; + const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } - virtual bool shouldSplitVectorElementType(EVT VT) const; + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; -private: - const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here + bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; + + bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } + + bool enableAggressiveFMAFusion(EVT VT) const override { return true; } - SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, - EVT = MVT::i32) const; - SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const; - SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); +private: + const NVPTXSubtarget &STI; // cache the subtarget here + SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; @@ -156,9 +527,18 @@ private: SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; + + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, + Type *Ty, unsigned Idx) const; }; } // namespace llvm -#endif // NVPTXISELLOWERING_H +#endif