X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FNVPTX%2FNVPTXISelLowering.h;h=e5c37321a33b503c803c72ecb0e188c055411901;hb=bd11ccb2b9c5ff62a37893e47d338d1620f7ba08;hp=d311f681f3ab13752b1a1e615104f1494ef5299c;hpb=9832f7dc71a0b01cca69a5b9d99f8f0be4bf3368;p=oota-llvm.git diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index d311f681f3a..e5c37321a33 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXISELLOWERING_H -#define NVPTXISELLOWERING_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H #include "NVPTX.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -21,7 +21,7 @@ namespace llvm { namespace NVPTXISD { -enum NodeType { +enum NodeType : unsigned { // Start the numbering from where ISD NodeType finishes. FIRST_NUMBER = ISD::BUILTIN_OP_END, Wrapper, @@ -77,54 +77,244 @@ enum NodeType { StoreRetvalV4, // Texture intrinsics - Tex1DFloatI32, + Tex1DFloatS32, Tex1DFloatFloat, Tex1DFloatFloatLevel, Tex1DFloatFloatGrad, - Tex1DI32I32, - Tex1DI32Float, - Tex1DI32FloatLevel, - Tex1DI32FloatGrad, - Tex1DArrayFloatI32, + Tex1DS32S32, + Tex1DS32Float, + Tex1DS32FloatLevel, + Tex1DS32FloatGrad, + Tex1DU32S32, + Tex1DU32Float, + Tex1DU32FloatLevel, + Tex1DU32FloatGrad, + Tex1DArrayFloatS32, Tex1DArrayFloatFloat, Tex1DArrayFloatFloatLevel, Tex1DArrayFloatFloatGrad, - Tex1DArrayI32I32, - Tex1DArrayI32Float, - Tex1DArrayI32FloatLevel, - Tex1DArrayI32FloatGrad, - Tex2DFloatI32, + Tex1DArrayS32S32, + Tex1DArrayS32Float, + Tex1DArrayS32FloatLevel, + Tex1DArrayS32FloatGrad, + Tex1DArrayU32S32, + Tex1DArrayU32Float, + Tex1DArrayU32FloatLevel, + Tex1DArrayU32FloatGrad, + Tex2DFloatS32, Tex2DFloatFloat, Tex2DFloatFloatLevel, Tex2DFloatFloatGrad, - Tex2DI32I32, - Tex2DI32Float, - Tex2DI32FloatLevel, - Tex2DI32FloatGrad, - Tex2DArrayFloatI32, + Tex2DS32S32, + Tex2DS32Float, + Tex2DS32FloatLevel, + Tex2DS32FloatGrad, + Tex2DU32S32, + Tex2DU32Float, + Tex2DU32FloatLevel, + Tex2DU32FloatGrad, + Tex2DArrayFloatS32, Tex2DArrayFloatFloat, Tex2DArrayFloatFloatLevel, Tex2DArrayFloatFloatGrad, - Tex2DArrayI32I32, - Tex2DArrayI32Float, - Tex2DArrayI32FloatLevel, - Tex2DArrayI32FloatGrad, - Tex3DFloatI32, + Tex2DArrayS32S32, + Tex2DArrayS32Float, + Tex2DArrayS32FloatLevel, + Tex2DArrayS32FloatGrad, + Tex2DArrayU32S32, + Tex2DArrayU32Float, + Tex2DArrayU32FloatLevel, + Tex2DArrayU32FloatGrad, + Tex3DFloatS32, Tex3DFloatFloat, Tex3DFloatFloatLevel, Tex3DFloatFloatGrad, - Tex3DI32I32, - Tex3DI32Float, - Tex3DI32FloatLevel, - Tex3DI32FloatGrad, + Tex3DS32S32, + Tex3DS32Float, + Tex3DS32FloatLevel, + Tex3DS32FloatGrad, + Tex3DU32S32, + Tex3DU32Float, + Tex3DU32FloatLevel, + Tex3DU32FloatGrad, + TexCubeFloatFloat, + TexCubeFloatFloatLevel, + TexCubeS32Float, + TexCubeS32FloatLevel, + TexCubeU32Float, + TexCubeU32FloatLevel, + TexCubeArrayFloatFloat, + TexCubeArrayFloatFloatLevel, + TexCubeArrayS32Float, + TexCubeArrayS32FloatLevel, + TexCubeArrayU32Float, + TexCubeArrayU32FloatLevel, + Tld4R2DFloatFloat, + Tld4G2DFloatFloat, + Tld4B2DFloatFloat, + Tld4A2DFloatFloat, + Tld4R2DS64Float, + Tld4G2DS64Float, + Tld4B2DS64Float, + Tld4A2DS64Float, + Tld4R2DU64Float, + Tld4G2DU64Float, + Tld4B2DU64Float, + Tld4A2DU64Float, + TexUnified1DFloatS32, + TexUnified1DFloatFloat, + TexUnified1DFloatFloatLevel, + TexUnified1DFloatFloatGrad, + TexUnified1DS32S32, + TexUnified1DS32Float, + TexUnified1DS32FloatLevel, + TexUnified1DS32FloatGrad, + TexUnified1DU32S32, + TexUnified1DU32Float, + TexUnified1DU32FloatLevel, + TexUnified1DU32FloatGrad, + TexUnified1DArrayFloatS32, + TexUnified1DArrayFloatFloat, + TexUnified1DArrayFloatFloatLevel, + TexUnified1DArrayFloatFloatGrad, + TexUnified1DArrayS32S32, + TexUnified1DArrayS32Float, + TexUnified1DArrayS32FloatLevel, + TexUnified1DArrayS32FloatGrad, + TexUnified1DArrayU32S32, + TexUnified1DArrayU32Float, + TexUnified1DArrayU32FloatLevel, + TexUnified1DArrayU32FloatGrad, + TexUnified2DFloatS32, + TexUnified2DFloatFloat, + TexUnified2DFloatFloatLevel, + TexUnified2DFloatFloatGrad, + TexUnified2DS32S32, + TexUnified2DS32Float, + TexUnified2DS32FloatLevel, + TexUnified2DS32FloatGrad, + TexUnified2DU32S32, + TexUnified2DU32Float, + TexUnified2DU32FloatLevel, + TexUnified2DU32FloatGrad, + TexUnified2DArrayFloatS32, + TexUnified2DArrayFloatFloat, + TexUnified2DArrayFloatFloatLevel, + TexUnified2DArrayFloatFloatGrad, + TexUnified2DArrayS32S32, + TexUnified2DArrayS32Float, + TexUnified2DArrayS32FloatLevel, + TexUnified2DArrayS32FloatGrad, + TexUnified2DArrayU32S32, + TexUnified2DArrayU32Float, + TexUnified2DArrayU32FloatLevel, + TexUnified2DArrayU32FloatGrad, + TexUnified3DFloatS32, + TexUnified3DFloatFloat, + TexUnified3DFloatFloatLevel, + TexUnified3DFloatFloatGrad, + TexUnified3DS32S32, + TexUnified3DS32Float, + TexUnified3DS32FloatLevel, + TexUnified3DS32FloatGrad, + TexUnified3DU32S32, + TexUnified3DU32Float, + TexUnified3DU32FloatLevel, + TexUnified3DU32FloatGrad, + TexUnifiedCubeFloatFloat, + TexUnifiedCubeFloatFloatLevel, + TexUnifiedCubeS32Float, + TexUnifiedCubeS32FloatLevel, + TexUnifiedCubeU32Float, + TexUnifiedCubeU32FloatLevel, + TexUnifiedCubeArrayFloatFloat, + TexUnifiedCubeArrayFloatFloatLevel, + TexUnifiedCubeArrayS32Float, + TexUnifiedCubeArrayS32FloatLevel, + TexUnifiedCubeArrayU32Float, + TexUnifiedCubeArrayU32FloatLevel, + Tld4UnifiedR2DFloatFloat, + Tld4UnifiedG2DFloatFloat, + Tld4UnifiedB2DFloatFloat, + Tld4UnifiedA2DFloatFloat, + Tld4UnifiedR2DS64Float, + Tld4UnifiedG2DS64Float, + Tld4UnifiedB2DS64Float, + Tld4UnifiedA2DS64Float, + Tld4UnifiedR2DU64Float, + Tld4UnifiedG2DU64Float, + Tld4UnifiedB2DU64Float, + Tld4UnifiedA2DU64Float, // Surface intrinsics + Suld1DI8Clamp, + Suld1DI16Clamp, + Suld1DI32Clamp, + Suld1DI64Clamp, + Suld1DV2I8Clamp, + Suld1DV2I16Clamp, + Suld1DV2I32Clamp, + Suld1DV2I64Clamp, + Suld1DV4I8Clamp, + Suld1DV4I16Clamp, + Suld1DV4I32Clamp, + + Suld1DArrayI8Clamp, + Suld1DArrayI16Clamp, + Suld1DArrayI32Clamp, + Suld1DArrayI64Clamp, + Suld1DArrayV2I8Clamp, + Suld1DArrayV2I16Clamp, + Suld1DArrayV2I32Clamp, + Suld1DArrayV2I64Clamp, + Suld1DArrayV4I8Clamp, + Suld1DArrayV4I16Clamp, + Suld1DArrayV4I32Clamp, + + Suld2DI8Clamp, + Suld2DI16Clamp, + Suld2DI32Clamp, + Suld2DI64Clamp, + Suld2DV2I8Clamp, + Suld2DV2I16Clamp, + Suld2DV2I32Clamp, + Suld2DV2I64Clamp, + Suld2DV4I8Clamp, + Suld2DV4I16Clamp, + Suld2DV4I32Clamp, + + Suld2DArrayI8Clamp, + Suld2DArrayI16Clamp, + Suld2DArrayI32Clamp, + Suld2DArrayI64Clamp, + Suld2DArrayV2I8Clamp, + Suld2DArrayV2I16Clamp, + Suld2DArrayV2I32Clamp, + Suld2DArrayV2I64Clamp, + Suld2DArrayV4I8Clamp, + Suld2DArrayV4I16Clamp, + Suld2DArrayV4I32Clamp, + + Suld3DI8Clamp, + Suld3DI16Clamp, + Suld3DI32Clamp, + Suld3DI64Clamp, + Suld3DV2I8Clamp, + Suld3DV2I16Clamp, + Suld3DV2I32Clamp, + Suld3DV2I64Clamp, + Suld3DV4I8Clamp, + Suld3DV4I16Clamp, + Suld3DV4I32Clamp, + Suld1DI8Trap, Suld1DI16Trap, Suld1DI32Trap, + Suld1DI64Trap, Suld1DV2I8Trap, Suld1DV2I16Trap, Suld1DV2I32Trap, + Suld1DV2I64Trap, Suld1DV4I8Trap, Suld1DV4I16Trap, Suld1DV4I32Trap, @@ -132,9 +322,11 @@ enum NodeType { Suld1DArrayI8Trap, Suld1DArrayI16Trap, Suld1DArrayI32Trap, + Suld1DArrayI64Trap, Suld1DArrayV2I8Trap, Suld1DArrayV2I16Trap, Suld1DArrayV2I32Trap, + Suld1DArrayV2I64Trap, Suld1DArrayV4I8Trap, Suld1DArrayV4I16Trap, Suld1DArrayV4I32Trap, @@ -142,9 +334,11 @@ enum NodeType { Suld2DI8Trap, Suld2DI16Trap, Suld2DI32Trap, + Suld2DI64Trap, Suld2DV2I8Trap, Suld2DV2I16Trap, Suld2DV2I32Trap, + Suld2DV2I64Trap, Suld2DV4I8Trap, Suld2DV4I16Trap, Suld2DV4I32Trap, @@ -152,9 +346,11 @@ enum NodeType { Suld2DArrayI8Trap, Suld2DArrayI16Trap, Suld2DArrayI32Trap, + Suld2DArrayI64Trap, Suld2DArrayV2I8Trap, Suld2DArrayV2I16Trap, Suld2DArrayV2I32Trap, + Suld2DArrayV2I64Trap, Suld2DArrayV4I8Trap, Suld2DArrayV4I16Trap, Suld2DArrayV4I32Trap, @@ -162,12 +358,74 @@ enum NodeType { Suld3DI8Trap, Suld3DI16Trap, Suld3DI32Trap, + Suld3DI64Trap, Suld3DV2I8Trap, Suld3DV2I16Trap, Suld3DV2I32Trap, + Suld3DV2I64Trap, Suld3DV4I8Trap, Suld3DV4I16Trap, - Suld3DV4I32Trap + Suld3DV4I32Trap, + + Suld1DI8Zero, + Suld1DI16Zero, + Suld1DI32Zero, + Suld1DI64Zero, + Suld1DV2I8Zero, + Suld1DV2I16Zero, + Suld1DV2I32Zero, + Suld1DV2I64Zero, + Suld1DV4I8Zero, + Suld1DV4I16Zero, + Suld1DV4I32Zero, + + Suld1DArrayI8Zero, + Suld1DArrayI16Zero, + Suld1DArrayI32Zero, + Suld1DArrayI64Zero, + Suld1DArrayV2I8Zero, + Suld1DArrayV2I16Zero, + Suld1DArrayV2I32Zero, + Suld1DArrayV2I64Zero, + Suld1DArrayV4I8Zero, + Suld1DArrayV4I16Zero, + Suld1DArrayV4I32Zero, + + Suld2DI8Zero, + Suld2DI16Zero, + Suld2DI32Zero, + Suld2DI64Zero, + Suld2DV2I8Zero, + Suld2DV2I16Zero, + Suld2DV2I32Zero, + Suld2DV2I64Zero, + Suld2DV4I8Zero, + Suld2DV4I16Zero, + Suld2DV4I32Zero, + + Suld2DArrayI8Zero, + Suld2DArrayI16Zero, + Suld2DArrayI32Zero, + Suld2DArrayI64Zero, + Suld2DArrayV2I8Zero, + Suld2DArrayV2I16Zero, + Suld2DArrayV2I32Zero, + Suld2DArrayV2I64Zero, + Suld2DArrayV4I8Zero, + Suld2DArrayV4I16Zero, + Suld2DArrayV4I32Zero, + + Suld3DI8Zero, + Suld3DI16Zero, + Suld3DI32Zero, + Suld3DI64Zero, + Suld3DV2I8Zero, + Suld3DV2I16Zero, + Suld3DV2I32Zero, + Suld3DV2I64Zero, + Suld3DV4I8Zero, + Suld3DV4I16Zero, + Suld3DV4I32Zero }; } @@ -178,7 +436,8 @@ class NVPTXSubtarget; //===--------------------------------------------------------------------===// class NVPTXTargetLowering : public TargetLowering { public: - explicit NVPTXTargetLowering(NVPTXTargetMachine &TM); + explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, + const NVPTXSubtarget &STI); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -197,22 +456,23 @@ public: /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; - EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override { + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, + EVT VT) const override { if (VT.isVector()) return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); return MVT::i1; } - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const override; + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; SDValue LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -222,7 +482,7 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; - std::string getPrototype(Type *, const ArgListTy &, + std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl &, unsigned retAlignment, const ImmutableCallSite *CS) const; @@ -237,15 +497,24 @@ public: std::vector &Ops, SelectionDAG &DAG) const override; - NVPTXTargetMachine *nvTM; + const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } + + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; - bool shouldSplitVectorType(EVT VT) const override; + bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; + + bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } + + bool enableAggressiveFMAFusion(EVT VT) const override { return true; } private: - const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here + const NVPTXSubtarget &STI; // cache the subtarget here SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT = MVT::i32) const; @@ -264,6 +533,8 @@ private: SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; @@ -273,4 +544,4 @@ private: }; } // namespace llvm -#endif // NVPTXISELLOWERING_H +#endif