X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FNVPTX%2FNVPTXISelLowering.cpp;h=e9a9fbfd04fbb9f8c3263788ecca3079fe109765;hb=3ef5383b3537a420c5e2ab3e657c378e5185549d;hp=82db0886c755d17c32277bd1a0266b5531d578e9;hpb=034b94b17006f51722886b0f2283fb6fb19aca1f;p=oota-llvm.git

diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 82db0886c75..e9a9fbfd04f 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -23,13 +23,13 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Module.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -44,16 +44,28 @@ using namespace llvm;
 
 static unsigned int uniqueCallSite = 0;
 
-static cl::opt<bool>
-RetainVectorOperands("nvptx-codegen-vectors",
-  cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
-  cl::init(true));
-
 static cl::opt<bool>
 sched4reg("nvptx-sched4reg",
           cl::desc("NVPTX Specific: schedule for register pressue"),
           cl::init(false));
 
+static bool IsPTXVectorType(MVT VT) {
+  switch (VT.SimpleTy) {
+  default: return false;
+  case MVT::v2i8:
+  case MVT::v4i8:
+  case MVT::v2i16:
+  case MVT::v4i16:
+  case MVT::v2i32:
+  case MVT::v4i32:
+  case MVT::v2i64:
+  case MVT::v2f32:
+  case MVT::v4f32:
+  case MVT::v2f64:
+    return true;
+  }
+}
+
 // NVPTXTargetLowering Constructor.
 NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
 : TargetLowering(TM, new NVPTXTargetObjectFile()),
@@ -63,9 +75,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   // always lower memset, memcpy, and memmove intrinsics to load/store
   // instructions, rather
   // then generating calls to memset, mempcy or memmove.
-  maxStoresPerMemset = (unsigned)0xFFFFFFFF;
-  maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
-  maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+  MaxStoresPerMemset = (unsigned)0xFFFFFFFF;
+  MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
+  MaxStoresPerMemmove = (unsigned)0xFFFFFFFF;

   setBooleanContents(ZeroOrNegativeOneBooleanContent);

@@ -87,44 +99,15 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
   addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

-  if (RetainVectorOperands) {
-    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
-    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
-    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
-    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
-    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
-    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
-    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
-    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
-    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
-    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
-
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
-
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
-  }
-
   // Operations not directly supported by NVPTX.
   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -191,42 +174,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   // TRAP can be lowered to PTX trap
   setOperationAction(ISD::TRAP, MVT::Other, Legal);

-  // By default, CONCAT_VECTORS is implemented via store/load
-  // through stack. It is slow and uses local memory. We need
-  // to custom-lowering them.
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
-  setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom);
-
-  // Expand vector int to float and float to int conversions
-  //  - For SINT_TO_FP and UINT_TO_FP, the src type
-  //    (Node->getOperand(0).getValueType())
-  //    is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
-  //    the dest type (Node->getValueType(0)) is used.
-  //
-  //  See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
-  //  case, and
-  //  SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
-  //
-  //  That is why v4i32 or v2i32 are used here.
-  //
-  //  The expansion for vectors happens in VectorLegalizer::LegalizeOp()
-  //  (LegalizeVectorOps.cpp).
-  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
-  setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
-  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
-  setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
-  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
-  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
-  setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
-  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+  // Register custom handling for vector loads/stores
+  for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+    MVT VT = (MVT::SimpleValueType)i;
+    if (IsPTXVectorType(VT)) {
+      setOperationAction(ISD::LOAD, VT, Custom);
+      setOperationAction(ISD::STORE, VT, Custom);
+      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+    }
+  }

   // Now deduce the information based on the above mentioned
   // actions
@@ -268,6 +225,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
   case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
   case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
+  case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
+  case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
+  case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
+  case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
+  case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
+  case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
+  case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
+  case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
   }
 }

@@ -757,16 +722,15 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
         unsigned sz = Ins[i].VT.getSizeInBits();
         if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
-        std::vector<EVT> LoadRetVTs;
-        LoadRetVTs.push_back(Ins[i].VT);
-        LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
-        std::vector<SDValue> LoadRetOps;
-        LoadRetOps.push_back(Chain);
-        LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
-        LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
-        LoadRetOps.push_back(InFlag);
+        EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
+        SDValue LoadRetOps[] = {
+          Chain,
+          DAG.getConstant(1, MVT::i32),
+          DAG.getConstant(resoffset, MVT::i32),
+          InFlag
+        };
         SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
-                                     &LoadRetOps[0], LoadRetOps.size());
+                                     LoadRetOps, array_lengthof(LoadRetOps));
         Chain = retval.getValue(1);
         InFlag = retval.getValue(2);
         InVals.push_back(retval);
@@ -791,16 +755,15 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         }
         std::vector<SDValue> tempRetVals;
         for (unsigned j=0; j<numelems; ++j) {
-          std::vector<EVT> MoveRetVTs;
-          MoveRetVTs.push_back(elemtype);
-          MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
-          std::vector<SDValue> MoveRetOps;
-          MoveRetOps.push_back(Chain);
-          MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
-          MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
-          MoveRetOps.push_back(InFlag);
+          EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
+          SDValue MoveRetOps[] = {
+            Chain,
+            DAG.getConstant(0, MVT::i32),
+            DAG.getConstant(paramNum, MVT::i32),
+            InFlag
+          };
           SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
-                                       &MoveRetOps[0], MoveRetOps.size());
+                                       MoveRetOps, array_lengthof(MoveRetOps));
           Chain = retval.getValue(1);
           InFlag = retval.getValue(2);
           tempRetVals.push_back(retval);
@@ -868,12 +831,19 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 }

+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType() == MVT::i1)
+    return LowerLOADi1(Op, DAG);
+  else
+    return SDValue();
+}
+
 // v = ld i1* addr
 //   =>
 // v1 = ld i8* addr
 // v = trunc v1 to i1
 SDValue NVPTXTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   LoadSDNode *LD = cast<LoadSDNode>(Node);
   DebugLoc dl = Node->getDebugLoc();
@@ -893,12 +863,109 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues(Ops, 2, dl);
 }

+SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  EVT ValVT = Op.getOperand(1).getValueType();
+  if (ValVT == MVT::i1)
+    return LowerSTOREi1(Op, DAG);
+  else if (ValVT.isVector())
+    return LowerSTOREVector(Op, DAG);
+  else
+    return SDValue();
+}
+
+SDValue
+NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
+  SDNode *N = Op.getNode();
+  SDValue Val = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+  EVT ValVT = Val.getValueType();
+
+  if (ValVT.isVector()) {
+    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+    // legal. We can (and should) split that into 2 stores of <2 x double> here
+    // but I'm leaving that as a TODO for now.
+    if (!ValVT.isSimple())
+      return SDValue();
+    switch (ValVT.getSimpleVT().SimpleTy) {
+    default: return SDValue();
+    case MVT::v2i8:
+    case MVT::v2i16:
+    case MVT::v2i32:
+    case MVT::v2i64:
+    case MVT::v2f32:
+    case MVT::v2f64:
+    case MVT::v4i8:
+    case MVT::v4i16:
+    case MVT::v4i32:
+    case MVT::v4f32:
+      // This is a "native" vector type
+      break;
+    }
+
+    unsigned Opcode = 0;
+    EVT EltVT = ValVT.getVectorElementType();
+    unsigned NumElts = ValVT.getVectorNumElements();
+
+    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
+    // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+    // stored type to i16 and propagate the "real" type as the memory type.
+    bool NeedExt = false;
+    if (EltVT.getSizeInBits() < 16)
+      NeedExt = true;
+
+    switch (NumElts) {
+    default: return SDValue();
+    case 2:
+      Opcode = NVPTXISD::StoreV2;
+      break;
+    case 4: {
+      Opcode = NVPTXISD::StoreV4;
+      break;
+    }
+    }
+
+    SmallVector<SDValue, 8> Ops;
+
+    // First is the chain
+    Ops.push_back(N->getOperand(0));
+
+    // Then the split values
+    for (unsigned i = 0; i < NumElts; ++i) {
+      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+                                   DAG.getIntPtrConstant(i));
+      if (NeedExt)
+        // ANY_EXTEND is correct here since the store will only look at the
+        // lower-order bits anyway.
+        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+      Ops.push_back(ExtVal);
+    }
+
+    // Then any remaining arguments
+    for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
+      Ops.push_back(N->getOperand(i));
+    }
+
+    MemSDNode *MemSD = cast<MemSDNode>(N);
+
+    SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
+                                            DAG.getVTList(MVT::Other), &Ops[0],
+                                            Ops.size(), MemSD->getMemoryVT(),
+                                            MemSD->getMemOperand());
+
+
+    //return DCI.CombineTo(N, NewSt, true);
+    return NewSt;
+  }
+
+  return SDValue();
+}
+
 // st i1 v, addr
 //    =>
 // v1 = zxt v to i8
 // st i8, addr
 SDValue NVPTXTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
   StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -1022,14 +1089,16 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
     // to newly created nodes. The SDNOdes for params have to
     // appear in the same order as their order of appearance
     // in the original function. "idx+1" holds that order.
-    if (PAL.getParamAttributes(i+1).hasAttribute(Attribute::ByVal) == false) {
+    if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) {
       // A plain scalar.
       if (isABI || isKernel) {
         // If ABI, load from the param symbol
         SDValue Arg = getParamSymbol(DAG, idx);
-        Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
-            F->getContext()),
-            llvm::ADDRESS_SPACE_PARAM));
+        // Conjure up a value that we can get the address space from.
+        // FIXME: Using a constant here is a hack.
+        Value *srcValue = Constant::getNullValue(PointerType::get(
+            ObjectVT.getTypeForEVT(F->getContext()),
+            llvm::ADDRESS_SPACE_PARAM));
         SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
                                 MachinePointerInfo(srcValue), false, false, false,
@@ -1346,3 +1415,242 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
   return 4;
 }
+
+/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
+static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+                              SmallVectorImpl<SDValue> &Results) {
+  EVT ResVT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  assert(ResVT.isVector() && "Vector load must have vector type");
+
+  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+  // legal. We can (and should) split that into 2 loads of <2 x double> here
+  // but I'm leaving that as a TODO for now.
+  assert(ResVT.isSimple() && "Can only handle simple types");
+  switch (ResVT.getSimpleVT().SimpleTy) {
+  default: return;
+  case MVT::v2i8:
+  case MVT::v2i16:
+  case MVT::v2i32:
+  case MVT::v2i64:
+  case MVT::v2f32:
+  case MVT::v2f64:
+  case MVT::v4i8:
+  case MVT::v4i16:
+  case MVT::v4i32:
+  case MVT::v4f32:
+    // This is a "native" vector type
+    break;
+  }
+
+  EVT EltVT = ResVT.getVectorElementType();
+  unsigned NumElts = ResVT.getVectorNumElements();
+
+  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
+  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+  // loaded type to i16 and propagate the "real" type as the memory type.
+  bool NeedTrunc = false;
+  if (EltVT.getSizeInBits() < 16) {
+    EltVT = MVT::i16;
+    NeedTrunc = true;
+  }
+
+  unsigned Opcode = 0;
+  SDVTList LdResVTs;
+
+  switch (NumElts) {
+  default: return;
+  case 2:
+    Opcode = NVPTXISD::LoadV2;
+    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+    break;
+  case 4: {
+    Opcode = NVPTXISD::LoadV4;
+    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+    LdResVTs = DAG.getVTList(ListVTs, 5);
+    break;
+  }
+  }
+
+  SmallVector<SDValue, 8> OtherOps;
+
+  // Copy regular operands
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    OtherOps.push_back(N->getOperand(i));
+
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+
+  // The select routine does not have access to the LoadSDNode instance, so
+  // pass along the extension information
+  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+
+  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+                                          OtherOps.size(), LD->getMemoryVT(),
+                                          LD->getMemOperand());
+
+  SmallVector<SDValue, 4> ScalarRes;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Res = NewLD.getValue(i);
+    if (NeedTrunc)
+      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+    ScalarRes.push_back(Res);
+  }
+
+  SDValue LoadChain = NewLD.getValue(NumElts);
+
+  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+  Results.push_back(BuildVec);
+  Results.push_back(LoadChain);
+}
+
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
+                                     SelectionDAG &DAG,
+                                     SmallVectorImpl<SDValue> &Results) {
+  SDValue Chain = N->getOperand(0);
+  SDValue Intrin = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+
+  // Get the intrinsic ID
+  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
+  switch(IntrinNo) {
+  default: return;
+  case Intrinsic::nvvm_ldg_global_i:
+  case Intrinsic::nvvm_ldg_global_f:
+  case Intrinsic::nvvm_ldg_global_p:
+  case Intrinsic::nvvm_ldu_global_i:
+  case Intrinsic::nvvm_ldu_global_f:
+  case Intrinsic::nvvm_ldu_global_p: {
+    EVT ResVT = N->getValueType(0);
+
+    if (ResVT.isVector()) {
+      // Vector LDG/LDU
+
+      unsigned NumElts = ResVT.getVectorNumElements();
+      EVT EltVT = ResVT.getVectorElementType();
+
+      // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
+      // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+      // loaded type to i16 and propagate the "real" type as the memory type.
+      bool NeedTrunc = false;
+      if (EltVT.getSizeInBits() < 16) {
+        EltVT = MVT::i16;
+        NeedTrunc = true;
+      }
+
+      unsigned Opcode = 0;
+      SDVTList LdResVTs;
+
+      switch (NumElts) {
+      default: return;
+      case 2:
+        switch(IntrinNo) {
+        default: return;
+        case Intrinsic::nvvm_ldg_global_i:
+        case Intrinsic::nvvm_ldg_global_f:
+        case Intrinsic::nvvm_ldg_global_p:
+          Opcode = NVPTXISD::LDGV2;
+          break;
+        case Intrinsic::nvvm_ldu_global_i:
+        case Intrinsic::nvvm_ldu_global_f:
+        case Intrinsic::nvvm_ldu_global_p:
+          Opcode = NVPTXISD::LDUV2;
+          break;
+        }
+        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+        break;
+      case 4: {
+        switch(IntrinNo) {
+        default: return;
+        case Intrinsic::nvvm_ldg_global_i:
+        case Intrinsic::nvvm_ldg_global_f:
+        case Intrinsic::nvvm_ldg_global_p:
+          Opcode = NVPTXISD::LDGV4;
+          break;
+        case Intrinsic::nvvm_ldu_global_i:
+        case Intrinsic::nvvm_ldu_global_f:
+        case Intrinsic::nvvm_ldu_global_p:
+          Opcode = NVPTXISD::LDUV4;
+          break;
+        }
+        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+        LdResVTs = DAG.getVTList(ListVTs, 5);
+        break;
+      }
+      }
+
+      SmallVector<SDValue, 8> OtherOps;
+
+      // Copy regular operands
+
+      OtherOps.push_back(Chain); // Chain
+      // Skip operand 1 (intrinsic ID)
+      // Others
+      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
+        OtherOps.push_back(N->getOperand(i));
+
+      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+                                              OtherOps.size(), MemSD->getMemoryVT(),
+                                              MemSD->getMemOperand());
+
+      SmallVector<SDValue, 4> ScalarRes;
+
+      for (unsigned i = 0; i < NumElts; ++i) {
+        SDValue Res = NewLD.getValue(i);
+        if (NeedTrunc)
+          Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+        ScalarRes.push_back(Res);
+      }
+
+      SDValue LoadChain = NewLD.getValue(NumElts);
+
+      SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+      Results.push_back(BuildVec);
+      Results.push_back(LoadChain);
+    } else {
+      // i8 LDG/LDU
+      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
+             "Custom handling of non-i8 ldu/ldg?");
+
+      // Just copy all operands as-is
+      SmallVector<SDValue, 4> Ops;
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        Ops.push_back(N->getOperand(i));
+
+      // Force output to i16
+      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
+
+      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+      // We make sure the memory type is i8, which will be used during isel
+      // to select the proper instruction.
+      SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
+                                              LdResVTs, &Ops[0],
+                                              Ops.size(), MVT::i8,
+                                              MemSD->getMemOperand());
+
+      Results.push_back(NewLD.getValue(0));
+      Results.push_back(NewLD.getValue(1));
+    }
+  }
+  }
+}
+
+void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
+                                             SmallVectorImpl<SDValue> &Results,
+                                             SelectionDAG &DAG) const {
+  switch (N->getOpcode()) {
+  default: report_fatal_error("Unhandled custom legalization");
+  case ISD::LOAD:
+    ReplaceLoadVector(N, DAG, Results);
+    return;
+  case ISD::INTRINSIC_W_CHAIN:
+    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
+    return;
+  }
+}
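
Note (not part of the patch): the custom-lowering hunks above all apply one rule. A vector load or store is turned into a single LoadV2/LoadV4 or StoreV2/StoreV4 target node only for the "native" PTX vector shapes accepted by IsPTXVectorType, and elements narrower than 16 bits are widened to i16 for the operation (ANY_EXTEND on the store side, TRUNCATE after the load side) while the original element type is kept as the memory type. The standalone C++ sketch below restates that selection rule outside of LLVM; the enum, struct, and function names are invented for illustration and do not appear in the patch.

// Illustrative sketch only -- mirrors the selection logic of the patch's
// LowerSTOREVector/ReplaceLoadVector hunks without using any LLVM APIs.
#include <cstdio>

enum class VecOp { None, V2, V4 };   // hypothetical stand-ins for StoreV2/StoreV4 etc.

struct Lowering {
  VecOp op;          // which vector form is chosen (None => default scalarized path)
  unsigned opBits;   // element width actually used by the operation
  bool widened;      // true when sub-16-bit elements were widened to 16 bits
};

// Accept only the "native" PTX vector shapes, as IsPTXVectorType does:
// 2 or 4 elements of i8/i16/i32/f32, and 2 elements of i64/f64.
static bool isNativePTXVector(unsigned numElts, unsigned eltBits, bool isFloat) {
  if (numElts != 2 && numElts != 4)
    return false;
  if (eltBits == 64)
    return numElts == 2;                 // only v2i64 / v2f64; v4i64 / v4f64 are not native
  if (isFloat)
    return eltBits == 32;                // v2f32 / v4f32
  return eltBits == 8 || eltBits == 16 || eltBits == 32;
}

static Lowering classify(unsigned numElts, unsigned eltBits, bool isFloat) {
  if (!isNativePTXVector(numElts, eltBits, isFloat))
    return { VecOp::None, eltBits, false };
  // Elements narrower than 16 bits are widened to i16 for the operation;
  // the original element type is only kept as the memory type.
  bool widen = eltBits < 16;
  unsigned opBits = widen ? 16 : eltBits;
  return { numElts == 2 ? VecOp::V2 : VecOp::V4, opBits, widen };
}

int main() {
  // v4i8 store: native, uses the 4-element form with elements widened to 16 bits.
  Lowering a = classify(4, 8, false);
  std::printf("v4i8 : op=%d opBits=%u widened=%d\n", (int)a.op, a.opBits, a.widened);
  // v2f64 load/store: native, no widening needed.
  Lowering b = classify(2, 64, true);
  std::printf("v2f64: op=%d opBits=%u widened=%d\n", (int)b.op, b.opBits, b.widened);
  // v4f64 is not a native PTX vector shape, so the default path is kept.
  Lowering c = classify(4, 64, true);
  std::printf("v4f64: op=%d opBits=%u widened=%d\n", (int)c.op, c.opBits, c.widened);
  return 0;
}

Built with any C++11 compiler, the sketch reports v4i8 mapping to the 4-wide form with widening, v2f64 mapping to the 2-wide form unchanged, and v4f64 falling back to the default path, which is the same outcome the switch statements in LowerSTOREVector and ReplaceLoadVector produce for those types.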