diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 9891ad32fa7..36de4829219 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -18,6 +18,7 @@
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDILIntrinsicInfo.h"
+#include "R600MachineFunctionInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -27,6 +28,14 @@
 #include "llvm/IR/DataLayout.h"
 
 using namespace llvm;
+static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
+                          CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+  return true;
+}
 
 #include "AMDGPUGenCallingConv.inc"
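The allocateStack helper above delegates the actual bookkeeping to CCState::AllocateStack, which rounds the running stack offset up to the argument's original alignment and then advances it by the argument's size in bytes. A standalone sketch of that arithmetic (illustrative names, not LLVM code; assumes the alignment is a power of two):

    #include <cstdint>

    // Round Offset up to Align, hand out that slot, then advance past Size.
    uint64_t allocStackSlot(uint64_t &Offset, uint64_t Size, uint64_t Align) {
      Offset = (Offset + Align - 1) & ~(Align - 1); // align up
      uint64_t Slot = Offset;
      Offset += Size;
      return Slot;
    }

For example, a 4-byte argument with 8-byte alignment placed when the running offset is 6 lands at offset 8 and leaves the offset at 12.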
@@ -57,48 +66,148 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::STORE, MVT::f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
 
+  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
+
   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
+
+  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
+
   setOperationAction(ISD::STORE, MVT::f64, Promote);
   AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
 
+  // Custom lowering of vector stores is required for local address space
+  // stores.
+  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+  // XXX: Native v2i32 local address space stores are possible, but not
+  // currently implemented.
+  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+
+  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
+  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
+  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
+  // XXX: This can be changed to Custom, once ExpandVectorStores can
+  // handle 64-bit stores.
+  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
+
   setOperationAction(ISD::LOAD, MVT::f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
 
+  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
+
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
+
+  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
+
   setOperationAction(ISD::LOAD, MVT::f64, Promote);
   AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
 
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
+
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
+
+  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
+  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
+
+  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
   setOperationAction(ISD::MUL, MVT::i64, Expand);
 
   setOperationAction(ISD::UDIV, MVT::i32, Expand);
   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
   setOperationAction(ISD::UREM, MVT::i32, Expand);
 
+  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
+  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
 
-  int types[] = {
-    (int)MVT::v2i32,
-    (int)MVT::v4i32
+  static const MVT::SimpleValueType IntTypes[] = {
+    MVT::v2i32, MVT::v4i32
   };
-  size_t NumTypes = sizeof(types) / sizeof(*types);
+  const size_t NumIntTypes = array_lengthof(IntTypes);
 
-  for (unsigned int x = 0; x < NumTypes; ++x) {
-    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+  for (unsigned int x = 0; x < NumIntTypes; ++x) {
+    MVT::SimpleValueType VT = IntTypes[x];
     // Expand the following operations for the current type by default.
     setOperationAction(ISD::ADD, VT, Expand);
     setOperationAction(ISD::AND, VT, Expand);
+    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
     setOperationAction(ISD::MUL, VT, Expand);
     setOperationAction(ISD::OR, VT, Expand);
     setOperationAction(ISD::SHL, VT, Expand);
+    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
     setOperationAction(ISD::SRL, VT, Expand);
     setOperationAction(ISD::SRA, VT, Expand);
     setOperationAction(ISD::SUB, VT, Expand);
     setOperationAction(ISD::UDIV, VT, Expand);
+    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
     setOperationAction(ISD::UREM, VT, Expand);
+    setOperationAction(ISD::VSELECT, VT, Expand);
     setOperationAction(ISD::XOR, VT, Expand);
   }
+
+  static const MVT::SimpleValueType FloatTypes[] = {
+    MVT::v2f32, MVT::v4f32
+  };
+  const size_t NumFloatTypes = array_lengthof(FloatTypes);
+
+  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+    MVT::SimpleValueType VT = FloatTypes[x];
+    setOperationAction(ISD::FADD, VT, Expand);
+    setOperationAction(ISD::FDIV, VT, Expand);
+    setOperationAction(ISD::FFLOOR, VT, Expand);
+    setOperationAction(ISD::FMUL, VT, Expand);
+    setOperationAction(ISD::FRINT, VT, Expand);
+    setOperationAction(ISD::FSQRT, VT, Expand);
+    setOperationAction(ISD::FSUB, VT, Expand);
+  }
+}
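Marking f32 loads and stores as Promote with an integer AddPromotedToType, as the constructor above does for the f32 vector types, means the memory operation is re-issued on the same-sized integer type with bitcasts around it; no bytes change. A host-side analogy of that reinterpretation (plain C++, only to show the bits survive the round trip):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      float F = 1.5f;
      uint32_t Bits;                           // the "promoted" i32 carrier
      std::memcpy(&Bits, &F, sizeof(Bits));    // store f32 as i32
      float Back;
      std::memcpy(&Back, &Bits, sizeof(Back)); // load it back as f32
      assert(Back == 1.5f);
      return 0;
    }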
+
+//===----------------------------------------------------------------------===//
+// Target Information
+//===----------------------------------------------------------------------===//
+
+MVT AMDGPUTargetLowering::getVectorIdxTy() const {
+  return MVT::i32;
+}
+
+
+//===---------------------------------------------------------------------===//
+// Target Properties
+//===---------------------------------------------------------------------===//
+
+bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
+  assert(VT.isFloatingPoint());
+  return VT == MVT::f32;
+}
+
+bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
+  assert(VT.isFloatingPoint());
+  return VT == MVT::f32;
 }
 
 //===---------------------------------------------------------------------===//
@@ -139,8 +248,12 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   // AMDGPU DAG lowering
+  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::STORE: return LowerSTORE(Op, DAG);
   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
   }
   return Op;
 }
@@ -151,20 +264,69 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
 
   const DataLayout *TD = getTargetMachine().getDataLayout();
   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
+
+  assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
 
   // XXX: What does the value of G->getOffset() mean?
   assert(G->getOffset() == 0 &&
          "Do not know what to do with an non-zero offset");
 
-  unsigned Offset = MFI->LDSSize;
   const GlobalValue *GV = G->getGlobal();
-  uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
 
-  // XXX: Account for alignment?
-  MFI->LDSSize += Size;
+  unsigned Offset;
+  if (MFI->LocalMemoryObjects.count(GV) == 0) {
+    uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+    Offset = MFI->LDSSize;
+    MFI->LocalMemoryObjects[GV] = Offset;
+    // XXX: Account for alignment?
+    MFI->LDSSize += Size;
+  } else {
+    Offset = MFI->LocalMemoryObjects[GV];
+  }
+
+  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
+}
+
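The rewritten LowerGlobalAddress gives each LDS global a stable offset: the first lookup bump-allocates from LDSSize and records the offset in LocalMemoryObjects, and later lookups reuse it, so repeated uses of one global no longer double-allocate. A minimal sketch of that bookkeeping (hypothetical standalone types, keyed by name instead of GlobalValue*):

    #include <cstdint>
    #include <map>
    #include <string>

    struct LDSLayout {
      std::map<std::string, uint64_t> LocalMemoryObjects; // global -> offset
      uint64_t LDSSize = 0;

      uint64_t offsetFor(const std::string &GV, uint64_t Size) {
        auto It = LocalMemoryObjects.find(GV);
        if (It != LocalMemoryObjects.end())
          return It->second;        // seen before: reuse the same offset
        uint64_t Offset = LDSSize;  // first use: bump-allocate
        LocalMemoryObjects[GV] = Offset;
        LDSSize += Size;            // XXX: alignment ignored, as in the patch
        return Offset;
      }
    };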
+void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+                                                 SmallVectorImpl<SDValue> &Args,
+                                                 unsigned Start,
+                                                 unsigned Count) const {
+  EVT VT = Op.getValueType();
+  for (unsigned i = Start, e = Start + Count; i != e; ++i) {
+    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
+                               VT.getVectorElementType(),
+                               Op, DAG.getConstant(i, MVT::i32)));
+  }
+}
+
+SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SmallVector<SDValue, 8> Args;
+  SDValue A = Op.getOperand(0);
+  SDValue B = Op.getOperand(1);
+
+  ExtractVectorElements(A, DAG, Args, 0,
+                        A.getValueType().getVectorNumElements());
+  ExtractVectorElements(B, DAG, Args, 0,
+                        B.getValueType().getVectorNumElements());
 
-  return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
+                     &Args[0], Args.size());
 }
 
+SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+
+  SmallVector<SDValue, 8> Args;
+  EVT VT = Op.getValueType();
+  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
+                        VT.getVectorNumElements());
+
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
+                     &Args[0], Args.size());
+}
+
 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     SelectionDAG &DAG) const {
   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -292,7 +454,121 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
   return Op;
 }
 
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
+                                              SelectionDAG &DAG) const {
+  LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+  EVT EltVT = Op.getValueType().getVectorElementType();
+  EVT PtrVT = Load->getBasePtr().getValueType();
+  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
+  SmallVector<SDValue, 8> Loads;
+  SDLoc SL(Op);
+
+  for (unsigned i = 0, e = NumElts; i != e; ++i) {
+    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
+                    DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
+    Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+                    Load->getChain(), Ptr,
+                    MachinePointerInfo(Load->getMemOperand()->getValue()),
+                    MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
+                    Load->getAlignment()));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
+                     Loads.size());
+}
+
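SplitVectorLoad, above, scalarizes a vector load: lane i becomes an extending load of the memory element type from BasePtr + i * (element size in bytes), and the lanes are rebuilt with BUILD_VECTOR. The same address arithmetic as a standalone host-side sketch for a v4i32 load (illustrative code, not part of the patch):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    std::vector<uint32_t> splitLoadV4I32(const uint8_t *BasePtr) {
      std::vector<uint32_t> Lanes;
      for (unsigned i = 0; i != 4; ++i) {
        uint32_t V;
        // Lane i lives at BasePtr + i * sizeof(element).
        std::memcpy(&V, BasePtr + i * sizeof(uint32_t), sizeof(V));
        Lanes.push_back(V);
      }
      return Lanes;
    }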
+SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
+                                               SelectionDAG &DAG) const {
+  StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+  EVT MemVT = Store->getMemoryVT();
+  unsigned MemBits = MemVT.getSizeInBits();
+
+  // Byte stores are really expensive, so if possible, try to pack a
+  // 32-bit vector truncating store into an i32 store.
+  // XXX: We could also optimize other vector bit widths.
+  if (!MemVT.isVector() || MemBits > 32) {
+    return SDValue();
+  }
+
+  SDLoc DL(Op);
+  const SDValue &Value = Store->getValue();
+  EVT VT = Value.getValueType();
+  const SDValue &Ptr = Store->getBasePtr();
+  EVT MemEltVT = MemVT.getVectorElementType();
+  unsigned MemEltBits = MemEltVT.getSizeInBits();
+  unsigned MemNumElements = MemVT.getVectorNumElements();
+  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+  SDValue Mask;
+  switch (MemEltBits) {
+  case 8:
+    Mask = DAG.getConstant(0xFF, PackedVT);
+    break;
+  case 16:
+    Mask = DAG.getConstant(0xFFFF, PackedVT);
+    break;
+  default:
+    llvm_unreachable("Cannot lower this vector store");
+  }
+  SDValue PackedValue;
+  for (unsigned i = 0; i < MemNumElements; ++i) {
+    EVT ElemVT = VT.getVectorElementType();
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
+                              DAG.getConstant(i, MVT::i32));
+    Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
+    Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
+    SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
+    Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
+    if (i == 0) {
+      PackedValue = Elt;
+    } else {
+      PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
+    }
+  }
+  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
+                      MachinePointerInfo(Store->getMemOperand()->getValue()),
+                      Store->isVolatile(), Store->isNonTemporal(),
+                      Store->getAlignment());
+}
+
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  StoreSDNode *Store = cast<StoreSDNode>(Op);
+  EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
+  EVT EltVT = Store->getValue().getValueType().getVectorElementType();
+  EVT PtrVT = Store->getBasePtr().getValueType();
+  unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
+  SDLoc SL(Op);
+
+  SmallVector<SDValue, 8> Chains;
+
+  for (unsigned i = 0, e = NumElts; i != e; ++i) {
+    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+                              Store->getValue(), DAG.getConstant(i, MVT::i32));
+    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
+                              Store->getBasePtr(),
+                              DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
+                                              PtrVT));
+    Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
+                     MachinePointerInfo(Store->getMemOperand()->getValue()),
+                     MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
+                     Store->getAlignment()));
+  }
+  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
+}
+
+SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
+  if (Result.getNode()) {
+    return Result;
+  }
+
+  StoreSDNode *Store = cast<StoreSDNode>(Op);
+  if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+      Store->getValue().getValueType().isVector()) {
+    return SplitVectorStore(Op, DAG);
+  }
+  return SDValue();
+}
+
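MergeVectorStore, above, turns a small truncating vector store into one integer store by masking each lane to its memory width, shifting it to its position, and OR-ing the results together. A host-side sketch of the same packing for a v4i32-to-v4i8 truncating store (illustrative code, not part of the patch):

    #include <cstdint>

    // Pack four lanes, truncated to 8 bits each, into a single i32.
    uint32_t packV4I8(const uint32_t Lanes[4]) {
      uint32_t Packed = 0;
      for (unsigned i = 0; i != 4; ++i)
        Packed |= (Lanes[i] & 0xFFu) << (8 * i);
      return Packed;
    }

One i32 store of the packed value then replaces four byte stores, which the comment in the patch notes are expensive.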
 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
@@ -349,13 +625,13 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
   SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                              DAG.getConstant(-1, VT),
                                              DAG.getConstant(0, VT),
-                                             ISD::SETGE);
-  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
-  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
-                                              DAG.getConstant(0, VT),
+                                             ISD::SETUGE);
+  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
+  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
+                                              Num_S_Remainder,
                                               DAG.getConstant(-1, VT),
                                               DAG.getConstant(0, VT),
-                                              ISD::SETGE);
+                                              ISD::SETUGE);
   // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
   SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                              Remainder_GE_Zero);
@@ -399,10 +675,62 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
   return DAG.getMergeValues(Ops, 2, DL);
 }
 
+SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDValue S0 = Op.getOperand(0);
+  SDLoc DL(Op);
+  if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
+    return SDValue();
+
+  // f32 uint_to_fp i64
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+                           DAG.getConstant(0, MVT::i32));
+  SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+                           DAG.getConstant(1, MVT::i32));
+  SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+  FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
+                        DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
+  return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
+}
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
 
+void AMDGPUTargetLowering::getOriginalFunctionArgs(
+    SelectionDAG &DAG,
+    const Function *F,
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    SmallVectorImpl<ISD::InputArg> &OrigIns) const {
+
+  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
+    if (Ins[i].ArgVT == Ins[i].VT) {
+      OrigIns.push_back(Ins[i]);
+      continue;
+    }
+
+    EVT VT;
+    if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
+      // Vector has been split into scalars.
+      VT = Ins[i].ArgVT.getVectorElementType();
+    } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
+               Ins[i].ArgVT.getVectorElementType() !=
+               Ins[i].VT.getVectorElementType()) {
+      // Vector elements have been promoted.
+      VT = Ins[i].ArgVT;
+    } else {
+      // Vector has been split into smaller vectors.
+      VT = Ins[i].VT;
+    }
+
+    ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
+                      Ins[i].OrigArgIndex, Ins[i].PartOffset);
+    OrigIns.push_back(Arg);
+  }
+}
+
 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
     return CFP->isExactlyValue(1.0);
@@ -464,5 +792,13 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CONST_ADDRESS)
   NODE_NAME_CASE(REGISTER_LOAD)
   NODE_NAME_CASE(REGISTER_STORE)
+  NODE_NAME_CASE(LOAD_CONSTANT)
+  NODE_NAME_CASE(LOAD_INPUT)
+  NODE_NAME_CASE(SAMPLE)
+  NODE_NAME_CASE(SAMPLEB)
+  NODE_NAME_CASE(SAMPLED)
+  NODE_NAME_CASE(SAMPLEL)
+  NODE_NAME_CASE(STORE_MSKOR)
+  NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
   }
 }
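A quick host-side check of the LowerUINT_TO_FP decomposition added above: the i64 is split into 32-bit halves, each half is converted separately, and the high half is scaled by 2^32 before the add, matching a direct conversion up to f32 rounding (standalone sketch, not part of the patch):

    #include <cstdint>

    float uint64ToFloat(uint64_t X) {
      float Lo = static_cast<float>(static_cast<uint32_t>(X));       // low 32 bits
      float Hi = static_cast<float>(static_cast<uint32_t>(X >> 32)); // high 32 bits
      return Lo + Hi * 4294967296.0f;                                // Hi * 2^32
    }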