X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FAMDGPUISelLowering.cpp;h=83083786fe8699aaac5fce6834ff8632b3227a48;hb=24e874a1dd608b6ab53bfcd78088efa9d6e009fb;hp=b30c9441524dacc4b65494d25c5d65850453b371;hpb=5bc44c76030a56140e877ff25356b71f143bcaad;p=oota-llvm.git diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index b30c9441524..83083786fe8 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -103,7 +103,7 @@ EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) { } AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : - TargetLowering(TM, new TargetLoweringObjectFileELF()) { + TargetLowering(TM) { Subtarget = &TM.getSubtarget(); @@ -130,6 +130,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FREM, MVT::f32, Custom); + setOperationAction(ISD::FREM, MVT::f64, Custom); + // Lower floating point store/load to integer store/load to reduce the number // of patterns in tablegen. setOperationAction(ISD::STORE, MVT::f32, Promote); @@ -282,6 +285,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); if (!Subtarget->hasFFBH()) @@ -341,12 +347,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : for (MVT VT : FloatVectorTypes) { setOperationAction(ISD::FABS, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); setOperationAction(ISD::FADD, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); @@ -369,6 +378,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::STORE); @@ -386,10 +396,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : // There are no integer divide instructions, and these expand to a pretty // large sequence of instructions. setIntDivIsCheap(false); - setPow2DivIsCheap(false); - - // TODO: Investigate this when 64-bit divides are implemented. - addBypassSlowDiv(64, 32); + setPow2SDivIsCheap(false); // FIXME: Need to really handle these. MaxStoresPerMemcpy = 4096; @@ -441,12 +448,12 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { assert(VT.isFloatingPoint()); - return VT == MVT::f32; + return VT == MVT::f32 || VT == MVT::f64; } bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { assert(VT.isFloatingPoint()); - return VT == MVT::f32; + return VT == MVT::f32 || VT == MVT::f64; } bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { @@ -548,12 +555,16 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); case ISD::SDIVREM: return LowerSDIVREM(Op, DAG); + case ISD::FREM: return LowerFREM(Op, DAG); case ISD::FCEIL: return LowerFCEIL(Op, DAG); case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); case ISD::FRINT: return LowerFRINT(Op, DAG); case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); + case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); + case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); } return Op; } @@ -681,6 +692,17 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, llvm_unreachable("Unhandled constant initializer"); } +static bool hasDefinedInitializer(const GlobalValue *GV) { + const GlobalVariable *GVar = dyn_cast(GV); + if (!GVar || !GVar->hasInitializer()) + return false; + + if (isa(GVar->getInitializer())) + return false; + + return true; +} + SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, SDValue Op, SelectionDAG &DAG) const { @@ -690,13 +712,15 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, const GlobalValue *GV = G->getGlobal(); switch (G->getAddressSpace()) { - default: llvm_unreachable("Global Address lowering not implemented for this " - "address space"); case AMDGPUAS::LOCAL_ADDRESS: { // XXX: What does the value of G->getOffset() mean? assert(G->getOffset() == 0 && "Do not know what to do with an non-zero offset"); + // TODO: We could emit code to handle the initialization somewhere. + if (hasDefinedInitializer(GV)) + break; + unsigned Offset; if (MFI->LocalMemoryObjects.count(GV) == 0) { uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); @@ -750,6 +774,12 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT); } } + + const Function &Fn = *DAG.getMachineFunction().getFunction(); + DiagnosticInfoUnsupported BadInit(Fn, + "initializer for address space"); + DAG.getContext()->diagnose(BadInit); + return SDValue(); } SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, @@ -823,6 +853,12 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // first parameter must be the same as the first instruction. SDValue Numerator = Op.getOperand(1); SDValue Denominator = Op.getOperand(2); + + // Note this order is opposite of the machine instruction's operations, + // which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The + // intrinsic has the numerator as the first operand to match a normal + // division operation. + SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator; return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0, @@ -830,6 +866,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::AMDGPU_div_fmas: + // FIXME: Dropping bool parameter. Work is needed to support the implicit + // read from VCC. return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); @@ -962,21 +1000,16 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, } /// \brief Generate Min/Max node -SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, - SelectionDAG &DAG) const { - SDLoc DL(N); - EVT VT = N->getValueType(0); - - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue True = N->getOperand(2); - SDValue False = N->getOperand(3); - SDValue CC = N->getOperand(4); - - if (VT != MVT::f32 || - !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { +SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, + SelectionDAG &DAG) const { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); - } ISD::CondCode CCOpcode = cast(CC)->get(); switch (CCOpcode) { @@ -992,14 +1025,15 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, case ISD::SETTRUE2: case ISD::SETUO: case ISD::SETO: - llvm_unreachable("Operation should already be optimised!"); + break; case ISD::SETULE: case ISD::SETULT: case ISD::SETOLE: case ISD::SETOLT: case ISD::SETLE: case ISD::SETLT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX; + unsigned Opc + = (LHS == True) ? AMDGPUISD::FMIN_LEGACY : AMDGPUISD::FMAX_LEGACY; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETGT: @@ -1008,7 +1042,8 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, case ISD::SETOGE: case ISD::SETUGT: case ISD::SETOGT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN; + unsigned Opc + = (LHS == True) ? AMDGPUISD::FMAX_LEGACY : AMDGPUISD::FMIN_LEGACY; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETCC_INVALID: @@ -1017,6 +1052,45 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, return SDValue(); } +/// \brief Generate Min/Max node +SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, + SelectionDAG &DAG) const { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + ISD::CondCode CCOpcode = cast(CC)->get(); + switch (CCOpcode) { + case ISD::SETULE: + case ISD::SETULT: { + unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX; + return DAG.getNode(Opc, DL, VT, LHS, RHS); + } + case ISD::SETLE: + case ISD::SETLT: { + unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX; + return DAG.getNode(Opc, DL, VT, LHS, RHS); + } + case ISD::SETGT: + case ISD::SETGE: { + unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN; + return DAG.getNode(Opc, DL, VT, LHS, RHS); + } + case ISD::SETUGE: + case ISD::SETUGT: { + unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN; + return DAG.getNode(Opc, DL, VT, LHS, RHS); + } + default: + return SDValue(); + } +} + SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op, SelectionDAG &DAG) const { LoadSDNode *Load = cast(Op); @@ -1510,8 +1584,8 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, // e is rounding error. SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); - // RCP_LO = umulo(RCP, Den) */ - SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); + // RCP_LO = mul(RCP, Den) */ + SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den); // RCP_HI = mulhu (RCP, Den) */ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); @@ -1542,7 +1616,7 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); // Num_S_Remainder = Quotient * Den - SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); + SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den); // Remainder = Num - Num_S_Remainder SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); @@ -1650,6 +1724,20 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, return DAG.getMergeValues(Res, DL); } +// (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y)) +SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + EVT VT = Op.getValueType(); + SDValue X = Op.getOperand(0); + SDValue Y = Op.getOperand(1); + + SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y); + SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y); + + return DAG.getNode(ISD::FSUB, SL, VT, X, Mul); +} + SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); @@ -1692,7 +1780,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { const unsigned ExpBits = 11; // Extract the exponent. - SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_I32, SL, MVT::i32, + SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32, Hi, DAG.getConstant(FractBits - 32, MVT::i32), DAG.getConstant(ExpBits, MVT::i32)); @@ -1783,13 +1871,43 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } +SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, + bool Signed) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); + + SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, + DAG.getConstant(1, MVT::i32)); + + SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP, + SL, MVT::f64, Hi); + + SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo); + + SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, + DAG.getConstant(32, MVT::i32)); + + return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); +} + SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue S0 = Op.getOperand(0); - SDLoc DL(Op); - if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64) + if (S0.getValueType() != MVT::i64) return SDValue(); + EVT DestVT = Op.getValueType(); + if (DestVT == MVT::f64) + return LowerINT_TO_FP64(Op, DAG, false); + + assert(DestVT == MVT::f32); + + SDLoc DL(Op); + // f32 uint_to_fp i64 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, DAG.getConstant(0, MVT::i32)); @@ -1802,16 +1920,62 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); } -SDValue AMDGPUTargetLowering::ExpandSIGN_EXTEND_INREG(SDValue Op, - unsigned BitsDiff, - SelectionDAG &DAG) const { - MVT VT = Op.getSimpleValueType(); - SDLoc DL(Op); - SDValue Shift = DAG.getConstant(BitsDiff, VT); - // Shift left by 'Shift' bits. - SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Op.getOperand(0), Shift); - // Signed shift Right by 'Shift' bits. - return DAG.getNode(ISD::SRA, DL, VT, Shl, Shift); +SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + SDValue Src = Op.getOperand(0); + if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64) + return LowerINT_TO_FP64(Op, DAG, true); + + return SDValue(); +} + +SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, + bool Signed) const { + SDLoc SL(Op); + + SDValue Src = Op.getOperand(0); + + SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); + + SDValue K0 + = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), MVT::f64); + SDValue K1 + = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), MVT::f64); + + SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0); + + SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul); + + + SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc); + + SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL, + MVT::i32, FloorMul); + SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma); + + SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Lo, Hi); + + return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result); +} + +SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { + SDValue Src = Op.getOperand(0); + + if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64) + return LowerFP64_TO_INT(Op, DAG, true); + + return SDValue(); +} + +SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op, + SelectionDAG &DAG) const { + SDValue Src = Op.getOperand(0); + + if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64) + return LowerFP64_TO_INT(Op, DAG, false); + + return SDValue(); } SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, @@ -1877,7 +2041,8 @@ template static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, uint32_t Width) { if (Width + Offset < 32) { - IntTy Result = (Src0 << (32 - Offset - Width)) >> (32 - Width); + uint32_t Shl = static_cast(Src0) << (32 - Offset - Width); + IntTy Result = static_cast(Shl) >> (32 - Width); return DAG.getConstant(Result, MVT::i32); } @@ -1982,9 +2147,51 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, simplifyI24(N1, DCI); return SDValue(); } - case ISD::SELECT_CC: { - return CombineMinMax(N, DAG); + case ISD::SELECT_CC: { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + if (VT == MVT::f32 || + (VT == MVT::f64 && + Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue True = N->getOperand(2); + SDValue False = N->getOperand(3); + SDValue CC = N->getOperand(4); + + return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); } + + break; + } + case ISD::SELECT: { + SDValue Cond = N->getOperand(0); + if (Cond.getOpcode() == ISD::SETCC) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + SDValue CC = Cond.getOperand(2); + + SDValue True = N->getOperand(1); + SDValue False = N->getOperand(2); + + if (VT == MVT::f32 || + (VT == MVT::f64 && + Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) { + return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); + } + + // TODO: Implement min / max Evergreen instructions. + if (VT == MVT::i32 && + Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); + } + } + + break; + } case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && @@ -2029,37 +2236,40 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT); } - if (ConstantSDNode *Val = dyn_cast(N->getOperand(0))) { + if (ConstantSDNode *CVal = dyn_cast(BitsFrom)) { if (Signed) { return constantFoldBFE(DAG, - Val->getSExtValue(), + CVal->getSExtValue(), OffsetVal, WidthVal); } return constantFoldBFE(DAG, - Val->getZExtValue(), + CVal->getZExtValue(), OffsetVal, WidthVal); } - APInt Demanded = APInt::getBitsSet(32, - OffsetVal, - OffsetVal + WidthVal); - if ((OffsetVal + WidthVal) >= 32) { SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32); return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, BitsFrom, ShiftVal); } - APInt KnownZero, KnownOne; - TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), - !DCI.isBeforeLegalizeOps()); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || - TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { - DCI.CommitTargetLoweringOpt(TLO); + if (BitsFrom.hasOneUse()) { + APInt Demanded = APInt::getBitsSet(32, + OffsetVal, + OffsetVal + WidthVal); + + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + TLI.SimplifyDemandedBits(BitsFrom, Demanded, + KnownZero, KnownOne, TLO)) { + DCI.CommitTargetLoweringOpt(TLO); + } } break; @@ -2157,12 +2367,19 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(DWORDADDR) NODE_NAME_CASE(FRACT) NODE_NAME_CASE(CLAMP) - NODE_NAME_CASE(FMAX) + NODE_NAME_CASE(MAD) + NODE_NAME_CASE(FMAX_LEGACY) NODE_NAME_CASE(SMAX) NODE_NAME_CASE(UMAX) - NODE_NAME_CASE(FMIN) + NODE_NAME_CASE(FMIN_LEGACY) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(FMAX3) + NODE_NAME_CASE(SMAX3) + NODE_NAME_CASE(UMAX3) + NODE_NAME_CASE(FMIN3) + NODE_NAME_CASE(SMIN3) + NODE_NAME_CASE(UMIN3) NODE_NAME_CASE(URECIP) NODE_NAME_CASE(DIV_SCALE) NODE_NAME_CASE(DIV_FMAS) @@ -2267,17 +2484,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( unsigned BitWidth = 32; uint32_t Width = CWidth->getZExtValue() & 0x1f; - if (Width == 0) { - KnownZero = APInt::getAllOnesValue(BitWidth); - KnownOne = APInt::getNullValue(BitWidth); - return; - } - // FIXME: This could do a lot more. If offset is 0, should be the same as - // sign_extend_inreg implementation, but that involves duplicating it. - if (Opc == AMDGPUISD::BFE_I32) - KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width); - else + if (Opc == AMDGPUISD::BFE_U32) KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width); break;