setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMIN);
setTargetDAGCombine(ISD::UMAX);
- setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
}
+
+bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
+ const MemSDNode *MemNode = cast<MemSDNode>(N);
+ const Value *Ptr = MemNode->getMemOperand()->getValue();
+
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers
+ if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
+ isa<GlobalValue>(Ptr))
+ return true;
+
+ const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
+
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
switch (IntrinsicID) {
case Intrinsic::amdgcn_dispatch_ptr:
+ if (!Subtarget->isAmdHsaOS()) {
+ DiagnosticInfoUnsupported BadIntrin(*MF.getFunction(),
+ "hsa intrinsic without hsa target");
+ DAG.getContext()->diagnose(BadIntrin);
+ return DAG.getUNDEF(VT);
+ }
+
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
Op.getOperand(1), Op.getOperand(2), Glue);
}
+ case Intrinsic::amdgcn_interp_p1: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
+ SDValue Glue = M0.getValue(1);
+ return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Glue);
+ }
+ case Intrinsic::amdgcn_interp_p2: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
+ SDValue Glue = SDValue(M0.getNode(), 1);
+ return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
+ Glue);
+ }
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
switch (Load->getAddressSpace()) {
default: break;
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ if (isMemOpUniform(Load))
+ break;
+ // Non-uniform loads will be selected to MUBUF instructions, so they
+ // have the same legalization requires ments as global and private
+ // loads.
+ //
+ // Fall-through
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::PRIVATE_ADDRESS:
if (NumElements >= 8)
case ISD::UINT_TO_FP: {
return performUCharToFloatCombine(N, DCI);
-
+ }
case ISD::FADD: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
break;
break;
}
- }
case ISD::LOAD:
case ISD::STORE:
case ISD::ATOMIC_LOAD: