AMDGPU: Fix crash with dispatch.ptr intrinsic with non-HSA target

[oota-llvm.git] / lib / Target / AMDGPU / SIISelLowering.cpp
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 8dc3934b8cbf2682e8127c1ad76e78ae9335f8ae..c251752bb0dddf8528d5d30fa90b664b90e87894 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -107,6 +107,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
    setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
  
    setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
@@ -258,7 +259,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
    setTargetDAGCombine(ISD::SMAX);
    setTargetDAGCombine(ISD::UMIN);
    setTargetDAGCombine(ISD::UMAX);
-  setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::AND);
    setTargetDAGCombine(ISD::OR);
@@ -503,6 +503,21 @@ bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
    return isFlatGlobalAddrSpace(SrcAS) &&  isFlatGlobalAddrSpace(DestAS);
  }
  
+/// Return true if the pointer of memory node \p N is known to be uniform;
+/// a missing IR pointer is conservatively treated as non-uniform.
+bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
+  const Value *Ptr = cast<MemSDNode>(N)->getMemOperand()->getValue();
+  if (!Ptr) // isa<> asserts on null, so bail out before the checks below.
+    return false;
+  // UndefValue means this is a load of a kernel input.  These are uniform.
+  // Sometimes LDS instructions have constant pointers.
+  if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
+      isa<GlobalValue>(Ptr))
+    return true;
+  // Any other pointer is uniform only if its instruction is so annotated.
+  const Instruction *I = dyn_cast<Instruction>(Ptr);
+  return I && I->getMetadata("amdgpu.uniform");
+}
+
  TargetLoweringBase::LegalizeTypeAction
  SITargetLowering::getPreferredVectorAction(EVT VT) const {
    if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
@@ -1142,6 +1157,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_dispatch_ptr:
+    if (!Subtarget->isAmdHsaOS()) {
+      DiagnosticInfoUnsupported BadIntrin(*MF.getFunction(),
+                                          "hsa intrinsic without hsa target");
+      DAG.getContext()->diagnose(BadIntrin);
+      return DAG.getUNDEF(VT);
+    }
+
      return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
        TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
  
@@ -1251,6 +1273,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
                               Op.getOperand(1), Op.getOperand(2), Glue);
    }
+  case Intrinsic::amdgcn_interp_p1: {
+    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
+    SDValue Glue = M0.getValue(1);
+    return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
+                       Op.getOperand(2), Op.getOperand(3), Glue);
+  }
+  case Intrinsic::amdgcn_interp_p2: {
+    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
+    SDValue Glue = SDValue(M0.getNode(), 1);
+    return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
+                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
+                       Glue);
+  }
    default:
      return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    }
@@ -1314,6 +1349,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  
      switch (Load->getAddressSpace()) {
        default: break;
+      case AMDGPUAS::CONSTANT_ADDRESS:
+      if (isMemOpUniform(Load))
+        break;
+        // Non-uniform loads will be selected to MUBUF instructions, so they
+        // have the same legalization requirements as global and private
+        // loads.
+        //
+        // Fall-through
        case AMDGPUAS::GLOBAL_ADDRESS:
        case AMDGPUAS::PRIVATE_ADDRESS:
          if (NumElements >= 8)
@@ -1990,7 +2033,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
  
    case ISD::UINT_TO_FP: {
      return performUCharToFloatCombine(N, DCI);
-
+  }
    case ISD::FADD: {
      if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
        break;
@@ -2072,7 +2115,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
  
      break;
    }
-  }
    case ISD::LOAD:
    case ISD::STORE:
    case ISD::ATOMIC_LOAD: