Call the version of ConvertCostTableLookup that takes a statically sized array rather...

[oota-llvm.git] / lib / Target / AMDGPU / AMDGPUISelLowering.cpp
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

index 212ae7ba74fbb6a665e4b1524e98060bcadd7a96..af9fcbde9f1a83fe5827aa8a396c86a66a88592e 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -15,6 +15,7 @@
  
  #include "AMDGPUISelLowering.h"
  #include "AMDGPU.h"
+#include "AMDGPUDiagnosticInfoUnsupported.h"
  #include "AMDGPUFrameLowering.h"
  #include "AMDGPUIntrinsicInfo.h"
  #include "AMDGPURegisterInfo.h"
@@ -27,50 +28,9 @@
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DiagnosticPrinter.h"
  
  using namespace llvm;
  
-namespace {
-
-/// Diagnostic information for unimplemented or unsupported feature reporting.
-class DiagnosticInfoUnsupported : public DiagnosticInfo {
-private:
-  const Twine &Description;
-  const Function &Fn;
-
-  static int KindID;
-
-  static int getKindID() {
-    if (KindID == 0)
-      KindID = llvm::getNextAvailablePluginDiagnosticKind();
-    return KindID;
-  }
-
-public:
-  DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
-                          DiagnosticSeverity Severity = DS_Error)
-    : DiagnosticInfo(getKindID(), Severity),
-      Description(Desc),
-      Fn(Fn) { }
-
-  const Function &getFunction() const { return Fn; }
-  const Twine &getDescription() const { return Description; }
-
-  void print(DiagnosticPrinter &DP) const override {
-    DP << "unsupported " << getDescription() << " in " << Fn.getName();
-  }
-
-  static bool classof(const DiagnosticInfo *DI) {
-    return DI->getKind() == getKindID();
-  }
-};
-
-int DiagnosticInfoUnsupported::KindID = 0;
-}
-
-
  static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
                        CCValAssign::LocInfo LocInfo,
                        ISD::ArgFlagsTy ArgFlags, CCState &State) {
@@ -355,7 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
-    setOperationAction(ISD::UDIVREM, VT, Custom);
+    setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::ADDC, VT, Expand);
      setOperationAction(ISD::SUBC, VT, Expand);
      setOperationAction(ISD::ADDE, VT, Expand);
@@ -533,6 +493,18 @@ bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
    return true;
  }
  
+bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
+  // There are few operations which truly have vector input operands. Any vector
+  // operation is going to involve operations on each component, and a
+  // build_vector will be a copy per element, so it always makes sense to use a
+  // build_vector input in place of the extracted element to avoid a copy into a
+  // super register.
+  //
+  // We should probably only do this if all users are extracts only, but this
+  // should be the common case.
+  return true;
+}
+
  bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
    // Truncate is just accessing a subregister.
    return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
@@ -1088,6 +1060,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
                                                  SelectionDAG &DAG) const {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
+  // TODO: Should this propagate fast-math-flags?
    SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
                                  DAG.getConstantFP(1.0f, DL, MVT::f32),
                                  Op.getOperand(1));
@@ -1602,6 +1575,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
    // float fb = (float)ib;
    SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
  
+  // TODO: Should this propagate fast-math-flags?
    // float fq = native_divide(fa, fb);
    SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
                             fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
@@ -1912,6 +1886,8 @@ SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
    SDValue X = Op.getOperand(0);
    SDValue Y = Op.getOperand(1);
  
+  // TODO: Should this propagate fast-math-flags?
+
    SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
    SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
    SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
@@ -1940,6 +1916,7 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
    SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
  
    SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+  // TODO: Should this propagate fast-math-flags?
    return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
  }
  
@@ -2017,6 +1994,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
    SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
    SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
  
+  // TODO: Should this propagate fast-math-flags?
+
    SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
    SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
  
@@ -2046,6 +2025,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const
  
    SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
  
+  // TODO: Should this propagate fast-math-flags?
+
    SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
  
    SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
@@ -2156,6 +2137,7 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
    SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
  
    SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+  // TODO: Should this propagate fast-math-flags?
    return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
  }
  
@@ -2178,7 +2160,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
  
    SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
                                DAG.getConstant(32, SL, MVT::i32));
-
+  // TODO: Should this propagate fast-math-flags?
    return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
  }
  
@@ -2203,6 +2185,7 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
                             DAG.getConstant(1, DL, MVT::i32));
    SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+  // TODO: Should this propagate fast-math-flags?
    FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
                          DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
    return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
@@ -2229,7 +2212,7 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
                                   MVT::f64);
    SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
                                   MVT::f64);
-
+  // TODO: Should this propagate fast-math-flags?
    SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
  
    SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);