R600: Fix an infinite loop when trying to reorganize export/tex vector input

[oota-llvm.git] / lib / Target / R600 / AMDILISelLowering.cpp
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp

index 2e60adcc99faf018c4435ad62539f57916b57e70..970787ef31e00ad100aa77db0ae57a58c1f51f79 100644 (file)
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -15,7 +15,6 @@
  #include "AMDGPUISelLowering.h"
  #include "AMDGPURegisterInfo.h"
  #include "AMDGPUSubtarget.h"
-#include "AMDILDevices.h"
  #include "AMDILIntrinsicInfo.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -32,11 +31,6 @@
  #include "llvm/Target/TargetOptions.h"
  
  using namespace llvm;
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-#include "AMDGPUGenCallingConv.inc"
-
  //===----------------------------------------------------------------------===//
  // TargetLowering Implementation Help Functions End
  //===----------------------------------------------------------------------===//
@@ -45,7 +39,7 @@ using namespace llvm;
  // TargetLowering Class Implementation Begins
  //===----------------------------------------------------------------------===//
  void AMDGPUTargetLowering::InitAMDILLowering() {
-  int types[] = {
+  static const int types[] = {
      (int)MVT::i8,
      (int)MVT::i16,
      (int)MVT::i32,
@@ -64,19 +58,19 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
      (int)MVT::v2i64
    };
  
-  int IntTypes[] = {
+  static const int IntTypes[] = {
      (int)MVT::i8,
      (int)MVT::i16,
      (int)MVT::i32,
      (int)MVT::i64
    };
  
-  int FloatTypes[] = {
+  static const int FloatTypes[] = {
      (int)MVT::f32,
      (int)MVT::f64
    };
  
-  int VectorTypes[] = {
+  static const int VectorTypes[] = {
      (int)MVT::v2i8,
      (int)MVT::v4i8,
      (int)MVT::v2i16,
@@ -88,10 +82,10 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
      (int)MVT::v2f64,
      (int)MVT::v2i64
    };
-  size_t NumTypes = sizeof(types) / sizeof(*types);
-  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
-  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
-  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+  const size_t NumTypes = array_lengthof(types);
+  const size_t NumFloatTypes = array_lengthof(FloatTypes);
+  const size_t NumIntTypes = array_lengthof(IntTypes);
+  const size_t NumVectorTypes = array_lengthof(VectorTypes);
  
    const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
    // These are the current register classes that are
@@ -143,8 +137,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
  
-    // GPU doesn't have a rotl, rotr, or byteswap instruction
-    setOperationAction(ISD::ROTR, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
  
      // GPU doesn't have any counting operators
@@ -163,21 +155,19 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
      setOperationAction(ISD::SELECT_CC, VT, Expand);
  
    }
-  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
-    setOperationAction(ISD::MULHU, MVT::i64, Expand);
-    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
-    setOperationAction(ISD::MULHS, MVT::i64, Expand);
-    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
-    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
-    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
-    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
-    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
-    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
-    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
-    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
-    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
-  }
-  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
+  setOperationAction(ISD::MULHU, MVT::i64, Expand);
+  setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+  setOperationAction(ISD::MULHS, MVT::i64, Expand);
+  setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+  setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+  setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+  setOperationAction(ISD::Constant          , MVT::i64  , Legal);
+  setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+  setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+  setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+  setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+  setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+  if (STM.hasHWFP64()) {
      // we support loading/storing v2f64 but not operations on the type
      setOperationAction(ISD::FADD, MVT::v2f64, Expand);
      setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
@@ -220,9 +210,9 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
    setSelectIsExpensive(true);
    setJumpIsExpensive(true);
  
-  maxStoresPerMemcpy  = 4096;
-  maxStoresPerMemmove = 4096;
-  maxStoresPerMemset  = 4096;
+  MaxStoresPerMemcpy  = 4096;
+  MaxStoresPerMemmove = 4096;
+  MaxStoresPerMemset  = 4096;
  
  }
  
@@ -336,7 +326,7 @@ SDValue
  AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
    SDValue Data = Op.getOperand(0);
    VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT DVT = Data.getValueType();
    EVT BVT = BaseType->getVT();
    unsigned baseBits = BVT.getScalarType().getSizeInBits();
@@ -392,7 +382,7 @@ AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
    SDValue Result;
    Result = DAG.getNode(
        AMDGPUISD::BRANCH_COND,
-      Op.getDebugLoc(),
+      SDLoc(Op),
        Op.getValueType(),
        Chain, Jump, Cond);
    return Result;
@@ -400,7 +390,7 @@ AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  
  SDValue
  AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT OVT = Op.getValueType();
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
@@ -451,7 +441,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
    SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
  
    // float fr = mad(fqneg, fb, fa);
-  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+      DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
  
    // int iq = (int)fq;
    SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
@@ -480,7 +471,7 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
  
  SDValue
  AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT OVT = Op.getValueType();
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
@@ -551,7 +542,7 @@ AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
  
  SDValue
  AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT OVT = Op.getValueType();
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i8) {
@@ -568,7 +559,7 @@ AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
  
  SDValue
  AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT OVT = Op.getValueType();
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i16) {
@@ -585,7 +576,7 @@ AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
  
  SDValue
  AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT OVT = Op.getValueType();
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);