lib/Target/R600/AMDILISelLowering.cpp

   1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief TargetLowering functions borrowed from AMDIL.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDGPUISelLowering.h"
  16 #include "AMDGPURegisterInfo.h"
  17 #include "AMDGPUSubtarget.h"
  18 #include "AMDILDevices.h"
  19 #include "AMDILIntrinsicInfo.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineRegisterInfo.h"
  22 #include "llvm/CodeGen/PseudoSourceValue.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGNodes.h"
  25 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  26 #include "llvm/IR/CallingConv.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Instructions.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 #include "llvm/Target/TargetInstrInfo.h"
  32 #include "llvm/Target/TargetOptions.h"
  33
  34 using namespace llvm;
  35 //===----------------------------------------------------------------------===//
  36 // Calling Convention Implementation
  37 //===----------------------------------------------------------------------===//
  38 #include "AMDGPUGenCallingConv.inc"
  39
  40 //===----------------------------------------------------------------------===//
  41 // TargetLowering Implementation Help Functions End
  42 //===----------------------------------------------------------------------===//
  43
  44 //===----------------------------------------------------------------------===//
  45 // TargetLowering Class Implementation Begins
  46 //===----------------------------------------------------------------------===//
  47 void AMDGPUTargetLowering::InitAMDILLowering() {
  48   int types[] = {
  49     (int)MVT::i8,
  50     (int)MVT::i16,
  51     (int)MVT::i32,
  52     (int)MVT::f32,
  53     (int)MVT::f64,
  54     (int)MVT::i64,
  55     (int)MVT::v2i8,
  56     (int)MVT::v4i8,
  57     (int)MVT::v2i16,
  58     (int)MVT::v4i16,
  59     (int)MVT::v4f32,
  60     (int)MVT::v4i32,
  61     (int)MVT::v2f32,
  62     (int)MVT::v2i32,
  63     (int)MVT::v2f64,
  64     (int)MVT::v2i64
  65   };
  66
  67   int IntTypes[] = {
  68     (int)MVT::i8,
  69     (int)MVT::i16,
  70     (int)MVT::i32,
  71     (int)MVT::i64
  72   };
  73
  74   int FloatTypes[] = {
  75     (int)MVT::f32,
  76     (int)MVT::f64
  77   };
  78
  79   int VectorTypes[] = {
  80     (int)MVT::v2i8,
  81     (int)MVT::v4i8,
  82     (int)MVT::v2i16,
  83     (int)MVT::v4i16,
  84     (int)MVT::v4f32,
  85     (int)MVT::v4i32,
  86     (int)MVT::v2f32,
  87     (int)MVT::v2i32,
  88     (int)MVT::v2f64,
  89     (int)MVT::v2i64
  90   };
  91   size_t NumTypes = sizeof(types) / sizeof(*types);
  92   size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  93   size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  94   size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
  95
  96   const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  97   // These are the current register classes that are
  98   // supported
  99
 100   for (unsigned int x  = 0; x < NumTypes; ++x) {
 101     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
 102
 103     //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
 104     // We cannot sextinreg, expand to shifts
 105     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
 106     setOperationAction(ISD::SUBE, VT, Expand);
 107     setOperationAction(ISD::SUBC, VT, Expand);
 108     setOperationAction(ISD::ADDE, VT, Expand);
 109     setOperationAction(ISD::ADDC, VT, Expand);
 110     setOperationAction(ISD::BRCOND, VT, Custom);
 111     setOperationAction(ISD::BR_JT, VT, Expand);
 112     setOperationAction(ISD::BRIND, VT, Expand);
 113     // TODO: Implement custom UREM/SREM routines
 114     setOperationAction(ISD::SREM, VT, Expand);
 115     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 116     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 117     if (VT != MVT::i64 && VT != MVT::v2i64) {
 118       setOperationAction(ISD::SDIV, VT, Custom);
 119     }
 120   }
 121   for (unsigned int x = 0; x < NumFloatTypes; ++x) {
 122     MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
 123
 124     // IL does not have these operations for floating point types
 125     setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
 126     setOperationAction(ISD::SETOLT, VT, Expand);
 127     setOperationAction(ISD::SETOGE, VT, Expand);
 128     setOperationAction(ISD::SETOGT, VT, Expand);
 129     setOperationAction(ISD::SETOLE, VT, Expand);
 130     setOperationAction(ISD::SETULT, VT, Expand);
 131     setOperationAction(ISD::SETUGE, VT, Expand);
 132     setOperationAction(ISD::SETUGT, VT, Expand);
 133     setOperationAction(ISD::SETULE, VT, Expand);
 134   }
 135
 136   for (unsigned int x = 0; x < NumIntTypes; ++x) {
 137     MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
 138
 139     // GPU also does not have divrem function for signed or unsigned
 140     setOperationAction(ISD::SDIVREM, VT, Expand);
 141
 142     // GPU does not have [S|U]MUL_LOHI functions as a single instruction
 143     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 144     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 145
 146     // GPU doesn't have a rotl, rotr, or byteswap instruction
 147     setOperationAction(ISD::ROTR, VT, Expand);
 148     setOperationAction(ISD::BSWAP, VT, Expand);
 149
 150     // GPU doesn't have any counting operators
 151     setOperationAction(ISD::CTPOP, VT, Expand);
 152     setOperationAction(ISD::CTTZ, VT, Expand);
 153     setOperationAction(ISD::CTLZ, VT, Expand);
 154   }
 155
 156   for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
 157     MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
 158
 159     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
 160     setOperationAction(ISD::SDIVREM, VT, Expand);
 161     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 162     // setOperationAction(ISD::VSETCC, VT, Expand);
 163     setOperationAction(ISD::SELECT_CC, VT, Expand);
 164
 165   }
 166   if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
 167     setOperationAction(ISD::MULHU, MVT::i64, Expand);
 168     setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
 169     setOperationAction(ISD::MULHS, MVT::i64, Expand);
 170     setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
 171     setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 172     setOperationAction(ISD::SREM, MVT::v2i64, Expand);
 173     setOperationAction(ISD::Constant          , MVT::i64  , Legal);
 174     setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
 175     setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
 176     setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
 177     setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
 178     setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
 179   }
 180   if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
 181     // we support loading/storing v2f64 but not operations on the type
 182     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 183     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 184     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 185     setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
 186     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
 187     setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
 188     // We want to expand vector conversions into their scalar
 189     // counterparts.
 190     setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
 191     setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
 192     setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
 193     setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
 194     setOperationAction(ISD::FABS, MVT::f64, Expand);
 195     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 196   }
 197   // TODO: Fix the UDIV24 algorithm so it works for these
 198   // types correctly. This needs vector comparisons
 199   // for this to work correctly.
 200   setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
 201   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
 202   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
 203   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
 204   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
 205   setOperationAction(ISD::SUBC, MVT::Other, Expand);
 206   setOperationAction(ISD::ADDE, MVT::Other, Expand);
 207   setOperationAction(ISD::ADDC, MVT::Other, Expand);
 208   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 209   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 210   setOperationAction(ISD::BRIND, MVT::Other, Expand);
 211   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
 212
 213
 214   // Use the default implementation.
 215   setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
 216   setOperationAction(ISD::Constant          , MVT::i32    , Legal);
 217
 218   setSchedulingPreference(Sched::RegPressure);
 219   setPow2DivIsCheap(false);
 220   setSelectIsExpensive(true);
 221   setJumpIsExpensive(true);
 222
 223   maxStoresPerMemcpy  = 4096;
 224   maxStoresPerMemmove = 4096;
 225   maxStoresPerMemset  = 4096;
 226
 227 }
 228
 229 bool
 230 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 231     const CallInst &I, unsigned Intrinsic) const {
 232   return false;
 233 }
 234
 235 // The backend supports 32 and 64 bit floating point immediates
 236 bool
 237 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
 238   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 239       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 240     return true;
 241   } else {
 242     return false;
 243   }
 244 }
 245
 246 bool
 247 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
 248   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 249       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 250     return false;
 251   } else {
 252     return true;
 253   }
 254 }
 255
 256
 257 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 258 // be zero. Op is expected to be a target specific node. Used by DAG
 259 // combiner.
 260
 261 void
 262 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
 263     const SDValue Op,
 264     APInt &KnownZero,
 265     APInt &KnownOne,
 266     const SelectionDAG &DAG,
 267     unsigned Depth) const {
 268   APInt KnownZero2;
 269   APInt KnownOne2;
 270   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
 271   switch (Op.getOpcode()) {
 272     default: break;
 273     case ISD::SELECT_CC:
 274              DAG.ComputeMaskedBits(
 275                  Op.getOperand(1),
 276                  KnownZero,
 277                  KnownOne,
 278                  Depth + 1
 279                  );
 280              DAG.ComputeMaskedBits(
 281                  Op.getOperand(0),
 282                  KnownZero2,
 283                  KnownOne2
 284                  );
 285              assert((KnownZero & KnownOne) == 0
 286                  && "Bits known to be one AND zero?");
 287              assert((KnownZero2 & KnownOne2) == 0
 288                  && "Bits known to be one AND zero?");
 289              // Only known if known in both the LHS and RHS
 290              KnownOne &= KnownOne2;
 291              KnownZero &= KnownZero2;
 292              break;
 293   };
 294 }
 295
 296 //===----------------------------------------------------------------------===//
 297 //                           Other Lowering Hooks
 298 //===----------------------------------------------------------------------===//
 299
 300 SDValue
 301 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
 302   EVT OVT = Op.getValueType();
 303   SDValue DST;
 304   if (OVT.getScalarType() == MVT::i64) {
 305     DST = LowerSDIV64(Op, DAG);
 306   } else if (OVT.getScalarType() == MVT::i32) {
 307     DST = LowerSDIV32(Op, DAG);
 308   } else if (OVT.getScalarType() == MVT::i16
 309       || OVT.getScalarType() == MVT::i8) {
 310     DST = LowerSDIV24(Op, DAG);
 311   } else {
 312     DST = SDValue(Op.getNode(), 0);
 313   }
 314   return DST;
 315 }
 316
 317 SDValue
 318 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
 319   EVT OVT = Op.getValueType();
 320   SDValue DST;
 321   if (OVT.getScalarType() == MVT::i64) {
 322     DST = LowerSREM64(Op, DAG);
 323   } else if (OVT.getScalarType() == MVT::i32) {
 324     DST = LowerSREM32(Op, DAG);
 325   } else if (OVT.getScalarType() == MVT::i16) {
 326     DST = LowerSREM16(Op, DAG);
 327   } else if (OVT.getScalarType() == MVT::i8) {
 328     DST = LowerSREM8(Op, DAG);
 329   } else {
 330     DST = SDValue(Op.getNode(), 0);
 331   }
 332   return DST;
 333 }
 334
 335 SDValue
 336 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
 337   SDValue Data = Op.getOperand(0);
 338   VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
 339   DebugLoc DL = Op.getDebugLoc();
 340   EVT DVT = Data.getValueType();
 341   EVT BVT = BaseType->getVT();
 342   unsigned baseBits = BVT.getScalarType().getSizeInBits();
 343   unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
 344   unsigned shiftBits = srcBits - baseBits;
 345   if (srcBits < 32) {
 346     // If the op is less than 32 bits, then it needs to extend to 32bits
 347     // so it can properly keep the upper bits valid.
 348     EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
 349     Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
 350     shiftBits = 32 - baseBits;
 351     DVT = IVT;
 352   }
 353   SDValue Shift = DAG.getConstant(shiftBits, DVT);
 354   // Shift left by 'Shift' bits.
 355   Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
 356   // Signed shift Right by 'Shift' bits.
 357   Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
 358   if (srcBits < 32) {
 359     // Once the sign extension is done, the op needs to be converted to
 360     // its original type.
 361     Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
 362   }
 363   return Data;
 364 }
 365 EVT
 366 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
 367   int iSize = (size * numEle);
 368   int vEle = (iSize >> ((size == 64) ? 6 : 5));
 369   if (!vEle) {
 370     vEle = 1;
 371   }
 372   if (size == 64) {
 373     if (vEle == 1) {
 374       return EVT(MVT::i64);
 375     } else {
 376       return EVT(MVT::getVectorVT(MVT::i64, vEle));
 377     }
 378   } else {
 379     if (vEle == 1) {
 380       return EVT(MVT::i32);
 381     } else {
 382       return EVT(MVT::getVectorVT(MVT::i32, vEle));
 383     }
 384   }
 385 }
 386
 387 SDValue
 388 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 389   SDValue Chain = Op.getOperand(0);
 390   SDValue Cond  = Op.getOperand(1);
 391   SDValue Jump  = Op.getOperand(2);
 392   SDValue Result;
 393   Result = DAG.getNode(
 394       AMDGPUISD::BRANCH_COND,
 395       Op.getDebugLoc(),
 396       Op.getValueType(),
 397       Chain, Jump, Cond);
 398   return Result;
 399 }
 400
 401 SDValue
 402 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
 403   DebugLoc DL = Op.getDebugLoc();
 404   EVT OVT = Op.getValueType();
 405   SDValue LHS = Op.getOperand(0);
 406   SDValue RHS = Op.getOperand(1);
 407   MVT INTTY;
 408   MVT FLTTY;
 409   if (!OVT.isVector()) {
 410     INTTY = MVT::i32;
 411     FLTTY = MVT::f32;
 412   } else if (OVT.getVectorNumElements() == 2) {
 413     INTTY = MVT::v2i32;
 414     FLTTY = MVT::v2f32;
 415   } else if (OVT.getVectorNumElements() == 4) {
 416     INTTY = MVT::v4i32;
 417     FLTTY = MVT::v4f32;
 418   }
 419   unsigned bitsize = OVT.getScalarType().getSizeInBits();
 420   // char|short jq = ia ^ ib;
 421   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
 422
 423   // jq = jq >> (bitsize - 2)
 424   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
 425
 426   // jq = jq | 0x1
 427   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
 428
 429   // jq = (int)jq
 430   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
 431
 432   // int ia = (int)LHS;
 433   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
 434
 435   // int ib, (int)RHS;
 436   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
 437
 438   // float fa = (float)ia;
 439   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
 440
 441   // float fb = (float)ib;
 442   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
 443
 444   // float fq = native_divide(fa, fb);
 445   SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
 446
 447   // fq = trunc(fq);
 448   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
 449
 450   // float fqneg = -fq;
 451   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
 452
 453   // float fr = mad(fqneg, fb, fa);
 454   SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
 455       DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
 456
 457   // int iq = (int)fq;
 458   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
 459
 460   // fr = fabs(fr);
 461   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
 462
 463   // fb = fabs(fb);
 464   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
 465
 466   // int cv = fr >= fb;
 467   SDValue cv;
 468   if (INTTY == MVT::i32) {
 469     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 470   } else {
 471     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 472   }
 473   // jq = (cv ? jq : 0);
 474   jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
 475       DAG.getConstant(0, OVT));
 476   // dst = iq + jq;
 477   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
 478   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
 479   return iq;
 480 }
 481
 482 SDValue
 483 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
 484   DebugLoc DL = Op.getDebugLoc();
 485   EVT OVT = Op.getValueType();
 486   SDValue LHS = Op.getOperand(0);
 487   SDValue RHS = Op.getOperand(1);
 488   // The LowerSDIV32 function generates equivalent to the following IL.
 489   // mov r0, LHS
 490   // mov r1, RHS
 491   // ilt r10, r0, 0
 492   // ilt r11, r1, 0
 493   // iadd r0, r0, r10
 494   // iadd r1, r1, r11
 495   // ixor r0, r0, r10
 496   // ixor r1, r1, r11
 497   // udiv r0, r0, r1
 498   // ixor r10, r10, r11
 499   // iadd r0, r0, r10
 500   // ixor DST, r0, r10
 501
 502   // mov r0, LHS
 503   SDValue r0 = LHS;
 504
 505   // mov r1, RHS
 506   SDValue r1 = RHS;
 507
 508   // ilt r10, r0, 0
 509   SDValue r10 = DAG.getSelectCC(DL,
 510       r0, DAG.getConstant(0, OVT),
 511       DAG.getConstant(-1, MVT::i32),
 512       DAG.getConstant(0, MVT::i32),
 513       ISD::SETLT);
 514
 515   // ilt r11, r1, 0
 516   SDValue r11 = DAG.getSelectCC(DL,
 517       r1, DAG.getConstant(0, OVT),
 518       DAG.getConstant(-1, MVT::i32),
 519       DAG.getConstant(0, MVT::i32),
 520       ISD::SETLT);
 521
 522   // iadd r0, r0, r10
 523   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 524
 525   // iadd r1, r1, r11
 526   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 527
 528   // ixor r0, r0, r10
 529   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 530
 531   // ixor r1, r1, r11
 532   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 533
 534   // udiv r0, r0, r1
 535   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
 536
 537   // ixor r10, r10, r11
 538   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
 539
 540   // iadd r0, r0, r10
 541   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 542
 543   // ixor DST, r0, r10
 544   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 545   return DST;
 546 }
 547
 548 SDValue
 549 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
 550   return SDValue(Op.getNode(), 0);
 551 }
 552
 553 SDValue
 554 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
 555   DebugLoc DL = Op.getDebugLoc();
 556   EVT OVT = Op.getValueType();
 557   MVT INTTY = MVT::i32;
 558   if (OVT == MVT::v2i8) {
 559     INTTY = MVT::v2i32;
 560   } else if (OVT == MVT::v4i8) {
 561     INTTY = MVT::v4i32;
 562   }
 563   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 564   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 565   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 566   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 567   return LHS;
 568 }
 569
 570 SDValue
 571 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
 572   DebugLoc DL = Op.getDebugLoc();
 573   EVT OVT = Op.getValueType();
 574   MVT INTTY = MVT::i32;
 575   if (OVT == MVT::v2i16) {
 576     INTTY = MVT::v2i32;
 577   } else if (OVT == MVT::v4i16) {
 578     INTTY = MVT::v4i32;
 579   }
 580   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 581   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 582   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 583   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 584   return LHS;
 585 }
 586
 587 SDValue
 588 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
 589   DebugLoc DL = Op.getDebugLoc();
 590   EVT OVT = Op.getValueType();
 591   SDValue LHS = Op.getOperand(0);
 592   SDValue RHS = Op.getOperand(1);
 593   // The LowerSREM32 function generates equivalent to the following IL.
 594   // mov r0, LHS
 595   // mov r1, RHS
 596   // ilt r10, r0, 0
 597   // ilt r11, r1, 0
 598   // iadd r0, r0, r10
 599   // iadd r1, r1, r11
 600   // ixor r0, r0, r10
 601   // ixor r1, r1, r11
 602   // udiv r20, r0, r1
 603   // umul r20, r20, r1
 604   // sub r0, r0, r20
 605   // iadd r0, r0, r10
 606   // ixor DST, r0, r10
 607
 608   // mov r0, LHS
 609   SDValue r0 = LHS;
 610
 611   // mov r1, RHS
 612   SDValue r1 = RHS;
 613
 614   // ilt r10, r0, 0
 615   SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
 616
 617   // ilt r11, r1, 0
 618   SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
 619
 620   // iadd r0, r0, r10
 621   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 622
 623   // iadd r1, r1, r11
 624   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 625
 626   // ixor r0, r0, r10
 627   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 628
 629   // ixor r1, r1, r11
 630   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 631
 632   // udiv r20, r0, r1
 633   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
 634
 635   // umul r20, r20, r1
 636   r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
 637
 638   // sub r0, r0, r20
 639   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
 640
 641   // iadd r0, r0, r10
 642   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 643
 644   // ixor DST, r0, r10
 645   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 646   return DST;
 647 }
 648
 649 SDValue
 650 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
 651   return SDValue(Op.getNode(), 0);
 652 }