lib/Target/R600/AMDILISelLowering.cpp

   1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief TargetLowering functions borrowed from AMDIL.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDGPUISelLowering.h"
  16 #include "AMDGPURegisterInfo.h"
  17 #include "AMDGPUSubtarget.h"
  18 #include "AMDILIntrinsicInfo.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineRegisterInfo.h"
  21 #include "llvm/CodeGen/PseudoSourceValue.h"
  22 #include "llvm/CodeGen/SelectionDAG.h"
  23 #include "llvm/CodeGen/SelectionDAGNodes.h"
  24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/DerivedTypes.h"
  27 #include "llvm/IR/Instructions.h"
  28 #include "llvm/IR/Intrinsics.h"
  29 #include "llvm/Support/raw_ostream.h"
  30 #include "llvm/Target/TargetInstrInfo.h"
  31 #include "llvm/Target/TargetOptions.h"
  32
  33 using namespace llvm;
  34 //===----------------------------------------------------------------------===//
  35 // TargetLowering Implementation Help Functions End
  36 //===----------------------------------------------------------------------===//
  37
  38 //===----------------------------------------------------------------------===//
  39 // TargetLowering Class Implementation Begins
  40 //===----------------------------------------------------------------------===//
  41 void AMDGPUTargetLowering::InitAMDILLowering() {
  42   static const int types[] = {
  43     (int)MVT::i8,
  44     (int)MVT::i16,
  45     (int)MVT::i32,
  46     (int)MVT::f32,
  47     (int)MVT::f64,
  48     (int)MVT::i64,
  49     (int)MVT::v2i8,
  50     (int)MVT::v4i8,
  51     (int)MVT::v2i16,
  52     (int)MVT::v4i16,
  53     (int)MVT::v4f32,
  54     (int)MVT::v4i32,
  55     (int)MVT::v2f32,
  56     (int)MVT::v2i32,
  57     (int)MVT::v2f64,
  58     (int)MVT::v2i64
  59   };
  60
  61   static const int IntTypes[] = {
  62     (int)MVT::i8,
  63     (int)MVT::i16,
  64     (int)MVT::i32,
  65     (int)MVT::i64
  66   };
  67
  68   static const int FloatTypes[] = {
  69     (int)MVT::f32,
  70     (int)MVT::f64
  71   };
  72
  73   static const int VectorTypes[] = {
  74     (int)MVT::v2i8,
  75     (int)MVT::v4i8,
  76     (int)MVT::v2i16,
  77     (int)MVT::v4i16,
  78     (int)MVT::v4f32,
  79     (int)MVT::v4i32,
  80     (int)MVT::v2f32,
  81     (int)MVT::v2i32,
  82     (int)MVT::v2f64,
  83     (int)MVT::v2i64
  84   };
  85   const size_t NumTypes = array_lengthof(types);
  86   const size_t NumFloatTypes = array_lengthof(FloatTypes);
  87   const size_t NumIntTypes = array_lengthof(IntTypes);
  88   const size_t NumVectorTypes = array_lengthof(VectorTypes);
  89
  90   const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  91   // These are the current register classes that are
  92   // supported
  93
  94   for (unsigned int x  = 0; x < NumTypes; ++x) {
  95     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
  96
  97     setOperationAction(ISD::SUBE, VT, Expand);
  98     setOperationAction(ISD::SUBC, VT, Expand);
  99     setOperationAction(ISD::ADDE, VT, Expand);
 100     setOperationAction(ISD::ADDC, VT, Expand);
 101     setOperationAction(ISD::BRCOND, VT, Custom);
 102     setOperationAction(ISD::BR_JT, VT, Expand);
 103     setOperationAction(ISD::BRIND, VT, Expand);
 104     // TODO: Implement custom UREM/SREM routines
 105     setOperationAction(ISD::SREM, VT, Expand);
 106     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 107     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 108     if (VT != MVT::i64 && VT != MVT::v2i64) {
 109       setOperationAction(ISD::SDIV, VT, Custom);
 110     }
 111   }
 112   for (unsigned int x = 0; x < NumFloatTypes; ++x) {
 113     MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
 114
 115     // IL does not have these operations for floating point types
 116     setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
 117     setOperationAction(ISD::SETOLT, VT, Expand);
 118     setOperationAction(ISD::SETOGE, VT, Expand);
 119     setOperationAction(ISD::SETOGT, VT, Expand);
 120     setOperationAction(ISD::SETOLE, VT, Expand);
 121     setOperationAction(ISD::SETULT, VT, Expand);
 122     setOperationAction(ISD::SETUGE, VT, Expand);
 123     setOperationAction(ISD::SETUGT, VT, Expand);
 124     setOperationAction(ISD::SETULE, VT, Expand);
 125   }
 126
 127   for (unsigned int x = 0; x < NumIntTypes; ++x) {
 128     MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
 129
 130     // GPU also does not have divrem function for signed or unsigned
 131     setOperationAction(ISD::SDIVREM, VT, Expand);
 132
 133     // GPU does not have [S|U]MUL_LOHI functions as a single instruction
 134     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 135     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 136
 137     setOperationAction(ISD::BSWAP, VT, Expand);
 138
 139     // GPU doesn't have any counting operators
 140     setOperationAction(ISD::CTPOP, VT, Expand);
 141     setOperationAction(ISD::CTTZ, VT, Expand);
 142     setOperationAction(ISD::CTLZ, VT, Expand);
 143   }
 144
 145   for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
 146     MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
 147
 148     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
 149     setOperationAction(ISD::SDIVREM, VT, Expand);
 150     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 151     // setOperationAction(ISD::VSETCC, VT, Expand);
 152     setOperationAction(ISD::SELECT_CC, VT, Expand);
 153
 154   }
 155   setOperationAction(ISD::MULHU, MVT::i64, Expand);
 156   setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
 157   setOperationAction(ISD::MULHS, MVT::i64, Expand);
 158   setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
 159   setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 160   setOperationAction(ISD::SREM, MVT::v2i64, Expand);
 161   setOperationAction(ISD::Constant          , MVT::i64  , Legal);
 162   setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
 163   setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
 164   setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
 165   setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
 166   setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
 167   if (STM.hasHWFP64()) {
 168     // we support loading/storing v2f64 but not operations on the type
 169     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 170     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 171     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 172     setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
 173     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
 174     setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
 175     // We want to expand vector conversions into their scalar
 176     // counterparts.
 177     setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
 178     setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
 179     setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
 180     setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
 181     setOperationAction(ISD::FABS, MVT::f64, Expand);
 182     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 183   }
 184   // TODO: Fix the UDIV24 algorithm so it works for these
 185   // types correctly. This needs vector comparisons
 186   // for this to work correctly.
 187   setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
 188   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
 189   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
 190   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
 191   setOperationAction(ISD::SUBC, MVT::Other, Expand);
 192   setOperationAction(ISD::ADDE, MVT::Other, Expand);
 193   setOperationAction(ISD::ADDC, MVT::Other, Expand);
 194   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 195   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 196   setOperationAction(ISD::BRIND, MVT::Other, Expand);
 197
 198
 199   // Use the default implementation.
 200   setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
 201   setOperationAction(ISD::Constant          , MVT::i32    , Legal);
 202
 203   setSchedulingPreference(Sched::RegPressure);
 204   setPow2DivIsCheap(false);
 205   setSelectIsExpensive(true);
 206   setJumpIsExpensive(true);
 207
 208   MaxStoresPerMemcpy  = 4096;
 209   MaxStoresPerMemmove = 4096;
 210   MaxStoresPerMemset  = 4096;
 211
 212 }
 213
 214 bool
 215 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 216     const CallInst &I, unsigned Intrinsic) const {
 217   return false;
 218 }
 219
 220 // The backend supports 32 and 64 bit floating point immediates
 221 bool
 222 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
 223   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 224       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 225     return true;
 226   } else {
 227     return false;
 228   }
 229 }
 230
 231 bool
 232 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
 233   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 234       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 235     return false;
 236   } else {
 237     return true;
 238   }
 239 }
 240
 241
 242 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 243 // be zero. Op is expected to be a target specific node. Used by DAG
 244 // combiner.
 245
 246 void
 247 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
 248     const SDValue Op,
 249     APInt &KnownZero,
 250     APInt &KnownOne,
 251     const SelectionDAG &DAG,
 252     unsigned Depth) const {
 253   APInt KnownZero2;
 254   APInt KnownOne2;
 255   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
 256   switch (Op.getOpcode()) {
 257     default: break;
 258     case ISD::SELECT_CC:
 259              DAG.ComputeMaskedBits(
 260                  Op.getOperand(1),
 261                  KnownZero,
 262                  KnownOne,
 263                  Depth + 1
 264                  );
 265              DAG.ComputeMaskedBits(
 266                  Op.getOperand(0),
 267                  KnownZero2,
 268                  KnownOne2
 269                  );
 270              assert((KnownZero & KnownOne) == 0
 271                  && "Bits known to be one AND zero?");
 272              assert((KnownZero2 & KnownOne2) == 0
 273                  && "Bits known to be one AND zero?");
 274              // Only known if known in both the LHS and RHS
 275              KnownOne &= KnownOne2;
 276              KnownZero &= KnownZero2;
 277              break;
 278   };
 279 }
 280
 281 //===----------------------------------------------------------------------===//
 282 //                           Other Lowering Hooks
 283 //===----------------------------------------------------------------------===//
 284
 285 SDValue
 286 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
 287   EVT OVT = Op.getValueType();
 288   SDValue DST;
 289   if (OVT.getScalarType() == MVT::i64) {
 290     DST = LowerSDIV64(Op, DAG);
 291   } else if (OVT.getScalarType() == MVT::i32) {
 292     DST = LowerSDIV32(Op, DAG);
 293   } else if (OVT.getScalarType() == MVT::i16
 294       || OVT.getScalarType() == MVT::i8) {
 295     DST = LowerSDIV24(Op, DAG);
 296   } else {
 297     DST = SDValue(Op.getNode(), 0);
 298   }
 299   return DST;
 300 }
 301
 302 SDValue
 303 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
 304   EVT OVT = Op.getValueType();
 305   SDValue DST;
 306   if (OVT.getScalarType() == MVT::i64) {
 307     DST = LowerSREM64(Op, DAG);
 308   } else if (OVT.getScalarType() == MVT::i32) {
 309     DST = LowerSREM32(Op, DAG);
 310   } else if (OVT.getScalarType() == MVT::i16) {
 311     DST = LowerSREM16(Op, DAG);
 312   } else if (OVT.getScalarType() == MVT::i8) {
 313     DST = LowerSREM8(Op, DAG);
 314   } else {
 315     DST = SDValue(Op.getNode(), 0);
 316   }
 317   return DST;
 318 }
 319
 320 EVT
 321 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
 322   int iSize = (size * numEle);
 323   int vEle = (iSize >> ((size == 64) ? 6 : 5));
 324   if (!vEle) {
 325     vEle = 1;
 326   }
 327   if (size == 64) {
 328     if (vEle == 1) {
 329       return EVT(MVT::i64);
 330     } else {
 331       return EVT(MVT::getVectorVT(MVT::i64, vEle));
 332     }
 333   } else {
 334     if (vEle == 1) {
 335       return EVT(MVT::i32);
 336     } else {
 337       return EVT(MVT::getVectorVT(MVT::i32, vEle));
 338     }
 339   }
 340 }
 341
 342 SDValue
 343 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 344   SDValue Chain = Op.getOperand(0);
 345   SDValue Cond  = Op.getOperand(1);
 346   SDValue Jump  = Op.getOperand(2);
 347   SDValue Result;
 348   Result = DAG.getNode(
 349       AMDGPUISD::BRANCH_COND,
 350       SDLoc(Op),
 351       Op.getValueType(),
 352       Chain, Jump, Cond);
 353   return Result;
 354 }
 355
 356 SDValue
 357 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
 358   SDLoc DL(Op);
 359   EVT OVT = Op.getValueType();
 360   SDValue LHS = Op.getOperand(0);
 361   SDValue RHS = Op.getOperand(1);
 362   MVT INTTY;
 363   MVT FLTTY;
 364   if (!OVT.isVector()) {
 365     INTTY = MVT::i32;
 366     FLTTY = MVT::f32;
 367   } else if (OVT.getVectorNumElements() == 2) {
 368     INTTY = MVT::v2i32;
 369     FLTTY = MVT::v2f32;
 370   } else if (OVT.getVectorNumElements() == 4) {
 371     INTTY = MVT::v4i32;
 372     FLTTY = MVT::v4f32;
 373   }
 374   unsigned bitsize = OVT.getScalarType().getSizeInBits();
 375   // char|short jq = ia ^ ib;
 376   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
 377
 378   // jq = jq >> (bitsize - 2)
 379   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
 380
 381   // jq = jq | 0x1
 382   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
 383
 384   // jq = (int)jq
 385   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
 386
 387   // int ia = (int)LHS;
 388   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
 389
 390   // int ib, (int)RHS;
 391   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
 392
 393   // float fa = (float)ia;
 394   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
 395
 396   // float fb = (float)ib;
 397   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
 398
 399   // float fq = native_divide(fa, fb);
 400   SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
 401
 402   // fq = trunc(fq);
 403   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
 404
 405   // float fqneg = -fq;
 406   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
 407
 408   // float fr = mad(fqneg, fb, fa);
 409   SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
 410       DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
 411
 412   // int iq = (int)fq;
 413   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
 414
 415   // fr = fabs(fr);
 416   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
 417
 418   // fb = fabs(fb);
 419   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
 420
 421   // int cv = fr >= fb;
 422   SDValue cv;
 423   if (INTTY == MVT::i32) {
 424     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 425   } else {
 426     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 427   }
 428   // jq = (cv ? jq : 0);
 429   jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
 430       DAG.getConstant(0, OVT));
 431   // dst = iq + jq;
 432   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
 433   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
 434   return iq;
 435 }
 436
 437 SDValue
 438 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
 439   SDLoc DL(Op);
 440   EVT OVT = Op.getValueType();
 441   SDValue LHS = Op.getOperand(0);
 442   SDValue RHS = Op.getOperand(1);
 443   // The LowerSDIV32 function generates equivalent to the following IL.
 444   // mov r0, LHS
 445   // mov r1, RHS
 446   // ilt r10, r0, 0
 447   // ilt r11, r1, 0
 448   // iadd r0, r0, r10
 449   // iadd r1, r1, r11
 450   // ixor r0, r0, r10
 451   // ixor r1, r1, r11
 452   // udiv r0, r0, r1
 453   // ixor r10, r10, r11
 454   // iadd r0, r0, r10
 455   // ixor DST, r0, r10
 456
 457   // mov r0, LHS
 458   SDValue r0 = LHS;
 459
 460   // mov r1, RHS
 461   SDValue r1 = RHS;
 462
 463   // ilt r10, r0, 0
 464   SDValue r10 = DAG.getSelectCC(DL,
 465       r0, DAG.getConstant(0, OVT),
 466       DAG.getConstant(-1, MVT::i32),
 467       DAG.getConstant(0, MVT::i32),
 468       ISD::SETLT);
 469
 470   // ilt r11, r1, 0
 471   SDValue r11 = DAG.getSelectCC(DL,
 472       r1, DAG.getConstant(0, OVT),
 473       DAG.getConstant(-1, MVT::i32),
 474       DAG.getConstant(0, MVT::i32),
 475       ISD::SETLT);
 476
 477   // iadd r0, r0, r10
 478   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 479
 480   // iadd r1, r1, r11
 481   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 482
 483   // ixor r0, r0, r10
 484   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 485
 486   // ixor r1, r1, r11
 487   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 488
 489   // udiv r0, r0, r1
 490   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
 491
 492   // ixor r10, r10, r11
 493   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
 494
 495   // iadd r0, r0, r10
 496   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 497
 498   // ixor DST, r0, r10
 499   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 500   return DST;
 501 }
 502
 503 SDValue
 504 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
 505   return SDValue(Op.getNode(), 0);
 506 }
 507
 508 SDValue
 509 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
 510   SDLoc DL(Op);
 511   EVT OVT = Op.getValueType();
 512   MVT INTTY = MVT::i32;
 513   if (OVT == MVT::v2i8) {
 514     INTTY = MVT::v2i32;
 515   } else if (OVT == MVT::v4i8) {
 516     INTTY = MVT::v4i32;
 517   }
 518   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 519   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 520   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 521   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 522   return LHS;
 523 }
 524
 525 SDValue
 526 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
 527   SDLoc DL(Op);
 528   EVT OVT = Op.getValueType();
 529   MVT INTTY = MVT::i32;
 530   if (OVT == MVT::v2i16) {
 531     INTTY = MVT::v2i32;
 532   } else if (OVT == MVT::v4i16) {
 533     INTTY = MVT::v4i32;
 534   }
 535   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 536   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 537   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 538   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 539   return LHS;
 540 }
 541
 542 SDValue
 543 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
 544   SDLoc DL(Op);
 545   EVT OVT = Op.getValueType();
 546   SDValue LHS = Op.getOperand(0);
 547   SDValue RHS = Op.getOperand(1);
 548   // The LowerSREM32 function generates equivalent to the following IL.
 549   // mov r0, LHS
 550   // mov r1, RHS
 551   // ilt r10, r0, 0
 552   // ilt r11, r1, 0
 553   // iadd r0, r0, r10
 554   // iadd r1, r1, r11
 555   // ixor r0, r0, r10
 556   // ixor r1, r1, r11
 557   // udiv r20, r0, r1
 558   // umul r20, r20, r1
 559   // sub r0, r0, r20
 560   // iadd r0, r0, r10
 561   // ixor DST, r0, r10
 562
 563   // mov r0, LHS
 564   SDValue r0 = LHS;
 565
 566   // mov r1, RHS
 567   SDValue r1 = RHS;
 568
 569   // ilt r10, r0, 0
 570   SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
 571
 572   // ilt r11, r1, 0
 573   SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
 574
 575   // iadd r0, r0, r10
 576   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 577
 578   // iadd r1, r1, r11
 579   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 580
 581   // ixor r0, r0, r10
 582   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 583
 584   // ixor r1, r1, r11
 585   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 586
 587   // udiv r20, r0, r1
 588   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
 589
 590   // umul r20, r20, r1
 591   r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
 592
 593   // sub r0, r0, r20
 594   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
 595
 596   // iadd r0, r0, r10
 597   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 598
 599   // ixor DST, r0, r10
 600   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 601   return DST;
 602 }
 603
 604 SDValue
 605 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
 606   return SDValue(Op.getNode(), 0);
 607 }