lib/Target/AMDGPU/AMDILISelLowering.cpp

   1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 // This file implements the interfaces that AMDIL uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDILISelLowering.h"
  16 #include "AMDILDevices.h"
  17 #include "AMDILIntrinsicInfo.h"
  18 #include "AMDILRegisterInfo.h"
  19 #include "AMDILSubtarget.h"
  20 #include "AMDILUtilityFunctions.h"
  21 #include "llvm/CallingConv.h"
  22 #include "llvm/CodeGen/MachineFrameInfo.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/PseudoSourceValue.h"
  25 #include "llvm/CodeGen/SelectionDAG.h"
  26 #include "llvm/CodeGen/SelectionDAGNodes.h"
  27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  28 #include "llvm/DerivedTypes.h"
  29 #include "llvm/Instructions.h"
  30 #include "llvm/Intrinsics.h"
  31 #include "llvm/Support/raw_ostream.h"
  32 #include "llvm/Target/TargetInstrInfo.h"
  33 #include "llvm/Target/TargetOptions.h"
  34
  35 using namespace llvm;
  36 #define ISDBITCAST  ISD::BITCAST
  37 #define MVTGLUE     MVT::Glue
  38 //===----------------------------------------------------------------------===//
  39 // Calling Convention Implementation
  40 //===----------------------------------------------------------------------===//
  41 #include "AMDGPUGenCallingConv.inc"
  42
  43 //===----------------------------------------------------------------------===//
  44 // TargetLowering Implementation Help Functions Begin
  45 //===----------------------------------------------------------------------===//
  46   static SDValue
  47 getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
  48 {
  49   DebugLoc DL = Src.getDebugLoc();
  50   EVT svt = Src.getValueType().getScalarType();
  51   EVT dvt = Dst.getValueType().getScalarType();
  52   if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
  53     if (dvt.bitsGT(svt)) {
  54       Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
  55     } else if (svt.bitsLT(svt)) {
  56       Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
  57           DAG.getConstant(1, MVT::i32));
  58     }
  59   } else if (svt.isInteger() && dvt.isInteger()) {
  60     if (!svt.bitsEq(dvt)) {
  61       Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  62     }
  63   } else if (svt.isInteger()) {
  64     unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
  65     if (!svt.bitsEq(dvt)) {
  66       if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
  67         Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
  68       } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
  69         Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
  70       } else {
  71         assert(0 && "We only support 32 and 64bit fp types");
  72       }
  73     }
  74     Src = DAG.getNode(opcode, DL, dvt, Src);
  75   } else if (dvt.isInteger()) {
  76     unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
  77     if (svt.getSimpleVT().SimpleTy == MVT::f32) {
  78       Src = DAG.getNode(opcode, DL, MVT::i32, Src);
  79     } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
  80       Src = DAG.getNode(opcode, DL, MVT::i64, Src);
  81     } else {
  82       assert(0 && "We only support 32 and 64bit fp types");
  83     }
  84     Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  85   }
  86   return Src;
  87 }
  88 // CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
  89 // condition.
  90   static AMDILCC::CondCodes
  91 CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
  92 {
  93   switch (CC) {
  94     default:
  95       {
  96         errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
  97         assert(0 && "Unknown condition code!");
  98       }
  99     case ISD::SETO:
 100       switch(type) {
 101         case MVT::f32:
 102           return AMDILCC::IL_CC_F_O;
 103         case MVT::f64:
 104           return AMDILCC::IL_CC_D_O;
 105         default:
 106           assert(0 && "Opcode combination not generated correctly!");
 107           return AMDILCC::COND_ERROR;
 108       };
 109     case ISD::SETUO:
 110       switch(type) {
 111         case MVT::f32:
 112           return AMDILCC::IL_CC_F_UO;
 113         case MVT::f64:
 114           return AMDILCC::IL_CC_D_UO;
 115         default:
 116           assert(0 && "Opcode combination not generated correctly!");
 117           return AMDILCC::COND_ERROR;
 118       };
 119     case ISD::SETGT:
 120       switch (type) {
 121         case MVT::i1:
 122         case MVT::i8:
 123         case MVT::i16:
 124         case MVT::i32:
 125           return AMDILCC::IL_CC_I_GT;
 126         case MVT::f32:
 127           return AMDILCC::IL_CC_F_GT;
 128         case MVT::f64:
 129           return AMDILCC::IL_CC_D_GT;
 130         case MVT::i64:
 131           return AMDILCC::IL_CC_L_GT;
 132         default:
 133           assert(0 && "Opcode combination not generated correctly!");
 134           return AMDILCC::COND_ERROR;
 135       };
 136     case ISD::SETGE:
 137       switch (type) {
 138         case MVT::i1:
 139         case MVT::i8:
 140         case MVT::i16:
 141         case MVT::i32:
 142           return AMDILCC::IL_CC_I_GE;
 143         case MVT::f32:
 144           return AMDILCC::IL_CC_F_GE;
 145         case MVT::f64:
 146           return AMDILCC::IL_CC_D_GE;
 147         case MVT::i64:
 148           return AMDILCC::IL_CC_L_GE;
 149         default:
 150           assert(0 && "Opcode combination not generated correctly!");
 151           return AMDILCC::COND_ERROR;
 152       };
 153     case ISD::SETLT:
 154       switch (type) {
 155         case MVT::i1:
 156         case MVT::i8:
 157         case MVT::i16:
 158         case MVT::i32:
 159           return AMDILCC::IL_CC_I_LT;
 160         case MVT::f32:
 161           return AMDILCC::IL_CC_F_LT;
 162         case MVT::f64:
 163           return AMDILCC::IL_CC_D_LT;
 164         case MVT::i64:
 165           return AMDILCC::IL_CC_L_LT;
 166         default:
 167           assert(0 && "Opcode combination not generated correctly!");
 168           return AMDILCC::COND_ERROR;
 169       };
 170     case ISD::SETLE:
 171       switch (type) {
 172         case MVT::i1:
 173         case MVT::i8:
 174         case MVT::i16:
 175         case MVT::i32:
 176           return AMDILCC::IL_CC_I_LE;
 177         case MVT::f32:
 178           return AMDILCC::IL_CC_F_LE;
 179         case MVT::f64:
 180           return AMDILCC::IL_CC_D_LE;
 181         case MVT::i64:
 182           return AMDILCC::IL_CC_L_LE;
 183         default:
 184           assert(0 && "Opcode combination not generated correctly!");
 185           return AMDILCC::COND_ERROR;
 186       };
 187     case ISD::SETNE:
 188       switch (type) {
 189         case MVT::i1:
 190         case MVT::i8:
 191         case MVT::i16:
 192         case MVT::i32:
 193           return AMDILCC::IL_CC_I_NE;
 194         case MVT::f32:
 195           return AMDILCC::IL_CC_F_NE;
 196         case MVT::f64:
 197           return AMDILCC::IL_CC_D_NE;
 198         case MVT::i64:
 199           return AMDILCC::IL_CC_L_NE;
 200         default:
 201           assert(0 && "Opcode combination not generated correctly!");
 202           return AMDILCC::COND_ERROR;
 203       };
 204     case ISD::SETEQ:
 205       switch (type) {
 206         case MVT::i1:
 207         case MVT::i8:
 208         case MVT::i16:
 209         case MVT::i32:
 210           return AMDILCC::IL_CC_I_EQ;
 211         case MVT::f32:
 212           return AMDILCC::IL_CC_F_EQ;
 213         case MVT::f64:
 214           return AMDILCC::IL_CC_D_EQ;
 215         case MVT::i64:
 216           return AMDILCC::IL_CC_L_EQ;
 217         default:
 218           assert(0 && "Opcode combination not generated correctly!");
 219           return AMDILCC::COND_ERROR;
 220       };
 221     case ISD::SETUGT:
 222       switch (type) {
 223         case MVT::i1:
 224         case MVT::i8:
 225         case MVT::i16:
 226         case MVT::i32:
 227           return AMDILCC::IL_CC_U_GT;
 228         case MVT::f32:
 229           return AMDILCC::IL_CC_F_UGT;
 230         case MVT::f64:
 231           return AMDILCC::IL_CC_D_UGT;
 232         case MVT::i64:
 233           return AMDILCC::IL_CC_UL_GT;
 234         default:
 235           assert(0 && "Opcode combination not generated correctly!");
 236           return AMDILCC::COND_ERROR;
 237       };
 238     case ISD::SETUGE:
 239       switch (type) {
 240         case MVT::i1:
 241         case MVT::i8:
 242         case MVT::i16:
 243         case MVT::i32:
 244           return AMDILCC::IL_CC_U_GE;
 245         case MVT::f32:
 246           return AMDILCC::IL_CC_F_UGE;
 247         case MVT::f64:
 248           return AMDILCC::IL_CC_D_UGE;
 249         case MVT::i64:
 250           return AMDILCC::IL_CC_UL_GE;
 251         default:
 252           assert(0 && "Opcode combination not generated correctly!");
 253           return AMDILCC::COND_ERROR;
 254       };
 255     case ISD::SETULT:
 256       switch (type) {
 257         case MVT::i1:
 258         case MVT::i8:
 259         case MVT::i16:
 260         case MVT::i32:
 261           return AMDILCC::IL_CC_U_LT;
 262         case MVT::f32:
 263           return AMDILCC::IL_CC_F_ULT;
 264         case MVT::f64:
 265           return AMDILCC::IL_CC_D_ULT;
 266         case MVT::i64:
 267           return AMDILCC::IL_CC_UL_LT;
 268         default:
 269           assert(0 && "Opcode combination not generated correctly!");
 270           return AMDILCC::COND_ERROR;
 271       };
 272     case ISD::SETULE:
 273       switch (type) {
 274         case MVT::i1:
 275         case MVT::i8:
 276         case MVT::i16:
 277         case MVT::i32:
 278           return AMDILCC::IL_CC_U_LE;
 279         case MVT::f32:
 280           return AMDILCC::IL_CC_F_ULE;
 281         case MVT::f64:
 282           return AMDILCC::IL_CC_D_ULE;
 283         case MVT::i64:
 284           return AMDILCC::IL_CC_UL_LE;
 285         default:
 286           assert(0 && "Opcode combination not generated correctly!");
 287           return AMDILCC::COND_ERROR;
 288       };
 289     case ISD::SETUNE:
 290       switch (type) {
 291         case MVT::i1:
 292         case MVT::i8:
 293         case MVT::i16:
 294         case MVT::i32:
 295           return AMDILCC::IL_CC_U_NE;
 296         case MVT::f32:
 297           return AMDILCC::IL_CC_F_UNE;
 298         case MVT::f64:
 299           return AMDILCC::IL_CC_D_UNE;
 300         case MVT::i64:
 301           return AMDILCC::IL_CC_UL_NE;
 302         default:
 303           assert(0 && "Opcode combination not generated correctly!");
 304           return AMDILCC::COND_ERROR;
 305       };
 306     case ISD::SETUEQ:
 307       switch (type) {
 308         case MVT::i1:
 309         case MVT::i8:
 310         case MVT::i16:
 311         case MVT::i32:
 312           return AMDILCC::IL_CC_U_EQ;
 313         case MVT::f32:
 314           return AMDILCC::IL_CC_F_UEQ;
 315         case MVT::f64:
 316           return AMDILCC::IL_CC_D_UEQ;
 317         case MVT::i64:
 318           return AMDILCC::IL_CC_UL_EQ;
 319         default:
 320           assert(0 && "Opcode combination not generated correctly!");
 321           return AMDILCC::COND_ERROR;
 322       };
 323     case ISD::SETOGT:
 324       switch (type) {
 325         case MVT::f32:
 326           return AMDILCC::IL_CC_F_OGT;
 327         case MVT::f64:
 328           return AMDILCC::IL_CC_D_OGT;
 329         case MVT::i1:
 330         case MVT::i8:
 331         case MVT::i16:
 332         case MVT::i32:
 333         case MVT::i64:
 334         default:
 335           assert(0 && "Opcode combination not generated correctly!");
 336           return AMDILCC::COND_ERROR;
 337       };
 338     case ISD::SETOGE:
 339       switch (type) {
 340         case MVT::f32:
 341           return AMDILCC::IL_CC_F_OGE;
 342         case MVT::f64:
 343           return AMDILCC::IL_CC_D_OGE;
 344         case MVT::i1:
 345         case MVT::i8:
 346         case MVT::i16:
 347         case MVT::i32:
 348         case MVT::i64:
 349         default:
 350           assert(0 && "Opcode combination not generated correctly!");
 351           return AMDILCC::COND_ERROR;
 352       };
 353     case ISD::SETOLT:
 354       switch (type) {
 355         case MVT::f32:
 356           return AMDILCC::IL_CC_F_OLT;
 357         case MVT::f64:
 358           return AMDILCC::IL_CC_D_OLT;
 359         case MVT::i1:
 360         case MVT::i8:
 361         case MVT::i16:
 362         case MVT::i32:
 363         case MVT::i64:
 364         default:
 365           assert(0 && "Opcode combination not generated correctly!");
 366           return AMDILCC::COND_ERROR;
 367       };
 368     case ISD::SETOLE:
 369       switch (type) {
 370         case MVT::f32:
 371           return AMDILCC::IL_CC_F_OLE;
 372         case MVT::f64:
 373           return AMDILCC::IL_CC_D_OLE;
 374         case MVT::i1:
 375         case MVT::i8:
 376         case MVT::i16:
 377         case MVT::i32:
 378         case MVT::i64:
 379         default:
 380           assert(0 && "Opcode combination not generated correctly!");
 381           return AMDILCC::COND_ERROR;
 382       };
 383     case ISD::SETONE:
 384       switch (type) {
 385         case MVT::f32:
 386           return AMDILCC::IL_CC_F_ONE;
 387         case MVT::f64:
 388           return AMDILCC::IL_CC_D_ONE;
 389         case MVT::i1:
 390         case MVT::i8:
 391         case MVT::i16:
 392         case MVT::i32:
 393         case MVT::i64:
 394         default:
 395           assert(0 && "Opcode combination not generated correctly!");
 396           return AMDILCC::COND_ERROR;
 397       };
 398     case ISD::SETOEQ:
 399       switch (type) {
 400         case MVT::f32:
 401           return AMDILCC::IL_CC_F_OEQ;
 402         case MVT::f64:
 403           return AMDILCC::IL_CC_D_OEQ;
 404         case MVT::i1:
 405         case MVT::i8:
 406         case MVT::i16:
 407         case MVT::i32:
 408         case MVT::i64:
 409         default:
 410           assert(0 && "Opcode combination not generated correctly!");
 411           return AMDILCC::COND_ERROR;
 412       };
 413   };
 414 }
 415
 416 SDValue
 417 AMDILTargetLowering::LowerMemArgument(
 418     SDValue Chain,
 419     CallingConv::ID CallConv,
 420     const SmallVectorImpl<ISD::InputArg> &Ins,
 421     DebugLoc dl, SelectionDAG &DAG,
 422     const CCValAssign &VA,
 423     MachineFrameInfo *MFI,
 424     unsigned i) const
 425 {
 426   // Create the nodes corresponding to a load from this parameter slot.
 427   ISD::ArgFlagsTy Flags = Ins[i].Flags;
 428
 429   bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
 430     getTargetMachine().Options.GuaranteedTailCallOpt;
 431   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
 432
 433   // FIXME: For now, all byval parameter objects are marked mutable. This can
 434   // be changed with more analysis.
 435   // In case of tail call optimization mark all arguments mutable. Since they
 436   // could be overwritten by lowering of arguments in case of a tail call.
 437   int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
 438       VA.getLocMemOffset(), isImmutable);
 439   SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
 440
 441   if (Flags.isByVal())
 442     return FIN;
 443   return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
 444       MachinePointerInfo::getFixedStack(FI),
 445       false, false, false, 0);
 446 }
 447 //===----------------------------------------------------------------------===//
 448 // TargetLowering Implementation Help Functions End
 449 //===----------------------------------------------------------------------===//
 450
 451 //===----------------------------------------------------------------------===//
 452 // TargetLowering Class Implementation Begins
 453 //===----------------------------------------------------------------------===//
 454   AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
 455 : TargetLowering(TM, new TargetLoweringObjectFileELF())
 456 {
 457   int types[] =
 458   {
 459     (int)MVT::i8,
 460     (int)MVT::i16,
 461     (int)MVT::i32,
 462     (int)MVT::f32,
 463     (int)MVT::f64,
 464     (int)MVT::i64,
 465     (int)MVT::v2i8,
 466     (int)MVT::v4i8,
 467     (int)MVT::v2i16,
 468     (int)MVT::v4i16,
 469     (int)MVT::v4f32,
 470     (int)MVT::v4i32,
 471     (int)MVT::v2f32,
 472     (int)MVT::v2i32,
 473     (int)MVT::v2f64,
 474     (int)MVT::v2i64
 475   };
 476
 477   int IntTypes[] =
 478   {
 479     (int)MVT::i8,
 480     (int)MVT::i16,
 481     (int)MVT::i32,
 482     (int)MVT::i64
 483   };
 484
 485   int FloatTypes[] =
 486   {
 487     (int)MVT::f32,
 488     (int)MVT::f64
 489   };
 490
 491   int VectorTypes[] =
 492   {
 493     (int)MVT::v2i8,
 494     (int)MVT::v4i8,
 495     (int)MVT::v2i16,
 496     (int)MVT::v4i16,
 497     (int)MVT::v4f32,
 498     (int)MVT::v4i32,
 499     (int)MVT::v2f32,
 500     (int)MVT::v2i32,
 501     (int)MVT::v2f64,
 502     (int)MVT::v2i64
 503   };
 504   size_t numTypes = sizeof(types) / sizeof(*types);
 505   size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
 506   size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
 507   size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
 508
 509   const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
 510   // These are the current register classes that are
 511   // supported
 512
 513   for (unsigned int x  = 0; x < numTypes; ++x) {
 514     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
 515
 516     //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
 517     // We cannot sextinreg, expand to shifts
 518     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
 519     setOperationAction(ISD::SUBE, VT, Expand);
 520     setOperationAction(ISD::SUBC, VT, Expand);
 521     setOperationAction(ISD::ADDE, VT, Expand);
 522     setOperationAction(ISD::ADDC, VT, Expand);
 523     setOperationAction(ISD::SETCC, VT, Custom);
 524     setOperationAction(ISD::BRCOND, VT, Custom);
 525     setOperationAction(ISD::BR_CC, VT, Custom);
 526     setOperationAction(ISD::BR_JT, VT, Expand);
 527     setOperationAction(ISD::BRIND, VT, Expand);
 528     // TODO: Implement custom UREM/SREM routines
 529     setOperationAction(ISD::SREM, VT, Expand);
 530     setOperationAction(ISD::GlobalAddress, VT, Custom);
 531     setOperationAction(ISD::JumpTable, VT, Custom);
 532     setOperationAction(ISD::ConstantPool, VT, Custom);
 533     setOperationAction(ISD::SELECT, VT, Custom);
 534     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 535     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 536     if (VT != MVT::i64 && VT != MVT::v2i64) {
 537       setOperationAction(ISD::SDIV, VT, Custom);
 538     }
 539   }
 540   for (unsigned int x = 0; x < numFloatTypes; ++x) {
 541     MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
 542
 543     // IL does not have these operations for floating point types
 544     setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
 545     setOperationAction(ISD::SETOLT, VT, Expand);
 546     setOperationAction(ISD::SETOGE, VT, Expand);
 547     setOperationAction(ISD::SETOGT, VT, Expand);
 548     setOperationAction(ISD::SETOLE, VT, Expand);
 549     setOperationAction(ISD::SETULT, VT, Expand);
 550     setOperationAction(ISD::SETUGE, VT, Expand);
 551     setOperationAction(ISD::SETUGT, VT, Expand);
 552     setOperationAction(ISD::SETULE, VT, Expand);
 553   }
 554
 555   for (unsigned int x = 0; x < numIntTypes; ++x) {
 556     MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
 557
 558     // GPU also does not have divrem function for signed or unsigned
 559     setOperationAction(ISD::SDIVREM, VT, Expand);
 560
 561     // GPU does not have [S|U]MUL_LOHI functions as a single instruction
 562     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 563     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 564
 565     // GPU doesn't have a rotl, rotr, or byteswap instruction
 566     setOperationAction(ISD::ROTR, VT, Expand);
 567     setOperationAction(ISD::BSWAP, VT, Expand);
 568
 569     // GPU doesn't have any counting operators
 570     setOperationAction(ISD::CTPOP, VT, Expand);
 571     setOperationAction(ISD::CTTZ, VT, Expand);
 572     setOperationAction(ISD::CTLZ, VT, Expand);
 573   }
 574
 575   for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
 576   {
 577     MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
 578
 579     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 580     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
 581     setOperationAction(ISD::SDIVREM, VT, Expand);
 582     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 583     // setOperationAction(ISD::VSETCC, VT, Expand);
 584     setOperationAction(ISD::SETCC, VT, Expand);
 585     setOperationAction(ISD::SELECT_CC, VT, Expand);
 586     setOperationAction(ISD::SELECT, VT, Expand);
 587
 588   }
 589   if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
 590     setOperationAction(ISD::MULHU, MVT::i64, Expand);
 591     setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
 592     setOperationAction(ISD::MULHS, MVT::i64, Expand);
 593     setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
 594     setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 595     setOperationAction(ISD::SREM, MVT::v2i64, Expand);
 596     setOperationAction(ISD::Constant          , MVT::i64  , Legal);
 597     setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
 598     setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
 599     setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
 600     setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
 601     setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
 602   }
 603   if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
 604     // we support loading/storing v2f64 but not operations on the type
 605     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 606     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 607     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 608     setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
 609     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
 610     setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
 611     // We want to expand vector conversions into their scalar
 612     // counterparts.
 613     setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
 614     setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
 615     setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
 616     setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
 617     setOperationAction(ISD::FABS, MVT::f64, Expand);
 618     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 619   }
 620   // TODO: Fix the UDIV24 algorithm so it works for these
 621   // types correctly. This needs vector comparisons
 622   // for this to work correctly.
 623   setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
 624   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
 625   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
 626   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
 627   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
 628   setOperationAction(ISD::SUBC, MVT::Other, Expand);
 629   setOperationAction(ISD::ADDE, MVT::Other, Expand);
 630   setOperationAction(ISD::ADDC, MVT::Other, Expand);
 631   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 632   setOperationAction(ISD::BR_CC, MVT::Other, Custom);
 633   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 634   setOperationAction(ISD::BRIND, MVT::Other, Expand);
 635   setOperationAction(ISD::SETCC, MVT::Other, Custom);
 636   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
 637
 638   setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
 639   // Use the default implementation.
 640   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 641   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 642   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 643   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 644   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 645   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
 646   setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
 647   setOperationAction(ISD::Constant          , MVT::i32    , Legal);
 648   setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
 649
 650   setStackPointerRegisterToSaveRestore(AMDGPU::SP);
 651   setSchedulingPreference(Sched::RegPressure);
 652   setPow2DivIsCheap(false);
 653   setPrefLoopAlignment(16);
 654   setSelectIsExpensive(true);
 655   setJumpIsExpensive(true);
 656
 657   maxStoresPerMemcpy  = 4096;
 658   maxStoresPerMemmove = 4096;
 659   maxStoresPerMemset  = 4096;
 660
 661 #undef numTypes
 662 #undef numIntTypes
 663 #undef numVectorTypes
 664 #undef numFloatTypes
 665 }
 666
 667 const char *
 668 AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
 669 {
 670   switch (Opcode) {
 671     default: return 0;
 672     case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
 673     case AMDILISD::MAD:  return "AMDILISD::MAD";
 674     case AMDILISD::CALL:  return "AMDILISD::CALL";
 675     case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
 676     case AMDILISD::UMUL: return "AMDILISD::UMUL";
 677     case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
 678     case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
 679     case AMDILISD::CMP: return "AMDILISD::CMP";
 680     case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
 681     case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
 682     case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
 683     case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
 684     case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
 685     case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
 686     case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
 687     case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
 688
 689   };
 690 }
 691 bool
 692 AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 693     const CallInst &I, unsigned Intrinsic) const
 694 {
 695   return false;
 696 }
 697
 698 // The backend supports 32 and 64 bit floating point immediates
 699 bool
 700 AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
 701 {
 702   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 703       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 704     return true;
 705   } else {
 706     return false;
 707   }
 708 }
 709
 710 bool
 711 AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
 712 {
 713   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 714       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 715     return false;
 716   } else {
 717     return true;
 718   }
 719 }
 720
 721
 722 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 723 // be zero. Op is expected to be a target specific node. Used by DAG
 724 // combiner.
 725
 726 void
 727 AMDILTargetLowering::computeMaskedBitsForTargetNode(
 728     const SDValue Op,
 729     APInt &KnownZero,
 730     APInt &KnownOne,
 731     const SelectionDAG &DAG,
 732     unsigned Depth) const
 733 {
 734   APInt KnownZero2;
 735   APInt KnownOne2;
 736   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
 737   switch (Op.getOpcode()) {
 738     default: break;
 739     case AMDILISD::SELECT_CC:
 740              DAG.ComputeMaskedBits(
 741                  Op.getOperand(1),
 742                  KnownZero,
 743                  KnownOne,
 744                  Depth + 1
 745                  );
 746              DAG.ComputeMaskedBits(
 747                  Op.getOperand(0),
 748                  KnownZero2,
 749                  KnownOne2
 750                  );
 751              assert((KnownZero & KnownOne) == 0
 752                  && "Bits known to be one AND zero?");
 753              assert((KnownZero2 & KnownOne2) == 0
 754                  && "Bits known to be one AND zero?");
 755              // Only known if known in both the LHS and RHS
 756              KnownOne &= KnownOne2;
 757              KnownZero &= KnownZero2;
 758              break;
 759   };
 760 }
 761
 762 // This is the function that determines which calling convention should
 763 // be used. Currently there is only one calling convention
 764 CCAssignFn*
 765 AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
 766 {
 767   //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 768   return CC_AMDIL32;
 769 }
 770
 771 // LowerCallResult - Lower the result values of an ISD::CALL into the
 772 // appropriate copies out of appropriate physical registers.  This assumes that
 773 // Chain/InFlag are the input chain/flag to use, and that TheCall is the call
 774 // being lowered.  The returns a SDNode with the same number of values as the
 775 // ISD::CALL.
 776 SDValue
 777 AMDILTargetLowering::LowerCallResult(
 778     SDValue Chain,
 779     SDValue InFlag,
 780     CallingConv::ID CallConv,
 781     bool isVarArg,
 782     const SmallVectorImpl<ISD::InputArg> &Ins,
 783     DebugLoc dl,
 784     SelectionDAG &DAG,
 785     SmallVectorImpl<SDValue> &InVals) const
 786 {
 787   // Assign locations to each value returned by this call
 788   SmallVector<CCValAssign, 16> RVLocs;
 789   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
 790                  getTargetMachine(), RVLocs, *DAG.getContext());
 791   CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
 792
 793   // Copy all of the result registers out of their specified physreg.
 794   for (unsigned i = 0; i != RVLocs.size(); ++i) {
 795     EVT CopyVT = RVLocs[i].getValVT();
 796     if (RVLocs[i].isRegLoc()) {
 797       Chain = DAG.getCopyFromReg(
 798           Chain,
 799           dl,
 800           RVLocs[i].getLocReg(),
 801           CopyVT,
 802           InFlag
 803           ).getValue(1);
 804       SDValue Val = Chain.getValue(0);
 805       InFlag = Chain.getValue(2);
 806       InVals.push_back(Val);
 807     }
 808   }
 809
 810   return Chain;
 811
 812 }
 813
 814 //===----------------------------------------------------------------------===//
 815 //                           Other Lowering Hooks
 816 //===----------------------------------------------------------------------===//
 817
 818 // Recursively assign SDNodeOrdering to any unordered nodes
 819 // This is necessary to maintain source ordering of instructions
 820 // under -O0 to avoid odd-looking "skipping around" issues.
 821   static const SDValue
 822 Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
 823 {
 824   if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
 825     DAG.AssignOrdering( New.getNode(), order );
 826     for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
 827       Ordered( DAG, order, New.getOperand(i) );
 828   }
 829   return New;
 830 }
 831
 832 #define LOWER(A) \
 833   case ISD:: A: \
 834 return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
 835
 836 SDValue
 837 AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
 838 {
 839   switch (Op.getOpcode()) {
 840     default:
 841       Op.getNode()->dump();
 842       assert(0 && "Custom lowering code for this"
 843           "instruction is not implemented yet!");
 844       break;
 845       LOWER(GlobalAddress);
 846       LOWER(JumpTable);
 847       LOWER(ConstantPool);
 848       LOWER(ExternalSymbol);
 849       LOWER(SDIV);
 850       LOWER(SREM);
 851       LOWER(BUILD_VECTOR);
 852       LOWER(SELECT);
 853       LOWER(SETCC);
 854       LOWER(SIGN_EXTEND_INREG);
 855       LOWER(DYNAMIC_STACKALLOC);
 856       LOWER(BRCOND);
 857       LOWER(BR_CC);
 858   }
 859   return Op;
 860 }
 861
 862 #undef LOWER
 863
 864 SDValue
 865 AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
 866 {
 867   SDValue DST = Op;
 868   const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
 869   const GlobalValue *G = GADN->getGlobal();
 870   DebugLoc DL = Op.getDebugLoc();
 871   const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
 872   if (!GV) {
 873     DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
 874   } else {
 875     if (GV->hasInitializer()) {
 876       const Constant *C = dyn_cast<Constant>(GV->getInitializer());
 877       if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
 878         DST = DAG.getConstant(CI->getValue(), Op.getValueType());
 879       } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
 880         DST = DAG.getConstantFP(CF->getValueAPF(),
 881             Op.getValueType());
 882       } else if (dyn_cast<ConstantAggregateZero>(C)) {
 883         EVT VT = Op.getValueType();
 884         if (VT.isInteger()) {
 885           DST = DAG.getConstant(0, VT);
 886         } else {
 887           DST = DAG.getConstantFP(0, VT);
 888         }
 889       } else {
 890         assert(!"lowering this type of Global Address "
 891             "not implemented yet!");
 892         C->dump();
 893         DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
 894       }
 895     } else {
 896       DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
 897     }
 898   }
 899   return DST;
 900 }
 901
 902 SDValue
 903 AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
 904 {
 905   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 906   SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
 907   return Result;
 908 }
 909 SDValue
 910 AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
 911 {
 912   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 913   EVT PtrVT = Op.getValueType();
 914   SDValue Result;
 915   if (CP->isMachineConstantPoolEntry()) {
 916     Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
 917         CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
 918   } else {
 919     Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
 920         CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
 921   }
 922   return Result;
 923 }
 924
 925 SDValue
 926 AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
 927 {
 928   const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
 929   SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
 930   return Result;
 931 }
 932
 933 /// LowerFORMAL_ARGUMENTS - transform physical registers into
 934 /// virtual registers and generate load operations for
 935 /// arguments places on the stack.
 936 /// TODO: isVarArg, hasStructRet, isMemReg
 937   SDValue
 938 AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
 939     CallingConv::ID CallConv,
 940     bool isVarArg,
 941     const SmallVectorImpl<ISD::InputArg> &Ins,
 942     DebugLoc dl,
 943     SelectionDAG &DAG,
 944     SmallVectorImpl<SDValue> &InVals)
 945 const
 946 {
 947
 948   MachineFunction &MF = DAG.getMachineFunction();
 949   MachineFrameInfo *MFI = MF.getFrameInfo();
 950   //const Function *Fn = MF.getFunction();
 951   //MachineRegisterInfo &RegInfo = MF.getRegInfo();
 952
 953   SmallVector<CCValAssign, 16> ArgLocs;
 954   CallingConv::ID CC = MF.getFunction()->getCallingConv();
 955   //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
 956
 957   CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
 958                  getTargetMachine(), ArgLocs, *DAG.getContext());
 959
 960   // When more calling conventions are added, they need to be chosen here
 961   CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
 962   SDValue StackPtr;
 963
 964   //unsigned int FirstStackArgLoc = 0;
 965
 966   for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
 967     CCValAssign &VA = ArgLocs[i];
 968     if (VA.isRegLoc()) {
 969       EVT RegVT = VA.getLocVT();
 970       const TargetRegisterClass *RC = getRegClassFor(
 971           RegVT.getSimpleVT().SimpleTy);
 972
 973       unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
 974       SDValue ArgValue = DAG.getCopyFromReg(
 975           Chain,
 976           dl,
 977           Reg,
 978           RegVT);
 979       // If this is an 8 or 16-bit value, it is really passed
 980       // promoted to 32 bits.  Insert an assert[sz]ext to capture
 981       // this, then truncate to the right size.
 982
 983       if (VA.getLocInfo() == CCValAssign::SExt) {
 984         ArgValue = DAG.getNode(
 985             ISD::AssertSext,
 986             dl,
 987             RegVT,
 988             ArgValue,
 989             DAG.getValueType(VA.getValVT()));
 990       } else if (VA.getLocInfo() == CCValAssign::ZExt) {
 991         ArgValue = DAG.getNode(
 992             ISD::AssertZext,
 993             dl,
 994             RegVT,
 995             ArgValue,
 996             DAG.getValueType(VA.getValVT()));
 997       }
 998       if (VA.getLocInfo() != CCValAssign::Full) {
 999         ArgValue = DAG.getNode(
1000             ISD::TRUNCATE,
1001             dl,
1002             VA.getValVT(),
1003             ArgValue);
1004       }
1005       // Add the value to the list of arguments
1006       // to be passed in registers
1007       InVals.push_back(ArgValue);
1008       if (isVarArg) {
1009         assert(0 && "Variable arguments are not yet supported");
1010         // See MipsISelLowering.cpp for ideas on how to implement
1011       }
1012     } else if(VA.isMemLoc()) {
1013       InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
1014             dl, DAG, VA, MFI, i));
1015     } else {
1016       assert(0 && "found a Value Assign that is "
1017           "neither a register or a memory location");
1018     }
1019   }
1020   /*if (hasStructRet) {
1021     assert(0 && "Has struct return is not yet implemented");
1022   // See MipsISelLowering.cpp for ideas on how to implement
1023   }*/
1024
1025   if (isVarArg) {
1026     assert(0 && "Variable arguments are not yet supported");
1027     // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
1028   }
1029   // This needs to be changed to non-zero if the return function needs
1030   // to pop bytes
1031   return Chain;
1032 }
1033 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1034 /// by "Src" to address "Dst" with size and alignment information specified by
1035 /// the specific parameter attribute. The copy will be passed as a byval
1036 /// function parameter.
1037 static SDValue
1038 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1039     ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1040   assert(0 && "MemCopy does not exist yet");
1041   SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1042
1043   return DAG.getMemcpy(Chain,
1044       Src.getDebugLoc(),
1045       Dst, Src, SizeNode, Flags.getByValAlign(),
1046       /*IsVol=*/false, /*AlwaysInline=*/true,
1047       MachinePointerInfo(), MachinePointerInfo());
1048 }
1049
1050 SDValue
1051 AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
1052     SDValue StackPtr, SDValue Arg,
1053     DebugLoc dl, SelectionDAG &DAG,
1054     const CCValAssign &VA,
1055     ISD::ArgFlagsTy Flags) const
1056 {
1057   unsigned int LocMemOffset = VA.getLocMemOffset();
1058   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1059   PtrOff = DAG.getNode(ISD::ADD,
1060       dl,
1061       getPointerTy(), StackPtr, PtrOff);
1062   if (Flags.isByVal()) {
1063     PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
1064   } else {
1065     PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
1066         MachinePointerInfo::getStack(LocMemOffset),
1067         false, false, 0);
1068   }
1069   return PtrOff;
1070 }
1071 /// LowerCAL - functions arguments are copied from virtual
1072 /// regs to (physical regs)/(stack frame), CALLSEQ_START and
1073 /// CALLSEQ_END are emitted.
1074 /// TODO: isVarArg, isTailCall, hasStructRet
1075 SDValue
1076 AMDILTargetLowering::LowerCall(CallLoweringInfo &CLI,
1077     SmallVectorImpl<SDValue> &InVals) const
1078
1079 #if 0
1080     SDValue Chain, SDValue Callee,
1081     CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
1082     bool& isTailCall,
1083     const SmallVectorImpl<ISD::OutputArg> &Outs,
1084     const SmallVectorImpl<SDValue> &OutVals,
1085     const SmallVectorImpl<ISD::InputArg> &Ins,
1086     DebugLoc dl, SelectionDAG &DAG,
1087 #endif
1088 {
1089   CLI.IsTailCall = false;
1090   MachineFunction& MF = CLI.DAG.getMachineFunction();
1091   // FIXME: DO we need to handle fast calling conventions and tail call
1092   // optimizations?? X86/PPC ISelLowering
1093   /*bool hasStructRet = (TheCall->getNumArgs())
1094     ? TheCall->getArgFlags(0).device()->isSRet()
1095     : false;*/
1096
1097   MachineFrameInfo *MFI = MF.getFrameInfo();
1098
1099   // Analyze operands of the call, assigning locations to each operand
1100   SmallVector<CCValAssign, 16> ArgLocs;
1101   CCState CCInfo(CLI.CallConv, CLI.IsVarArg, CLI.DAG.getMachineFunction(),
1102                  getTargetMachine(), ArgLocs, *CLI.DAG.getContext());
1103   // Analyize the calling operands, but need to change
1104   // if we have more than one calling convetion
1105   CCInfo.AnalyzeCallOperands(CLI.Outs, CCAssignFnForNode(CLI.CallConv));
1106
1107   unsigned int NumBytes = CCInfo.getNextStackOffset();
1108   if (CLI.IsTailCall) {
1109     assert(CLI.IsTailCall && "Tail Call not handled yet!");
1110     // See X86/PPC ISelLowering
1111   }
1112
1113   CLI.Chain = CLI.DAG.getCALLSEQ_START(CLI.Chain,
1114                                    CLI.DAG.getIntPtrConstant(NumBytes, true));
1115
1116   SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
1117   SmallVector<SDValue, 8> MemOpChains;
1118   SDValue StackPtr;
1119   //unsigned int FirstStacArgLoc = 0;
1120   //int LastArgStackLoc = 0;
1121
1122   // Walk the register/memloc assignments, insert copies/loads
1123   for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
1124     CCValAssign &VA = ArgLocs[i];
1125     //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
1126     // Arguments start after the 5 first operands of ISD::CALL
1127     SDValue Arg = CLI.OutVals[i];
1128     //Promote the value if needed
1129     switch(VA.getLocInfo()) {
1130       default: assert(0 && "Unknown loc info!");
1131       case CCValAssign::Full:
1132                break;
1133       case CCValAssign::SExt:
1134                Arg = CLI.DAG.getNode(ISD::SIGN_EXTEND,
1135                    CLI.DL,
1136                    VA.getLocVT(), Arg);
1137                break;
1138       case CCValAssign::ZExt:
1139                Arg = CLI.DAG.getNode(ISD::ZERO_EXTEND,
1140                    CLI.DL,
1141                    VA.getLocVT(), Arg);
1142                break;
1143       case CCValAssign::AExt:
1144                Arg = CLI.DAG.getNode(ISD::ANY_EXTEND,
1145                    CLI.DL,
1146                    VA.getLocVT(), Arg);
1147                break;
1148     }
1149
1150     if (VA.isRegLoc()) {
1151       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1152     } else if (VA.isMemLoc()) {
1153       // Create the frame index object for this incoming parameter
1154       int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
1155           VA.getLocMemOffset(), true);
1156       SDValue PtrOff = CLI.DAG.getFrameIndex(FI,getPointerTy());
1157
1158       // emit ISD::STORE whichs stores the
1159       // parameter value to a stack Location
1160       MemOpChains.push_back(CLI.DAG.getStore(CLI.Chain, CLI.DL, Arg, PtrOff,
1161             MachinePointerInfo::getFixedStack(FI),
1162             false, false, 0));
1163     } else {
1164       assert(0 && "Not a Reg/Mem Loc, major error!");
1165     }
1166   }
1167   if (!MemOpChains.empty()) {
1168     CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor,
1169         CLI.DL,
1170         MVT::Other,
1171         &MemOpChains[0],
1172         MemOpChains.size());
1173   }
1174   SDValue InFlag;
1175   if (!CLI.IsTailCall) {
1176     for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
1177       CLI.Chain = CLI.DAG.getCopyToReg(CLI.Chain,
1178           CLI.DL,
1179           RegsToPass[i].first,
1180           RegsToPass[i].second,
1181           InFlag);
1182       InFlag = CLI.Chain.getValue(1);
1183     }
1184   }
1185
1186   // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
1187   // every direct call is) turn it into a TargetGlobalAddress/
1188   // TargetExternalSymbol
1189   // node so that legalize doesn't hack it.
1190   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee))  {
1191     CLI.Callee = CLI.DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy());
1192   }
1193   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
1194     CLI.Callee = CLI.DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1195   }
1196   else if (CLI.IsTailCall) {
1197     assert(0 && "Tail calls are not handled yet");
1198     // see X86 ISelLowering for ideas on implementation: 1708
1199   }
1200
1201   SDVTList NodeTys = CLI.DAG.getVTList(MVT::Other, MVTGLUE);
1202   SmallVector<SDValue, 8> Ops;
1203
1204   if (CLI.IsTailCall) {
1205     assert(0 && "Tail calls are not handled yet");
1206     // see X86 ISelLowering for ideas on implementation: 1721
1207   }
1208   // If this is a direct call, pass the chain and the callee
1209   if (CLI.Callee.getNode()) {
1210     Ops.push_back(CLI.Chain);
1211     Ops.push_back(CLI.Callee);
1212   }
1213
1214   if (CLI.IsTailCall) {
1215     assert(0 && "Tail calls are not handled yet");
1216     // see X86 ISelLowering for ideas on implementation: 1739
1217   }
1218
1219   // Add argument registers to the end of the list so that they are known
1220   // live into the call
1221   for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
1222     Ops.push_back(CLI.DAG.getRegister(
1223           RegsToPass[i].first,
1224           RegsToPass[i].second.getValueType()));
1225   }
1226   if (InFlag.getNode()) {
1227     Ops.push_back(InFlag);
1228   }
1229
1230   // Emit Tail Call
1231   if (CLI.IsTailCall) {
1232     assert(0 && "Tail calls are not handled yet");
1233     // see X86 ISelLowering for ideas on implementation: 1762
1234   }
1235
1236   CLI.Chain = CLI.DAG.getNode(AMDILISD::CALL,
1237       CLI.DL,
1238       NodeTys, &Ops[0], Ops.size());
1239   InFlag = CLI.Chain.getValue(1);
1240
1241   // Create the CALLSEQ_END node
1242   CLI.Chain = CLI.DAG.getCALLSEQ_END(
1243       CLI.Chain,
1244       CLI.DAG.getIntPtrConstant(NumBytes, true),
1245       CLI.DAG.getIntPtrConstant(0, true),
1246       InFlag);
1247   InFlag = CLI.Chain.getValue(1);
1248   // Handle result values, copying them out of physregs into vregs that
1249   // we return
1250   return LowerCallResult(CLI.Chain, InFlag, CLI.CallConv, CLI.IsVarArg, CLI.Ins, CLI.DL, CLI.DAG,
1251       InVals);
1252 }
1253
1254 SDValue
1255 AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
1256 {
1257   EVT OVT = Op.getValueType();
1258   SDValue DST;
1259   if (OVT.getScalarType() == MVT::i64) {
1260     DST = LowerSDIV64(Op, DAG);
1261   } else if (OVT.getScalarType() == MVT::i32) {
1262     DST = LowerSDIV32(Op, DAG);
1263   } else if (OVT.getScalarType() == MVT::i16
1264       || OVT.getScalarType() == MVT::i8) {
1265     DST = LowerSDIV24(Op, DAG);
1266   } else {
1267     DST = SDValue(Op.getNode(), 0);
1268   }
1269   return DST;
1270 }
1271
1272 SDValue
1273 AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
1274 {
1275   EVT OVT = Op.getValueType();
1276   SDValue DST;
1277   if (OVT.getScalarType() == MVT::i64) {
1278     DST = LowerSREM64(Op, DAG);
1279   } else if (OVT.getScalarType() == MVT::i32) {
1280     DST = LowerSREM32(Op, DAG);
1281   } else if (OVT.getScalarType() == MVT::i16) {
1282     DST = LowerSREM16(Op, DAG);
1283   } else if (OVT.getScalarType() == MVT::i8) {
1284     DST = LowerSREM8(Op, DAG);
1285   } else {
1286     DST = SDValue(Op.getNode(), 0);
1287   }
1288   return DST;
1289 }
1290
1291 SDValue
1292 AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
1293 {
1294   EVT VT = Op.getValueType();
1295   SDValue Nodes1;
1296   SDValue second;
1297   SDValue third;
1298   SDValue fourth;
1299   DebugLoc DL = Op.getDebugLoc();
1300   Nodes1 = DAG.getNode(AMDILISD::VBUILD,
1301       DL,
1302       VT, Op.getOperand(0));
1303 #if 0
1304   bool allEqual = true;
1305   for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
1306     if (Op.getOperand(0) != Op.getOperand(x)) {
1307       allEqual = false;
1308       break;
1309     }
1310   }
1311   if (allEqual) {
1312     return Nodes1;
1313   }
1314 #endif
1315   switch(Op.getNumOperands()) {
1316     default:
1317     case 1:
1318       break;
1319     case 4:
1320       fourth = Op.getOperand(3);
1321       if (fourth.getOpcode() != ISD::UNDEF) {
1322         Nodes1 = DAG.getNode(
1323             ISD::INSERT_VECTOR_ELT,
1324             DL,
1325             Op.getValueType(),
1326             Nodes1,
1327             fourth,
1328             DAG.getConstant(7, MVT::i32));
1329       }
1330     case 3:
1331       third = Op.getOperand(2);
1332       if (third.getOpcode() != ISD::UNDEF) {
1333         Nodes1 = DAG.getNode(
1334             ISD::INSERT_VECTOR_ELT,
1335             DL,
1336             Op.getValueType(),
1337             Nodes1,
1338             third,
1339             DAG.getConstant(6, MVT::i32));
1340       }
1341     case 2:
1342       second = Op.getOperand(1);
1343       if (second.getOpcode() != ISD::UNDEF) {
1344         Nodes1 = DAG.getNode(
1345             ISD::INSERT_VECTOR_ELT,
1346             DL,
1347             Op.getValueType(),
1348             Nodes1,
1349             second,
1350             DAG.getConstant(5, MVT::i32));
1351       }
1352       break;
1353   };
1354   return Nodes1;
1355 }
1356
1357 SDValue
1358 AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
1359 {
1360   SDValue Cond = Op.getOperand(0);
1361   SDValue LHS = Op.getOperand(1);
1362   SDValue RHS = Op.getOperand(2);
1363   DebugLoc DL = Op.getDebugLoc();
1364   Cond = getConversionNode(DAG, Cond, Op, true);
1365   Cond = DAG.getNode(AMDILISD::CMOVLOG,
1366       DL,
1367       Op.getValueType(), Cond, LHS, RHS);
1368   return Cond;
1369 }
1370 SDValue
1371 AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
1372 {
1373   SDValue Cond;
1374   SDValue LHS = Op.getOperand(0);
1375   SDValue RHS = Op.getOperand(1);
1376   SDValue CC  = Op.getOperand(2);
1377   DebugLoc DL = Op.getDebugLoc();
1378   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
1379   unsigned int AMDILCC = CondCCodeToCC(
1380       SetCCOpcode,
1381       LHS.getValueType().getSimpleVT().SimpleTy);
1382   assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
1383   Cond = DAG.getNode(
1384       ISD::SELECT_CC,
1385       Op.getDebugLoc(),
1386       LHS.getValueType(),
1387       LHS, RHS,
1388       DAG.getConstant(-1, MVT::i32),
1389       DAG.getConstant(0, MVT::i32),
1390       CC);
1391   Cond = getConversionNode(DAG, Cond, Op, true);
1392   Cond = DAG.getNode(
1393       ISD::AND,
1394       DL,
1395       Cond.getValueType(),
1396       DAG.getConstant(1, Cond.getValueType()),
1397       Cond);
1398   return Cond;
1399 }
1400
1401 SDValue
1402 AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
1403 {
1404   SDValue Data = Op.getOperand(0);
1405   VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
1406   DebugLoc DL = Op.getDebugLoc();
1407   EVT DVT = Data.getValueType();
1408   EVT BVT = BaseType->getVT();
1409   unsigned baseBits = BVT.getScalarType().getSizeInBits();
1410   unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
1411   unsigned shiftBits = srcBits - baseBits;
1412   if (srcBits < 32) {
1413     // If the op is less than 32 bits, then it needs to extend to 32bits
1414     // so it can properly keep the upper bits valid.
1415     EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
1416     Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
1417     shiftBits = 32 - baseBits;
1418     DVT = IVT;
1419   }
1420   SDValue Shift = DAG.getConstant(shiftBits, DVT);
1421   // Shift left by 'Shift' bits.
1422   Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
1423   // Signed shift Right by 'Shift' bits.
1424   Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
1425   if (srcBits < 32) {
1426     // Once the sign extension is done, the op needs to be converted to
1427     // its original type.
1428     Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
1429   }
1430   return Data;
1431 }
1432 EVT
1433 AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
1434 {
1435   int iSize = (size * numEle);
1436   int vEle = (iSize >> ((size == 64) ? 6 : 5));
1437   if (!vEle) {
1438     vEle = 1;
1439   }
1440   if (size == 64) {
1441     if (vEle == 1) {
1442       return EVT(MVT::i64);
1443     } else {
1444       return EVT(MVT::getVectorVT(MVT::i64, vEle));
1445     }
1446   } else {
1447     if (vEle == 1) {
1448       return EVT(MVT::i32);
1449     } else {
1450       return EVT(MVT::getVectorVT(MVT::i32, vEle));
1451     }
1452   }
1453 }
1454
1455 SDValue
1456 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1457     SelectionDAG &DAG) const
1458 {
1459   SDValue Chain = Op.getOperand(0);
1460   SDValue Size = Op.getOperand(1);
1461   unsigned int SPReg = AMDGPU::SP;
1462   DebugLoc DL = Op.getDebugLoc();
1463   SDValue SP = DAG.getCopyFromReg(Chain,
1464       DL,
1465       SPReg, MVT::i32);
1466   SDValue NewSP = DAG.getNode(ISD::ADD,
1467       DL,
1468       MVT::i32, SP, Size);
1469   Chain = DAG.getCopyToReg(SP.getValue(1),
1470       DL,
1471       SPReg, NewSP);
1472   SDValue Ops[2] = {NewSP, Chain};
1473   Chain = DAG.getMergeValues(Ops, 2 ,DL);
1474   return Chain;
1475 }
1476 SDValue
1477 AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
1478 {
1479   SDValue Chain = Op.getOperand(0);
1480   SDValue Cond  = Op.getOperand(1);
1481   SDValue Jump  = Op.getOperand(2);
1482   SDValue Result;
1483   Result = DAG.getNode(
1484       AMDILISD::BRANCH_COND,
1485       Op.getDebugLoc(),
1486       Op.getValueType(),
1487       Chain, Jump, Cond);
1488   return Result;
1489 }
1490
1491 SDValue
1492 AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
1493 {
1494   SDValue Chain = Op.getOperand(0);
1495   SDValue CC = Op.getOperand(1);
1496   SDValue LHS   = Op.getOperand(2);
1497   SDValue RHS   = Op.getOperand(3);
1498   SDValue JumpT  = Op.getOperand(4);
1499   SDValue CmpValue;
1500   SDValue Result;
1501   CmpValue = DAG.getNode(
1502       ISD::SELECT_CC,
1503       Op.getDebugLoc(),
1504       LHS.getValueType(),
1505       LHS, RHS,
1506       DAG.getConstant(-1, MVT::i32),
1507       DAG.getConstant(0, MVT::i32),
1508       CC);
1509   Result = DAG.getNode(
1510       AMDILISD::BRANCH_COND,
1511       CmpValue.getDebugLoc(),
1512       MVT::Other, Chain,
1513       JumpT, CmpValue);
1514   return Result;
1515 }
1516
1517 // LowerRET - Lower an ISD::RET node.
1518 SDValue
1519 AMDILTargetLowering::LowerReturn(SDValue Chain,
1520     CallingConv::ID CallConv, bool isVarArg,
1521     const SmallVectorImpl<ISD::OutputArg> &Outs,
1522     const SmallVectorImpl<SDValue> &OutVals,
1523     DebugLoc dl, SelectionDAG &DAG)
1524 const
1525 {
1526   //MachineFunction& MF = DAG.getMachineFunction();
1527   // CCValAssign - represent the assignment of the return value
1528   // to a location
1529   SmallVector<CCValAssign, 16> RVLocs;
1530
1531   // CCState - Info about the registers and stack slot
1532   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1533                  getTargetMachine(), RVLocs, *DAG.getContext());
1534
1535   // Analyze return values of ISD::RET
1536   CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
1537   // If this is the first return lowered for this function, add
1538   // the regs to the liveout set for the function
1539   MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
1540   for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
1541     if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
1542       MRI.addLiveOut(RVLocs[i].getLocReg());
1543     }
1544   }
1545   // FIXME: implement this when tail call is implemented
1546   // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
1547   // both x86 and ppc implement this in ISelLowering
1548
1549   // Regular return here
1550   SDValue Flag;
1551   SmallVector<SDValue, 6> RetOps;
1552   RetOps.push_back(Chain);
1553   RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
1554   for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
1555     CCValAssign &VA = RVLocs[i];
1556     SDValue ValToCopy = OutVals[i];
1557     assert(VA.isRegLoc() && "Can only return in registers!");
1558     // ISD::Ret => ret chain, (regnum1, val1), ...
1559     // So i * 2 + 1 index only the regnums
1560     Chain = DAG.getCopyToReg(Chain,
1561         dl,
1562         VA.getLocReg(),
1563         ValToCopy,
1564         Flag);
1565     // guarantee that all emitted copies are stuck together
1566     // avoiding something bad
1567     Flag = Chain.getValue(1);
1568   }
1569   /*if (MF.getFunction()->hasStructRetAttr()) {
1570     assert(0 && "Struct returns are not yet implemented!");
1571   // Both MIPS and X86 have this
1572   }*/
1573   RetOps[0] = Chain;
1574   if (Flag.getNode())
1575     RetOps.push_back(Flag);
1576
1577   Flag = DAG.getNode(AMDILISD::RET_FLAG,
1578       dl,
1579       MVT::Other, &RetOps[0], RetOps.size());
1580   return Flag;
1581 }
1582
1583 unsigned int
1584 AMDILTargetLowering::getFunctionAlignment(const Function *) const
1585 {
1586   return 0;
1587 }
1588
1589 SDValue
1590 AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
1591 {
1592   DebugLoc DL = Op.getDebugLoc();
1593   EVT OVT = Op.getValueType();
1594   SDValue LHS = Op.getOperand(0);
1595   SDValue RHS = Op.getOperand(1);
1596   MVT INTTY;
1597   MVT FLTTY;
1598   if (!OVT.isVector()) {
1599     INTTY = MVT::i32;
1600     FLTTY = MVT::f32;
1601   } else if (OVT.getVectorNumElements() == 2) {
1602     INTTY = MVT::v2i32;
1603     FLTTY = MVT::v2f32;
1604   } else if (OVT.getVectorNumElements() == 4) {
1605     INTTY = MVT::v4i32;
1606     FLTTY = MVT::v4f32;
1607   }
1608   unsigned bitsize = OVT.getScalarType().getSizeInBits();
1609   // char|short jq = ia ^ ib;
1610   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
1611
1612   // jq = jq >> (bitsize - 2)
1613   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
1614
1615   // jq = jq | 0x1
1616   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
1617
1618   // jq = (int)jq
1619   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
1620
1621   // int ia = (int)LHS;
1622   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
1623
1624   // int ib, (int)RHS;
1625   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
1626
1627   // float fa = (float)ia;
1628   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
1629
1630   // float fb = (float)ib;
1631   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
1632
1633   // float fq = native_divide(fa, fb);
1634   SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
1635
1636   // fq = trunc(fq);
1637   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
1638
1639   // float fqneg = -fq;
1640   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
1641
1642   // float fr = mad(fqneg, fb, fa);
1643   SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
1644
1645   // int iq = (int)fq;
1646   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
1647
1648   // fr = fabs(fr);
1649   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
1650
1651   // fb = fabs(fb);
1652   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
1653
1654   // int cv = fr >= fb;
1655   SDValue cv;
1656   if (INTTY == MVT::i32) {
1657     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
1658   } else {
1659     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
1660   }
1661   // jq = (cv ? jq : 0);
1662   jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
1663       DAG.getConstant(0, OVT));
1664   // dst = iq + jq;
1665   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
1666   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
1667   return iq;
1668 }
1669
1670 SDValue
1671 AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
1672 {
1673   DebugLoc DL = Op.getDebugLoc();
1674   EVT OVT = Op.getValueType();
1675   SDValue LHS = Op.getOperand(0);
1676   SDValue RHS = Op.getOperand(1);
1677   // The LowerSDIV32 function generates equivalent to the following IL.
1678   // mov r0, LHS
1679   // mov r1, RHS
1680   // ilt r10, r0, 0
1681   // ilt r11, r1, 0
1682   // iadd r0, r0, r10
1683   // iadd r1, r1, r11
1684   // ixor r0, r0, r10
1685   // ixor r1, r1, r11
1686   // udiv r0, r0, r1
1687   // ixor r10, r10, r11
1688   // iadd r0, r0, r10
1689   // ixor DST, r0, r10
1690
1691   // mov r0, LHS
1692   SDValue r0 = LHS;
1693
1694   // mov r1, RHS
1695   SDValue r1 = RHS;
1696
1697   // ilt r10, r0, 0
1698   SDValue r10 = DAG.getSelectCC(DL,
1699       r0, DAG.getConstant(0, OVT),
1700       DAG.getConstant(-1, MVT::i32),
1701       DAG.getConstant(0, MVT::i32),
1702       ISD::SETLT);
1703
1704   // ilt r11, r1, 0
1705   SDValue r11 = DAG.getSelectCC(DL,
1706       r1, DAG.getConstant(0, OVT),
1707       DAG.getConstant(-1, MVT::i32),
1708       DAG.getConstant(0, MVT::i32),
1709       ISD::SETLT);
1710
1711   // iadd r0, r0, r10
1712   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1713
1714   // iadd r1, r1, r11
1715   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
1716
1717   // ixor r0, r0, r10
1718   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1719
1720   // ixor r1, r1, r11
1721   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
1722
1723   // udiv r0, r0, r1
1724   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
1725
1726   // ixor r10, r10, r11
1727   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
1728
1729   // iadd r0, r0, r10
1730   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1731
1732   // ixor DST, r0, r10
1733   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1734   return DST;
1735 }
1736
1737 SDValue
1738 AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
1739 {
1740   return SDValue(Op.getNode(), 0);
1741 }
1742
1743 SDValue
1744 AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
1745 {
1746   DebugLoc DL = Op.getDebugLoc();
1747   EVT OVT = Op.getValueType();
1748   MVT INTTY = MVT::i32;
1749   if (OVT == MVT::v2i8) {
1750     INTTY = MVT::v2i32;
1751   } else if (OVT == MVT::v4i8) {
1752     INTTY = MVT::v4i32;
1753   }
1754   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
1755   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
1756   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
1757   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
1758   return LHS;
1759 }
1760
1761 SDValue
1762 AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
1763 {
1764   DebugLoc DL = Op.getDebugLoc();
1765   EVT OVT = Op.getValueType();
1766   MVT INTTY = MVT::i32;
1767   if (OVT == MVT::v2i16) {
1768     INTTY = MVT::v2i32;
1769   } else if (OVT == MVT::v4i16) {
1770     INTTY = MVT::v4i32;
1771   }
1772   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
1773   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
1774   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
1775   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
1776   return LHS;
1777 }
1778
1779 SDValue
1780 AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
1781 {
1782   DebugLoc DL = Op.getDebugLoc();
1783   EVT OVT = Op.getValueType();
1784   SDValue LHS = Op.getOperand(0);
1785   SDValue RHS = Op.getOperand(1);
1786   // The LowerSREM32 function generates equivalent to the following IL.
1787   // mov r0, LHS
1788   // mov r1, RHS
1789   // ilt r10, r0, 0
1790   // ilt r11, r1, 0
1791   // iadd r0, r0, r10
1792   // iadd r1, r1, r11
1793   // ixor r0, r0, r10
1794   // ixor r1, r1, r11
1795   // udiv r20, r0, r1
1796   // umul r20, r20, r1
1797   // sub r0, r0, r20
1798   // iadd r0, r0, r10
1799   // ixor DST, r0, r10
1800
1801   // mov r0, LHS
1802   SDValue r0 = LHS;
1803
1804   // mov r1, RHS
1805   SDValue r1 = RHS;
1806
1807   // ilt r10, r0, 0
1808   SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
1809       DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
1810       r0, DAG.getConstant(0, OVT));
1811
1812   // ilt r11, r1, 0
1813   SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
1814       DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
1815       r1, DAG.getConstant(0, OVT));
1816
1817   // iadd r0, r0, r10
1818   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1819
1820   // iadd r1, r1, r11
1821   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
1822
1823   // ixor r0, r0, r10
1824   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1825
1826   // ixor r1, r1, r11
1827   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
1828
1829   // udiv r20, r0, r1
1830   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
1831
1832   // umul r20, r20, r1
1833   r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
1834
1835   // sub r0, r0, r20
1836   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
1837
1838   // iadd r0, r0, r10
1839   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1840
1841   // ixor DST, r0, r10
1842   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1843   return DST;
1844 }
1845
1846 SDValue
1847 AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
1848 {
1849   return SDValue(Op.getNode(), 0);
1850 }