lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/Constants.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/Intrinsics.h"
  21 #include "llvm/CallingConv.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/MachineFrameInfo.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineInstrBuilder.h"
  26 #include "llvm/CodeGen/MachineRegisterInfo.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/Target/TargetLoweringObjectFile.h"
  29 #include "llvm/Target/TargetOptions.h"
  30 #include "llvm/ADT/VectorExtras.h"
  31 #include "llvm/Support/Debug.h"
  32 #include "llvm/Support/ErrorHandling.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include "llvm/Support/raw_ostream.h"
  35 #include <map>
  36
  37 using namespace llvm;
  38
  39 // Used in getTargetNodeName() below
  40 namespace {
  41   std::map<unsigned, const char *> node_names;
  42
  43   //! EVT mapping to useful data for Cell SPU
  44   struct valtype_map_s {
  45     EVT   valtype;
  46     int   prefslot_byte;
  47   };
  48
  49   const valtype_map_s valtype_map[] = {
  50     { MVT::i1,   3 },
  51     { MVT::i8,   3 },
  52     { MVT::i16,  2 },
  53     { MVT::i32,  0 },
  54     { MVT::f32,  0 },
  55     { MVT::i64,  0 },
  56     { MVT::f64,  0 },
  57     { MVT::i128, 0 }
  58   };
  59
  60   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  61
  62   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  63     const valtype_map_s *retval = 0;
  64
  65     for (size_t i = 0; i < n_valtype_map; ++i) {
  66       if (valtype_map[i].valtype == VT) {
  67         retval = valtype_map + i;
  68         break;
  69       }
  70     }
  71
  72 #ifndef NDEBUG
  73     if (retval == 0) {
  74       std::string msg;
  75       raw_string_ostream Msg(msg);
  76       Msg << "getValueTypeMapEntry returns NULL for "
  77            << VT.getEVTString();
  78       llvm_report_error(Msg.str());
  79     }
  80 #endif
  81
  82     return retval;
  83   }
  84
  85   //! Expand a library call into an actual call DAG node
  86   /*!
  87    \note
  88    This code is taken from SelectionDAGLegalize, since it is not exposed as
  89    part of the LLVM SelectionDAG API.
  90    */
  91
  92   SDValue
  93   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  94                 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  95     // The input chain to this libcall is the entry node of the function.
  96     // Legalizing the call will automatically add the previous call to the
  97     // dependence.
  98     SDValue InChain = DAG.getEntryNode();
  99
 100     TargetLowering::ArgListTy Args;
 101     TargetLowering::ArgListEntry Entry;
 102     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 103       EVT ArgVT = Op.getOperand(i).getValueType();
 104       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 105       Entry.Node = Op.getOperand(i);
 106       Entry.Ty = ArgTy;
 107       Entry.isSExt = isSigned;
 108       Entry.isZExt = !isSigned;
 109       Args.push_back(Entry);
 110     }
 111     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 112                                            TLI.getPointerTy());
 113
 114     // Splice the libcall in wherever FindInputOutputChains tells us to.
 115     const Type *RetTy =
 116                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 117     std::pair<SDValue, SDValue> CallInfo =
 118             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 119                             0, TLI.getLibcallCallingConv(LC), false,
 120                             /*isReturnValueUsed=*/true,
 121                             Callee, Args, DAG,
 122                             Op.getDebugLoc());
 123
 124     return CallInfo.first;
 125   }
 126 }
 127
 128 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 129   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 130     SPUTM(TM) {
 131   // Fold away setcc operations if possible.
 132   setPow2DivIsCheap();
 133
 134   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 135   setUseUnderscoreSetJmp(true);
 136   setUseUnderscoreLongJmp(true);
 137
 138   // Set RTLIB libcall names as used by SPU:
 139   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 140
 141   // Set up the SPU's register classes:
 142   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 143   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 144   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 145   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 146   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 147   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 148   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 149
 150   // SPU has no sign or zero extended loads for i1, i8, i16:
 151   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 152   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 153   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 154
 155   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 156   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 157
 158   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 159   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 160   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 161   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 162
 163   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 164
 165   // SPU constant load actions are custom lowered:
 166   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 167   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 168
 169   // SPU's loads and stores have to be custom lowered:
 170   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 171        ++sctype) {
 172     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 173
 174     setOperationAction(ISD::LOAD,   VT, Custom);
 175     setOperationAction(ISD::STORE,  VT, Custom);
 176     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 177     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 178     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 179
 180     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 181       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 182       setTruncStoreAction(VT, StoreVT, Expand);
 183     }
 184   }
 185
 186   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 187        ++sctype) {
 188     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 189
 190     setOperationAction(ISD::LOAD,   VT, Custom);
 191     setOperationAction(ISD::STORE,  VT, Custom);
 192
 193     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 194       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 195       setTruncStoreAction(VT, StoreVT, Expand);
 196     }
 197   }
 198
 199   // Expand the jumptable branches
 200   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 201   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 202
 203   // Custom lower SELECT_CC for most cases, but expand by default
 204   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 205   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 206   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 207   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 208   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 209
 210   // SPU has no intrinsics for these particular operations:
 211   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 212
 213   // SPU has no division/remainder instructions
 214   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 215   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 216   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 217   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 218   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 219   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 220   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 221   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 222   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 223   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 224   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 225   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 226   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 227   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 228   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 229   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 230   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 231   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 232   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 233   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 234   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 235   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 236   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 237   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 238   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 239   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 240   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 241   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 242   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 243   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 244
 245   // We don't support sin/cos/sqrt/fmod
 246   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 247   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 248   setOperationAction(ISD::FREM , MVT::f64, Expand);
 249   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 250   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 251   setOperationAction(ISD::FREM , MVT::f32, Expand);
 252
 253   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 254   // for f32!)
 255   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 256   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 257
 258   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 259   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 260
 261   // SPU can do rotate right and left, so legalize it... but customize for i8
 262   // because instructions don't exist.
 263
 264   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 265   //        .td files.
 266   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 267   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 268   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 269
 270   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 271   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 272   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 273
 274   // SPU has no native version of shift left/right for i8
 275   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 276   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 277   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 278
 279   // Make these operations legal and handle them during instruction selection:
 280   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 281   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 282   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 283
 284   // Custom lower i8, i32 and i64 multiplications
 285   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 286   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 287   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 288
 289   // Expand double-width multiplication
 290   // FIXME: It would probably be reasonable to support some of these operations
 291   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 292   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 293   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 294   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 295   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 296   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 297   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 298   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 299   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 300   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 301   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 302   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 303   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 304   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 305   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 306   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 307
 308   // Need to custom handle (some) common i8, i64 math ops
 309   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 310   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 311   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 312   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 313
 314   // SPU does not have BSWAP. It does have i32 support CTLZ.
 315   // CTPOP has to be custom lowered.
 316   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 317   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 318
 319   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 320   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 321   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 322   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 323   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 324
 325   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 326   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 327   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 328   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 329   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 330
 331   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 332   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 333   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 334   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 335   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 336
 337   // SPU has a version of select that implements (a&~c)|(b&c), just like
 338   // select ought to work:
 339   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 340   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 341   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 342   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 343
 344   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 345   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 346   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 347   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 348   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 349
 350   // Custom lower i128 -> i64 truncates
 351   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 352
 353   // Custom lower i32/i64 -> i128 sign extend
 354   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 355
 356   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 357   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 358   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 359   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 360   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 361   // to expand to a libcall, hence the custom lowering:
 362   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 363   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 364   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
 365   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 366   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 367   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 368
 369   // FDIV on SPU requires custom lowering
 370   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 371
 372   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 373   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 374   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 375   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 376   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 377   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 378   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 379   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 380   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 381
 382   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 383   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 384   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 385   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 386
 387   // We cannot sextinreg(i1).  Expand to shifts.
 388   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 389
 390   // Support label based line numbers.
 391   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 392
 393   // We want to legalize GlobalAddress and ConstantPool nodes into the
 394   // appropriate instructions to materialize the address.
 395   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 396        ++sctype) {
 397     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 398
 399     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 400     setOperationAction(ISD::ConstantPool,   VT, Custom);
 401     setOperationAction(ISD::JumpTable,      VT, Custom);
 402   }
 403
 404   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 405   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 406
 407   // Use the default implementation.
 408   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 409   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 410   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 411   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 412   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 413   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 414   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 415
 416   // Cell SPU has instructions for converting between i64 and fp.
 417   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 418   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 419
 420   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 421   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 422
 423   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 424   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 425
 426   // First set operation action for all vector types to expand. Then we
 427   // will selectively turn on ones that can be effectively codegen'd.
 428   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 429   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 430   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 431   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 432   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 433   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 434
 435   // "Odd size" vector classes that we're willing to support:
 436   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 437
 438   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 439        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 440     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 441
 442     // add/sub are legal for all supported vector VT's.
 443     setOperationAction(ISD::ADD,     VT, Legal);
 444     setOperationAction(ISD::SUB,     VT, Legal);
 445     // mul has to be custom lowered.
 446     setOperationAction(ISD::MUL,     VT, Legal);
 447
 448     setOperationAction(ISD::AND,     VT, Legal);
 449     setOperationAction(ISD::OR,      VT, Legal);
 450     setOperationAction(ISD::XOR,     VT, Legal);
 451     setOperationAction(ISD::LOAD,    VT, Legal);
 452     setOperationAction(ISD::SELECT,  VT, Legal);
 453     setOperationAction(ISD::STORE,   VT, Legal);
 454
 455     // These operations need to be expanded:
 456     setOperationAction(ISD::SDIV,    VT, Expand);
 457     setOperationAction(ISD::SREM,    VT, Expand);
 458     setOperationAction(ISD::UDIV,    VT, Expand);
 459     setOperationAction(ISD::UREM,    VT, Expand);
 460
 461     // Custom lower build_vector, constant pool spills, insert and
 462     // extract vector elements:
 463     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 464     setOperationAction(ISD::ConstantPool, VT, Custom);
 465     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 466     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 467     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 468     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 469   }
 470
 471   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 472   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 473   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 474   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 475
 476   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 477
 478   setShiftAmountType(MVT::i32);
 479   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 480
 481   setStackPointerRegisterToSaveRestore(SPU::R1);
 482
 483   // We have target-specific dag combine patterns for the following nodes:
 484   setTargetDAGCombine(ISD::ADD);
 485   setTargetDAGCombine(ISD::ZERO_EXTEND);
 486   setTargetDAGCombine(ISD::SIGN_EXTEND);
 487   setTargetDAGCombine(ISD::ANY_EXTEND);
 488
 489   computeRegisterProperties();
 490
 491   // Set pre-RA register scheduler default to BURR, which produces slightly
 492   // better code than the default (could also be TDRR, but TargetLowering.h
 493   // needs a mod to support that model):
 494   setSchedulingPreference(SchedulingForRegPressure);
 495 }
 496
 497 const char *
 498 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 499 {
 500   if (node_names.empty()) {
 501     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 502     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 503     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 504     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 505     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 506     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 507     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 508     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 509     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 510     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 511     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 512     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 513     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 514     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 515     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 516     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 517     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 518     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 519     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 520             "SPUISD::ROTBYTES_LEFT_BITS";
 521     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 522     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 523     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 524     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 525     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 526   }
 527
 528   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 529
 530   return ((i != node_names.end()) ? i->second : 0);
 531 }
 532
 533 /// getFunctionAlignment - Return the Log2 alignment of this function.
 534 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 535   return 3;
 536 }
 537
 538 //===----------------------------------------------------------------------===//
 539 // Return the Cell SPU's SETCC result type
 540 //===----------------------------------------------------------------------===//
 541
 542 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 543   // i16 and i32 are valid SETCC result types
 544   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 545     VT.getSimpleVT().SimpleTy :
 546     MVT::i32);
 547 }
 548
 549 //===----------------------------------------------------------------------===//
 550 // Calling convention code:
 551 //===----------------------------------------------------------------------===//
 552
 553 #include "SPUGenCallingConv.inc"
 554
 555 //===----------------------------------------------------------------------===//
 556 //  LowerOperation implementation
 557 //===----------------------------------------------------------------------===//
 558
 559 /// Custom lower loads for CellSPU
 560 /*!
 561  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 562  within a 16-byte block, we have to rotate to extract the requested element.
 563
 564  For extending loads, we also want to ensure that the following sequence is
 565  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 566
 567 \verbatim
 568 %1  v16i8,ch = load
 569 %2  v16i8,ch = rotate %1
 570 %3  v4f8, ch = bitconvert %2
 571 %4  f32      = vec2perfslot %3
 572 %5  f64      = fp_extend %4
 573 \endverbatim
 574 */
 575 static SDValue
 576 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 577   LoadSDNode *LN = cast<LoadSDNode>(Op);
 578   SDValue the_chain = LN->getChain();
 579   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 580   EVT InVT = LN->getMemoryVT();
 581   EVT OutVT = Op.getValueType();
 582   ISD::LoadExtType ExtType = LN->getExtensionType();
 583   unsigned alignment = LN->getAlignment();
 584   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 585   DebugLoc dl = Op.getDebugLoc();
 586
 587   switch (LN->getAddressingMode()) {
 588   case ISD::UNINDEXED: {
 589     SDValue result;
 590     SDValue basePtr = LN->getBasePtr();
 591     SDValue rotate;
 592
 593     if (alignment == 16) {
 594       ConstantSDNode *CN;
 595
 596       // Special cases for a known aligned load to simplify the base pointer
 597       // and the rotation amount:
 598       if (basePtr.getOpcode() == ISD::ADD
 599           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 600         // Known offset into basePtr
 601         int64_t offset = CN->getSExtValue();
 602         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 603
 604         if (rotamt < 0)
 605           rotamt += 16;
 606
 607         rotate = DAG.getConstant(rotamt, MVT::i16);
 608
 609         // Simplify the base pointer for this case:
 610         basePtr = basePtr.getOperand(0);
 611         if ((offset & ~0xf) > 0) {
 612           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 613                                 basePtr,
 614                                 DAG.getConstant((offset & ~0xf), PtrVT));
 615         }
 616       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 617                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 618                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 619                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 620         // Plain aligned a-form address: rotate into preferred slot
 621         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 622         int64_t rotamt = -vtm->prefslot_byte;
 623         if (rotamt < 0)
 624           rotamt += 16;
 625         rotate = DAG.getConstant(rotamt, MVT::i16);
 626       } else {
 627         // Offset the rotate amount by the basePtr and the preferred slot
 628         // byte offset
 629         int64_t rotamt = -vtm->prefslot_byte;
 630         if (rotamt < 0)
 631           rotamt += 16;
 632         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 633                              basePtr,
 634                              DAG.getConstant(rotamt, PtrVT));
 635       }
 636     } else {
 637       // Unaligned load: must be more pessimistic about addressing modes:
 638       if (basePtr.getOpcode() == ISD::ADD) {
 639         MachineFunction &MF = DAG.getMachineFunction();
 640         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 641         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 642         SDValue Flag;
 643
 644         SDValue Op0 = basePtr.getOperand(0);
 645         SDValue Op1 = basePtr.getOperand(1);
 646
 647         if (isa<ConstantSDNode>(Op1)) {
 648           // Convert the (add <ptr>, <const>) to an indirect address contained
 649           // in a register. Note that this is done because we need to avoid
 650           // creating a 0(reg) d-form address due to the SPU's block loads.
 651           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 652           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 653           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 654         } else {
 655           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 656           // will likely be lowered as a reg(reg) x-form address.
 657           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 658         }
 659       } else {
 660         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 661                               basePtr,
 662                               DAG.getConstant(0, PtrVT));
 663       }
 664
 665       // Offset the rotate amount by the basePtr and the preferred slot
 666       // byte offset
 667       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 668                            basePtr,
 669                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 670     }
 671
 672     // Re-emit as a v16i8 vector load
 673     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 674                          LN->getSrcValue(), LN->getSrcValueOffset(),
 675                          LN->isVolatile(), 16);
 676
 677     // Update the chain
 678     the_chain = result.getValue(1);
 679
 680     // Rotate into the preferred slot:
 681     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 682                          result.getValue(0), rotate);
 683
 684     // Convert the loaded v16i8 vector to the appropriate vector type
 685     // specified by the operand:
 686     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 687                                  InVT, (128 / InVT.getSizeInBits()));
 688     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 689                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 690
 691     // Handle extending loads by extending the scalar result:
 692     if (ExtType == ISD::SEXTLOAD) {
 693       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 694     } else if (ExtType == ISD::ZEXTLOAD) {
 695       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 696     } else if (ExtType == ISD::EXTLOAD) {
 697       unsigned NewOpc = ISD::ANY_EXTEND;
 698
 699       if (OutVT.isFloatingPoint())
 700         NewOpc = ISD::FP_EXTEND;
 701
 702       result = DAG.getNode(NewOpc, dl, OutVT, result);
 703     }
 704
 705     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 706     SDValue retops[2] = {
 707       result,
 708       the_chain
 709     };
 710
 711     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 712                          retops, sizeof(retops) / sizeof(retops[0]));
 713     return result;
 714   }
 715   case ISD::PRE_INC:
 716   case ISD::PRE_DEC:
 717   case ISD::POST_INC:
 718   case ISD::POST_DEC:
 719   case ISD::LAST_INDEXED_MODE:
 720     {
 721       std::string msg;
 722       raw_string_ostream Msg(msg);
 723       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 724             "UNINDEXED\n";
 725       Msg << (unsigned) LN->getAddressingMode();
 726       llvm_report_error(Msg.str());
 727       /*NOTREACHED*/
 728     }
 729   }
 730
 731   return SDValue();
 732 }
 733
 734 /// Custom lower stores for CellSPU
 735 /*!
 736  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 737  within a 16-byte block, we have to generate a shuffle to insert the
 738  requested element into its place, then store the resulting block.
 739  */
 740 static SDValue
 741 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 742   StoreSDNode *SN = cast<StoreSDNode>(Op);
 743   SDValue Value = SN->getValue();
 744   EVT VT = Value.getValueType();
 745   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 746   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 747   DebugLoc dl = Op.getDebugLoc();
 748   unsigned alignment = SN->getAlignment();
 749
 750   switch (SN->getAddressingMode()) {
 751   case ISD::UNINDEXED: {
 752     // The vector type we really want to load from the 16-byte chunk.
 753     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 754                                  VT, (128 / VT.getSizeInBits())),
 755         stVecVT = EVT::getVectorVT(*DAG.getContext(),
 756                                    StVT, (128 / StVT.getSizeInBits()));
 757
 758     SDValue alignLoadVec;
 759     SDValue basePtr = SN->getBasePtr();
 760     SDValue the_chain = SN->getChain();
 761     SDValue insertEltOffs;
 762
 763     if (alignment == 16) {
 764       ConstantSDNode *CN;
 765
 766       // Special cases for a known aligned load to simplify the base pointer
 767       // and insertion byte:
 768       if (basePtr.getOpcode() == ISD::ADD
 769           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 770         // Known offset into basePtr
 771         int64_t offset = CN->getSExtValue();
 772
 773         // Simplify the base pointer for this case:
 774         basePtr = basePtr.getOperand(0);
 775         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 776                                     basePtr,
 777                                     DAG.getConstant((offset & 0xf), PtrVT));
 778
 779         if ((offset & ~0xf) > 0) {
 780           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 781                                 basePtr,
 782                                 DAG.getConstant((offset & ~0xf), PtrVT));
 783         }
 784       } else {
 785         // Otherwise, assume it's at byte 0 of basePtr
 786         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 787                                     basePtr,
 788                                     DAG.getConstant(0, PtrVT));
 789       }
 790     } else {
 791       // Unaligned load: must be more pessimistic about addressing modes:
 792       if (basePtr.getOpcode() == ISD::ADD) {
 793         MachineFunction &MF = DAG.getMachineFunction();
 794         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 795         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 796         SDValue Flag;
 797
 798         SDValue Op0 = basePtr.getOperand(0);
 799         SDValue Op1 = basePtr.getOperand(1);
 800
 801         if (isa<ConstantSDNode>(Op1)) {
 802           // Convert the (add <ptr>, <const>) to an indirect address contained
 803           // in a register. Note that this is done because we need to avoid
 804           // creating a 0(reg) d-form address due to the SPU's block loads.
 805           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 806           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 807           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 808         } else {
 809           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 810           // will likely be lowered as a reg(reg) x-form address.
 811           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 812         }
 813       } else {
 814         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 815                               basePtr,
 816                               DAG.getConstant(0, PtrVT));
 817       }
 818
 819       // Insertion point is solely determined by basePtr's contents
 820       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 821                                   basePtr,
 822                                   DAG.getConstant(0, PtrVT));
 823     }
 824
 825     // Re-emit as a v16i8 vector load
 826     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 827                                SN->getSrcValue(), SN->getSrcValueOffset(),
 828                                SN->isVolatile(), 16);
 829
 830     // Update the chain
 831     the_chain = alignLoadVec.getValue(1);
 832
 833     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 834     SDValue theValue = SN->getValue();
 835     SDValue result;
 836
 837     if (StVT != VT
 838         && (theValue.getOpcode() == ISD::AssertZext
 839             || theValue.getOpcode() == ISD::AssertSext)) {
 840       // Drill down and get the value for zero- and sign-extended
 841       // quantities
 842       theValue = theValue.getOperand(0);
 843     }
 844
 845     // If the base pointer is already a D-form address, then just create
 846     // a new D-form address with a slot offset and the orignal base pointer.
 847     // Otherwise generate a D-form address with the slot offset relative
 848     // to the stack pointer, which is always aligned.
 849 #if !defined(NDEBUG)
 850       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 851         errs() << "CellSPU LowerSTORE: basePtr = ";
 852         basePtr.getNode()->dump(&DAG);
 853         errs() << "\n";
 854       }
 855 #endif
 856
 857     SDValue insertEltOp =
 858             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 859     SDValue vectorizeOp =
 860             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 861
 862     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 863                          vectorizeOp, alignLoadVec,
 864                          DAG.getNode(ISD::BIT_CONVERT, dl,
 865                                      MVT::v4i32, insertEltOp));
 866
 867     result = DAG.getStore(the_chain, dl, result, basePtr,
 868                           LN->getSrcValue(), LN->getSrcValueOffset(),
 869                           LN->isVolatile(), LN->getAlignment());
 870
 871 #if 0 && !defined(NDEBUG)
 872     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 873       const SDValue &currentRoot = DAG.getRoot();
 874
 875       DAG.setRoot(result);
 876       errs() << "------- CellSPU:LowerStore result:\n";
 877       DAG.dump();
 878       errs() << "-------\n";
 879       DAG.setRoot(currentRoot);
 880     }
 881 #endif
 882
 883     return result;
 884     /*UNREACHED*/
 885   }
 886   case ISD::PRE_INC:
 887   case ISD::PRE_DEC:
 888   case ISD::POST_INC:
 889   case ISD::POST_DEC:
 890   case ISD::LAST_INDEXED_MODE:
 891     {
 892       std::string msg;
 893       raw_string_ostream Msg(msg);
 894       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 895             "UNINDEXED\n";
 896       Msg << (unsigned) SN->getAddressingMode();
 897       llvm_report_error(Msg.str());
 898       /*NOTREACHED*/
 899     }
 900   }
 901
 902   return SDValue();
 903 }
 904
 905 //! Generate the address of a constant pool entry.
 906 static SDValue
 907 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 908   EVT PtrVT = Op.getValueType();
 909   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 910   Constant *C = CP->getConstVal();
 911   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 912   SDValue Zero = DAG.getConstant(0, PtrVT);
 913   const TargetMachine &TM = DAG.getTarget();
 914   // FIXME there is no actual debug info here
 915   DebugLoc dl = Op.getDebugLoc();
 916
 917   if (TM.getRelocationModel() == Reloc::Static) {
 918     if (!ST->usingLargeMem()) {
 919       // Just return the SDValue with the constant pool address in it.
 920       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 921     } else {
 922       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 923       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 924       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 925     }
 926   }
 927
 928   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 929                    " not supported.");
 930   return SDValue();
 931 }
 932
 933 //! Alternate entry point for generating the address of a constant pool entry
 934 SDValue
 935 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 936   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 937 }
 938
 939 static SDValue
 940 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 941   EVT PtrVT = Op.getValueType();
 942   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 943   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 944   SDValue Zero = DAG.getConstant(0, PtrVT);
 945   const TargetMachine &TM = DAG.getTarget();
 946   // FIXME there is no actual debug info here
 947   DebugLoc dl = Op.getDebugLoc();
 948
 949   if (TM.getRelocationModel() == Reloc::Static) {
 950     if (!ST->usingLargeMem()) {
 951       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 952     } else {
 953       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 954       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 955       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 956     }
 957   }
 958
 959   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 960                    " not supported.");
 961   return SDValue();
 962 }
 963
 964 static SDValue
 965 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 966   EVT PtrVT = Op.getValueType();
 967   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 968   GlobalValue *GV = GSDN->getGlobal();
 969   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 970   const TargetMachine &TM = DAG.getTarget();
 971   SDValue Zero = DAG.getConstant(0, PtrVT);
 972   // FIXME there is no actual debug info here
 973   DebugLoc dl = Op.getDebugLoc();
 974
 975   if (TM.getRelocationModel() == Reloc::Static) {
 976     if (!ST->usingLargeMem()) {
 977       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 978     } else {
 979       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 980       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 981       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 982     }
 983   } else {
 984     llvm_report_error("LowerGlobalAddress: Relocation model other than static"
 985                       "not supported.");
 986     /*NOTREACHED*/
 987   }
 988
 989   return SDValue();
 990 }
 991
 992 //! Custom lower double precision floating point constants
 993 static SDValue
 994 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 995   EVT VT = Op.getValueType();
 996   // FIXME there is no actual debug info here
 997   DebugLoc dl = Op.getDebugLoc();
 998
 999   if (VT == MVT::f64) {
1000     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
1001
1002     assert((FP != 0) &&
1003            "LowerConstantFP: Node is not ConstantFPSDNode");
1004
1005     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1006     SDValue T = DAG.getConstant(dbits, MVT::i64);
1007     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1008     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1009                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
1010   }
1011
1012   return SDValue();
1013 }
1014
1015 SDValue
1016 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1017                                         CallingConv::ID CallConv, bool isVarArg,
1018                                         const SmallVectorImpl<ISD::InputArg>
1019                                           &Ins,
1020                                         DebugLoc dl, SelectionDAG &DAG,
1021                                         SmallVectorImpl<SDValue> &InVals) {
1022
1023   MachineFunction &MF = DAG.getMachineFunction();
1024   MachineFrameInfo *MFI = MF.getFrameInfo();
1025   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1026
1027   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1028   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1029
1030   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1031   unsigned ArgRegIdx = 0;
1032   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1033
1034   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1035
1036   // Add DAG nodes to load the arguments or copy them out of registers.
1037   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1038     EVT ObjectVT = Ins[ArgNo].VT;
1039     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1040     SDValue ArgVal;
1041
1042     if (ArgRegIdx < NumArgRegs) {
1043       const TargetRegisterClass *ArgRegClass;
1044
1045       switch (ObjectVT.getSimpleVT().SimpleTy) {
1046       default: {
1047         std::string msg;
1048         raw_string_ostream Msg(msg);
1049         Msg << "LowerFormalArguments Unhandled argument type: "
1050              << ObjectVT.getEVTString();
1051         llvm_report_error(Msg.str());
1052       }
1053       case MVT::i8:
1054         ArgRegClass = &SPU::R8CRegClass;
1055         break;
1056       case MVT::i16:
1057         ArgRegClass = &SPU::R16CRegClass;
1058         break;
1059       case MVT::i32:
1060         ArgRegClass = &SPU::R32CRegClass;
1061         break;
1062       case MVT::i64:
1063         ArgRegClass = &SPU::R64CRegClass;
1064         break;
1065       case MVT::i128:
1066         ArgRegClass = &SPU::GPRCRegClass;
1067         break;
1068       case MVT::f32:
1069         ArgRegClass = &SPU::R32FPRegClass;
1070         break;
1071       case MVT::f64:
1072         ArgRegClass = &SPU::R64FPRegClass;
1073         break;
1074       case MVT::v2f64:
1075       case MVT::v4f32:
1076       case MVT::v2i64:
1077       case MVT::v4i32:
1078       case MVT::v8i16:
1079       case MVT::v16i8:
1080         ArgRegClass = &SPU::VECREGRegClass;
1081         break;
1082       }
1083
1084       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1085       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1086       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1087       ++ArgRegIdx;
1088     } else {
1089       // We need to load the argument to a virtual register if we determined
1090       // above that we ran out of physical registers of the appropriate type
1091       // or we're forced to do vararg
1092       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
1093       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1094       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
1095       ArgOffset += StackSlotSize;
1096     }
1097
1098     InVals.push_back(ArgVal);
1099     // Update the chain
1100     Chain = ArgVal.getOperand(0);
1101   }
1102
1103   // vararg handling:
1104   if (isVarArg) {
1105     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1106     // We will spill (79-3)+1 registers to the stack
1107     SmallVector<SDValue, 79-3+1> MemOps;
1108
1109     // Create the frame slot
1110
1111     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1112       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
1113                                                  true, false);
1114       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1115       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1116       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
1117       Chain = Store.getOperand(0);
1118       MemOps.push_back(Store);
1119
1120       // Increment address by stack slot size for the next stored argument
1121       ArgOffset += StackSlotSize;
1122     }
1123     if (!MemOps.empty())
1124       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1125                           &MemOps[0], MemOps.size());
1126   }
1127
1128   return Chain;
1129 }
1130
1131 /// isLSAAddress - Return the immediate to use if the specified
1132 /// value is representable as a LSA address.
1133 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1134   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1135   if (!C) return 0;
1136
1137   int Addr = C->getZExtValue();
1138   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1139       (Addr << 14 >> 14) != Addr)
1140     return 0;  // Top 14 bits have to be sext of immediate.
1141
1142   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1143 }
1144
1145 SDValue
1146 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1147                              CallingConv::ID CallConv, bool isVarArg,
1148                              bool isTailCall,
1149                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1150                              const SmallVectorImpl<ISD::InputArg> &Ins,
1151                              DebugLoc dl, SelectionDAG &DAG,
1152                              SmallVectorImpl<SDValue> &InVals) {
1153
1154   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1155   unsigned NumOps     = Outs.size();
1156   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1157   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1158   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1159
1160   // Handy pointer type
1161   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1162
1163   // Accumulate how many bytes are to be pushed on the stack, including the
1164   // linkage area, and parameter passing area.  According to the SPU ABI,
1165   // we minimally need space for [LR] and [SP]
1166   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1167
1168   // Set up a copy of the stack pointer for use loading and storing any
1169   // arguments that may not fit in the registers available for argument
1170   // passing.
1171   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1172
1173   // Figure out which arguments are going to go in registers, and which in
1174   // memory.
1175   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1176   unsigned ArgRegIdx = 0;
1177
1178   // Keep track of registers passing arguments
1179   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1180   // And the arguments passed on the stack
1181   SmallVector<SDValue, 8> MemOpChains;
1182
1183   for (unsigned i = 0; i != NumOps; ++i) {
1184     SDValue Arg = Outs[i].Val;
1185
1186     // PtrOff will be used to store the current argument to the stack if a
1187     // register cannot be found for it.
1188     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1189     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1190
1191     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1192     default: llvm_unreachable("Unexpected ValueType for argument!");
1193     case MVT::i8:
1194     case MVT::i16:
1195     case MVT::i32:
1196     case MVT::i64:
1197     case MVT::i128:
1198       if (ArgRegIdx != NumArgRegs) {
1199         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1200       } else {
1201         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1202         ArgOffset += StackSlotSize;
1203       }
1204       break;
1205     case MVT::f32:
1206     case MVT::f64:
1207       if (ArgRegIdx != NumArgRegs) {
1208         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1209       } else {
1210         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1211         ArgOffset += StackSlotSize;
1212       }
1213       break;
1214     case MVT::v2i64:
1215     case MVT::v2f64:
1216     case MVT::v4f32:
1217     case MVT::v4i32:
1218     case MVT::v8i16:
1219     case MVT::v16i8:
1220       if (ArgRegIdx != NumArgRegs) {
1221         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1222       } else {
1223         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1224         ArgOffset += StackSlotSize;
1225       }
1226       break;
1227     }
1228   }
1229
1230   // Update number of stack bytes actually used, insert a call sequence start
1231   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1232   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1233                                                             true));
1234
1235   if (!MemOpChains.empty()) {
1236     // Adjust the stack pointer for the stack arguments.
1237     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1238                         &MemOpChains[0], MemOpChains.size());
1239   }
1240
1241   // Build a sequence of copy-to-reg nodes chained together with token chain
1242   // and flag operands which copy the outgoing args into the appropriate regs.
1243   SDValue InFlag;
1244   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1245     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1246                              RegsToPass[i].second, InFlag);
1247     InFlag = Chain.getValue(1);
1248   }
1249
1250   SmallVector<SDValue, 8> Ops;
1251   unsigned CallOpc = SPUISD::CALL;
1252
1253   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1254   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1255   // node so that legalize doesn't hack it.
1256   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1257     GlobalValue *GV = G->getGlobal();
1258     EVT CalleeVT = Callee.getValueType();
1259     SDValue Zero = DAG.getConstant(0, PtrVT);
1260     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1261
1262     if (!ST->usingLargeMem()) {
1263       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1264       // style calls, otherwise, external symbols are BRASL calls. This assumes
1265       // that declared/defined symbols are in the same compilation unit and can
1266       // be reached through PC-relative jumps.
1267       //
1268       // NOTE:
1269       // This may be an unsafe assumption for JIT and really large compilation
1270       // units.
1271       if (GV->isDeclaration()) {
1272         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1273       } else {
1274         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1275       }
1276     } else {
1277       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1278       // address pairs:
1279       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1280     }
1281   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1282     EVT CalleeVT = Callee.getValueType();
1283     SDValue Zero = DAG.getConstant(0, PtrVT);
1284     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1285         Callee.getValueType());
1286
1287     if (!ST->usingLargeMem()) {
1288       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1289     } else {
1290       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1291     }
1292   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1293     // If this is an absolute destination address that appears to be a legal
1294     // local store address, use the munged value.
1295     Callee = SDValue(Dest, 0);
1296   }
1297
1298   Ops.push_back(Chain);
1299   Ops.push_back(Callee);
1300
1301   // Add argument registers to the end of the list so that they are known live
1302   // into the call.
1303   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1304     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1305                                   RegsToPass[i].second.getValueType()));
1306
1307   if (InFlag.getNode())
1308     Ops.push_back(InFlag);
1309   // Returns a chain and a flag for retval copy to use.
1310   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1311                       &Ops[0], Ops.size());
1312   InFlag = Chain.getValue(1);
1313
1314   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1315                              DAG.getIntPtrConstant(0, true), InFlag);
1316   if (!Ins.empty())
1317     InFlag = Chain.getValue(1);
1318
1319   // If the function returns void, just return the chain.
1320   if (Ins.empty())
1321     return Chain;
1322
1323   // If the call has results, copy the values out of the ret val registers.
1324   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1325   default: llvm_unreachable("Unexpected ret value!");
1326   case MVT::Other: break;
1327   case MVT::i32:
1328     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1329       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1330                                  MVT::i32, InFlag).getValue(1);
1331       InVals.push_back(Chain.getValue(0));
1332       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1333                                  Chain.getValue(2)).getValue(1);
1334       InVals.push_back(Chain.getValue(0));
1335     } else {
1336       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1337                                  InFlag).getValue(1);
1338       InVals.push_back(Chain.getValue(0));
1339     }
1340     break;
1341   case MVT::i64:
1342     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1343                                InFlag).getValue(1);
1344     InVals.push_back(Chain.getValue(0));
1345     break;
1346   case MVT::i128:
1347     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1348                                InFlag).getValue(1);
1349     InVals.push_back(Chain.getValue(0));
1350     break;
1351   case MVT::f32:
1352   case MVT::f64:
1353     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1354                                InFlag).getValue(1);
1355     InVals.push_back(Chain.getValue(0));
1356     break;
1357   case MVT::v2f64:
1358   case MVT::v2i64:
1359   case MVT::v4f32:
1360   case MVT::v4i32:
1361   case MVT::v8i16:
1362   case MVT::v16i8:
1363     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1364                                    InFlag).getValue(1);
1365     InVals.push_back(Chain.getValue(0));
1366     break;
1367   }
1368
1369   return Chain;
1370 }
1371
1372 SDValue
1373 SPUTargetLowering::LowerReturn(SDValue Chain,
1374                                CallingConv::ID CallConv, bool isVarArg,
1375                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1376                                DebugLoc dl, SelectionDAG &DAG) {
1377
1378   SmallVector<CCValAssign, 16> RVLocs;
1379   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1380                  RVLocs, *DAG.getContext());
1381   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1382
1383   // If this is the first return lowered for this function, add the regs to the
1384   // liveout set for the function.
1385   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1386     for (unsigned i = 0; i != RVLocs.size(); ++i)
1387       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1388   }
1389
1390   SDValue Flag;
1391
1392   // Copy the result values into the output registers.
1393   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1394     CCValAssign &VA = RVLocs[i];
1395     assert(VA.isRegLoc() && "Can only return in registers!");
1396     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1397                              Outs[i].Val, Flag);
1398     Flag = Chain.getValue(1);
1399   }
1400
1401   if (Flag.getNode())
1402     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1403   else
1404     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1405 }
1406
1407
1408 //===----------------------------------------------------------------------===//
1409 // Vector related lowering:
1410 //===----------------------------------------------------------------------===//
1411
1412 static ConstantSDNode *
1413 getVecImm(SDNode *N) {
1414   SDValue OpVal(0, 0);
1415
1416   // Check to see if this buildvec has a single non-undef value in its elements.
1417   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1418     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1419     if (OpVal.getNode() == 0)
1420       OpVal = N->getOperand(i);
1421     else if (OpVal != N->getOperand(i))
1422       return 0;
1423   }
1424
1425   if (OpVal.getNode() != 0) {
1426     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1427       return CN;
1428     }
1429   }
1430
1431   return 0;
1432 }
1433
1434 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1435 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1436 /// constant
1437 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1438                               EVT ValueType) {
1439   if (ConstantSDNode *CN = getVecImm(N)) {
1440     uint64_t Value = CN->getZExtValue();
1441     if (ValueType == MVT::i64) {
1442       uint64_t UValue = CN->getZExtValue();
1443       uint32_t upper = uint32_t(UValue >> 32);
1444       uint32_t lower = uint32_t(UValue);
1445       if (upper != lower)
1446         return SDValue();
1447       Value = Value >> 32;
1448     }
1449     if (Value <= 0x3ffff)
1450       return DAG.getTargetConstant(Value, ValueType);
1451   }
1452
1453   return SDValue();
1454 }
1455
1456 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1457 /// and the value fits into a signed 16-bit constant, and if so, return the
1458 /// constant
1459 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1460                               EVT ValueType) {
1461   if (ConstantSDNode *CN = getVecImm(N)) {
1462     int64_t Value = CN->getSExtValue();
1463     if (ValueType == MVT::i64) {
1464       uint64_t UValue = CN->getZExtValue();
1465       uint32_t upper = uint32_t(UValue >> 32);
1466       uint32_t lower = uint32_t(UValue);
1467       if (upper != lower)
1468         return SDValue();
1469       Value = Value >> 32;
1470     }
1471     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1472       return DAG.getTargetConstant(Value, ValueType);
1473     }
1474   }
1475
1476   return SDValue();
1477 }
1478
1479 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1480 /// and the value fits into a signed 10-bit constant, and if so, return the
1481 /// constant
1482 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1483                               EVT ValueType) {
1484   if (ConstantSDNode *CN = getVecImm(N)) {
1485     int64_t Value = CN->getSExtValue();
1486     if (ValueType == MVT::i64) {
1487       uint64_t UValue = CN->getZExtValue();
1488       uint32_t upper = uint32_t(UValue >> 32);
1489       uint32_t lower = uint32_t(UValue);
1490       if (upper != lower)
1491         return SDValue();
1492       Value = Value >> 32;
1493     }
1494     if (isS10Constant(Value))
1495       return DAG.getTargetConstant(Value, ValueType);
1496   }
1497
1498   return SDValue();
1499 }
1500
1501 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1502 /// and the value fits into a signed 8-bit constant, and if so, return the
1503 /// constant.
1504 ///
1505 /// @note: The incoming vector is v16i8 because that's the only way we can load
1506 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1507 /// same value.
1508 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1509                              EVT ValueType) {
1510   if (ConstantSDNode *CN = getVecImm(N)) {
1511     int Value = (int) CN->getZExtValue();
1512     if (ValueType == MVT::i16
1513         && Value <= 0xffff                 /* truncated from uint64_t */
1514         && ((short) Value >> 8) == ((short) Value & 0xff))
1515       return DAG.getTargetConstant(Value & 0xff, ValueType);
1516     else if (ValueType == MVT::i8
1517              && (Value & 0xff) == Value)
1518       return DAG.getTargetConstant(Value, ValueType);
1519   }
1520
1521   return SDValue();
1522 }
1523
1524 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1525 /// and the value fits into a signed 16-bit constant, and if so, return the
1526 /// constant
1527 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1528                                EVT ValueType) {
1529   if (ConstantSDNode *CN = getVecImm(N)) {
1530     uint64_t Value = CN->getZExtValue();
1531     if ((ValueType == MVT::i32
1532           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1533         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1534       return DAG.getTargetConstant(Value >> 16, ValueType);
1535   }
1536
1537   return SDValue();
1538 }
1539
1540 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1541 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1542   if (ConstantSDNode *CN = getVecImm(N)) {
1543     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1544   }
1545
1546   return SDValue();
1547 }
1548
1549 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1550 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1551   if (ConstantSDNode *CN = getVecImm(N)) {
1552     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1553   }
1554
1555   return SDValue();
1556 }
1557
1558 //! Lower a BUILD_VECTOR instruction creatively:
1559 static SDValue
1560 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1561   EVT VT = Op.getValueType();
1562   EVT EltVT = VT.getVectorElementType();
1563   DebugLoc dl = Op.getDebugLoc();
1564   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1565   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1566   unsigned minSplatBits = EltVT.getSizeInBits();
1567
1568   if (minSplatBits < 16)
1569     minSplatBits = 16;
1570
1571   APInt APSplatBits, APSplatUndef;
1572   unsigned SplatBitSize;
1573   bool HasAnyUndefs;
1574
1575   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1576                             HasAnyUndefs, minSplatBits)
1577       || minSplatBits < SplatBitSize)
1578     return SDValue();   // Wasn't a constant vector or splat exceeded min
1579
1580   uint64_t SplatBits = APSplatBits.getZExtValue();
1581
1582   switch (VT.getSimpleVT().SimpleTy) {
1583   default: {
1584     std::string msg;
1585     raw_string_ostream Msg(msg);
1586     Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1587          << VT.getEVTString();
1588     llvm_report_error(Msg.str());
1589     /*NOTREACHED*/
1590   }
1591   case MVT::v4f32: {
1592     uint32_t Value32 = uint32_t(SplatBits);
1593     assert(SplatBitSize == 32
1594            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596     SDValue T = DAG.getConstant(Value32, MVT::i32);
1597     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1598                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1599     break;
1600   }
1601   case MVT::v2f64: {
1602     uint64_t f64val = uint64_t(SplatBits);
1603     assert(SplatBitSize == 64
1604            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1605     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606     SDValue T = DAG.getConstant(f64val, MVT::i64);
1607     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1608                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1609     break;
1610   }
1611   case MVT::v16i8: {
1612    // 8-bit constants have to be expanded to 16-bits
1613    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1614    SmallVector<SDValue, 8> Ops;
1615
1616    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1617    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1618                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1619   }
1620   case MVT::v8i16: {
1621     unsigned short Value16 = SplatBits;
1622     SDValue T = DAG.getConstant(Value16, EltVT);
1623     SmallVector<SDValue, 8> Ops;
1624
1625     Ops.assign(8, T);
1626     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1627   }
1628   case MVT::v4i32: {
1629     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1630     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1631   }
1632   case MVT::v2i32: {
1633     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1634     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1635   }
1636   case MVT::v2i64: {
1637     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1638   }
1639   }
1640
1641   return SDValue();
1642 }
1643
1644 /*!
1645  */
1646 SDValue
1647 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1648                      DebugLoc dl) {
1649   uint32_t upper = uint32_t(SplatVal >> 32);
1650   uint32_t lower = uint32_t(SplatVal);
1651
1652   if (upper == lower) {
1653     // Magic constant that can be matched by IL, ILA, et. al.
1654     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1655     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1656                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1657                                    Val, Val, Val, Val));
1658   } else {
1659     bool upper_special, lower_special;
1660
1661     // NOTE: This code creates common-case shuffle masks that can be easily
1662     // detected as common expressions. It is not attempting to create highly
1663     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1664
1665     // Detect if the upper or lower half is a special shuffle mask pattern:
1666     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1667     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1668
1669     // Both upper and lower are special, lower to a constant pool load:
1670     if (lower_special && upper_special) {
1671       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1672       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1673                          SplatValCN, SplatValCN);
1674     }
1675
1676     SDValue LO32;
1677     SDValue HI32;
1678     SmallVector<SDValue, 16> ShufBytes;
1679     SDValue Result;
1680
1681     // Create lower vector if not a special pattern
1682     if (!lower_special) {
1683       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1684       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1685                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1686                                      LO32C, LO32C, LO32C, LO32C));
1687     }
1688
1689     // Create upper vector if not a special pattern
1690     if (!upper_special) {
1691       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1692       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1693                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1694                                      HI32C, HI32C, HI32C, HI32C));
1695     }
1696
1697     // If either upper or lower are special, then the two input operands are
1698     // the same (basically, one of them is a "don't care")
1699     if (lower_special)
1700       LO32 = HI32;
1701     if (upper_special)
1702       HI32 = LO32;
1703
1704     for (int i = 0; i < 4; ++i) {
1705       uint64_t val = 0;
1706       for (int j = 0; j < 4; ++j) {
1707         SDValue V;
1708         bool process_upper, process_lower;
1709         val <<= 8;
1710         process_upper = (upper_special && (i & 1) == 0);
1711         process_lower = (lower_special && (i & 1) == 1);
1712
1713         if (process_upper || process_lower) {
1714           if ((process_upper && upper == 0)
1715                   || (process_lower && lower == 0))
1716             val |= 0x80;
1717           else if ((process_upper && upper == 0xffffffff)
1718                   || (process_lower && lower == 0xffffffff))
1719             val |= 0xc0;
1720           else if ((process_upper && upper == 0x80000000)
1721                   || (process_lower && lower == 0x80000000))
1722             val |= (j == 0 ? 0xe0 : 0x80);
1723         } else
1724           val |= i * 4 + j + ((i & 1) * 16);
1725       }
1726
1727       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1728     }
1729
1730     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1731                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1732                                    &ShufBytes[0], ShufBytes.size()));
1733   }
1734 }
1735
1736 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1737 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1738 /// permutation vector, V3, is monotonically increasing with one "exception"
1739 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1740 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1741 /// In either case, the net result is going to eventually invoke SHUFB to
1742 /// permute/shuffle the bytes from V1 and V2.
1743 /// \note
1744 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1745 /// control word for byte/halfword/word insertion. This takes care of a single
1746 /// element move from V2 into V1.
1747 /// \note
1748 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1749 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1750   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1751   SDValue V1 = Op.getOperand(0);
1752   SDValue V2 = Op.getOperand(1);
1753   DebugLoc dl = Op.getDebugLoc();
1754
1755   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1756
1757   // If we have a single element being moved from V1 to V2, this can be handled
1758   // using the C*[DX] compute mask instructions, but the vector elements have
1759   // to be monotonically increasing with one exception element.
1760   EVT VecVT = V1.getValueType();
1761   EVT EltVT = VecVT.getVectorElementType();
1762   unsigned EltsFromV2 = 0;
1763   unsigned V2Elt = 0;
1764   unsigned V2EltIdx0 = 0;
1765   unsigned CurrElt = 0;
1766   unsigned MaxElts = VecVT.getVectorNumElements();
1767   unsigned PrevElt = 0;
1768   unsigned V0Elt = 0;
1769   bool monotonic = true;
1770   bool rotate = true;
1771
1772   if (EltVT == MVT::i8) {
1773     V2EltIdx0 = 16;
1774   } else if (EltVT == MVT::i16) {
1775     V2EltIdx0 = 8;
1776   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1777     V2EltIdx0 = 4;
1778   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1779     V2EltIdx0 = 2;
1780   } else
1781     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1782
1783   for (unsigned i = 0; i != MaxElts; ++i) {
1784     if (SVN->getMaskElt(i) < 0)
1785       continue;
1786
1787     unsigned SrcElt = SVN->getMaskElt(i);
1788
1789     if (monotonic) {
1790       if (SrcElt >= V2EltIdx0) {
1791         if (1 >= (++EltsFromV2)) {
1792           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1793         }
1794       } else if (CurrElt != SrcElt) {
1795         monotonic = false;
1796       }
1797
1798       ++CurrElt;
1799     }
1800
1801     if (rotate) {
1802       if (PrevElt > 0 && SrcElt < MaxElts) {
1803         if ((PrevElt == SrcElt - 1)
1804             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1805           PrevElt = SrcElt;
1806           if (SrcElt == 0)
1807             V0Elt = i;
1808         } else {
1809           rotate = false;
1810         }
1811       } else if (PrevElt == 0) {
1812         // First time through, need to keep track of previous element
1813         PrevElt = SrcElt;
1814       } else {
1815         // This isn't a rotation, takes elements from vector 2
1816         rotate = false;
1817       }
1818     }
1819   }
1820
1821   if (EltsFromV2 == 1 && monotonic) {
1822     // Compute mask and shuffle
1823     MachineFunction &MF = DAG.getMachineFunction();
1824     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1825     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1826     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1827     // Initialize temporary register to 0
1828     SDValue InitTempReg =
1829       DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1830     // Copy register's contents as index in SHUFFLE_MASK:
1831     SDValue ShufMaskOp =
1832       DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1833                   DAG.getTargetConstant(V2Elt, MVT::i32),
1834                   DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1835     // Use shuffle mask in SHUFB synthetic instruction:
1836     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1837                        ShufMaskOp);
1838   } else if (rotate) {
1839     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1840
1841     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1842                        V1, DAG.getConstant(rotamt, MVT::i16));
1843   } else {
1844    // Convert the SHUFFLE_VECTOR mask's input element units to the
1845    // actual bytes.
1846     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1847
1848     SmallVector<SDValue, 16> ResultMask;
1849     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1850       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1851
1852       for (unsigned j = 0; j < BytesPerElement; ++j)
1853         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1854     }
1855
1856     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1857                                     &ResultMask[0], ResultMask.size());
1858     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1859   }
1860 }
1861
1862 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1863   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1864   DebugLoc dl = Op.getDebugLoc();
1865
1866   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1867     // For a constant, build the appropriate constant vector, which will
1868     // eventually simplify to a vector register load.
1869
1870     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1871     SmallVector<SDValue, 16> ConstVecValues;
1872     EVT VT;
1873     size_t n_copies;
1874
1875     // Create a constant vector:
1876     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1877     default: llvm_unreachable("Unexpected constant value type in "
1878                               "LowerSCALAR_TO_VECTOR");
1879     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1880     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1881     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1882     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1883     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1884     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1885     }
1886
1887     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1888     for (size_t j = 0; j < n_copies; ++j)
1889       ConstVecValues.push_back(CValue);
1890
1891     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1892                        &ConstVecValues[0], ConstVecValues.size());
1893   } else {
1894     // Otherwise, copy the value from one register to another:
1895     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1896     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1897     case MVT::i8:
1898     case MVT::i16:
1899     case MVT::i32:
1900     case MVT::i64:
1901     case MVT::f32:
1902     case MVT::f64:
1903       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1904     }
1905   }
1906
1907   return SDValue();
1908 }
1909
1910 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1911   EVT VT = Op.getValueType();
1912   SDValue N = Op.getOperand(0);
1913   SDValue Elt = Op.getOperand(1);
1914   DebugLoc dl = Op.getDebugLoc();
1915   SDValue retval;
1916
1917   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1918     // Constant argument:
1919     int EltNo = (int) C->getZExtValue();
1920
1921     // sanity checks:
1922     if (VT == MVT::i8 && EltNo >= 16)
1923       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1924     else if (VT == MVT::i16 && EltNo >= 8)
1925       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1926     else if (VT == MVT::i32 && EltNo >= 4)
1927       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1928     else if (VT == MVT::i64 && EltNo >= 2)
1929       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1930
1931     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1932       // i32 and i64: Element 0 is the preferred slot
1933       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1934     }
1935
1936     // Need to generate shuffle mask and extract:
1937     int prefslot_begin = -1, prefslot_end = -1;
1938     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1939
1940     switch (VT.getSimpleVT().SimpleTy) {
1941     default:
1942       assert(false && "Invalid value type!");
1943     case MVT::i8: {
1944       prefslot_begin = prefslot_end = 3;
1945       break;
1946     }
1947     case MVT::i16: {
1948       prefslot_begin = 2; prefslot_end = 3;
1949       break;
1950     }
1951     case MVT::i32:
1952     case MVT::f32: {
1953       prefslot_begin = 0; prefslot_end = 3;
1954       break;
1955     }
1956     case MVT::i64:
1957     case MVT::f64: {
1958       prefslot_begin = 0; prefslot_end = 7;
1959       break;
1960     }
1961     }
1962
1963     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1964            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1965
1966     unsigned int ShufBytes[16] = {
1967       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1968     };
1969     for (int i = 0; i < 16; ++i) {
1970       // zero fill uppper part of preferred slot, don't care about the
1971       // other slots:
1972       unsigned int mask_val;
1973       if (i <= prefslot_end) {
1974         mask_val =
1975           ((i < prefslot_begin)
1976            ? 0x80
1977            : elt_byte + (i - prefslot_begin));
1978
1979         ShufBytes[i] = mask_val;
1980       } else
1981         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1982     }
1983
1984     SDValue ShufMask[4];
1985     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1986       unsigned bidx = i * 4;
1987       unsigned int bits = ((ShufBytes[bidx] << 24) |
1988                            (ShufBytes[bidx+1] << 16) |
1989                            (ShufBytes[bidx+2] << 8) |
1990                            ShufBytes[bidx+3]);
1991       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1992     }
1993
1994     SDValue ShufMaskVec =
1995       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1996                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1997
1998     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1999                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2000                                      N, N, ShufMaskVec));
2001   } else {
2002     // Variable index: Rotate the requested element into slot 0, then replicate
2003     // slot 0 across the vector
2004     EVT VecVT = N.getValueType();
2005     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2006       llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2007                         "vector type!");
2008     }
2009
2010     // Make life easier by making sure the index is zero-extended to i32
2011     if (Elt.getValueType() != MVT::i32)
2012       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2013
2014     // Scale the index to a bit/byte shift quantity
2015     APInt scaleFactor =
2016             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2017     unsigned scaleShift = scaleFactor.logBase2();
2018     SDValue vecShift;
2019
2020     if (scaleShift > 0) {
2021       // Scale the shift factor:
2022       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2023                         DAG.getConstant(scaleShift, MVT::i32));
2024     }
2025
2026     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2027
2028     // Replicate the bytes starting at byte 0 across the entire vector (for
2029     // consistency with the notion of a unified register set)
2030     SDValue replicate;
2031
2032     switch (VT.getSimpleVT().SimpleTy) {
2033     default:
2034       llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2035                         "type");
2036       /*NOTREACHED*/
2037     case MVT::i8: {
2038       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2039       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2040                               factor, factor, factor, factor);
2041       break;
2042     }
2043     case MVT::i16: {
2044       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2045       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2046                               factor, factor, factor, factor);
2047       break;
2048     }
2049     case MVT::i32:
2050     case MVT::f32: {
2051       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2052       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2053                               factor, factor, factor, factor);
2054       break;
2055     }
2056     case MVT::i64:
2057     case MVT::f64: {
2058       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2059       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2060       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2061                               loFactor, hiFactor, loFactor, hiFactor);
2062       break;
2063     }
2064     }
2065
2066     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2067                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2068                                      vecShift, vecShift, replicate));
2069   }
2070
2071   return retval;
2072 }
2073
2074 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2075   SDValue VecOp = Op.getOperand(0);
2076   SDValue ValOp = Op.getOperand(1);
2077   SDValue IdxOp = Op.getOperand(2);
2078   DebugLoc dl = Op.getDebugLoc();
2079   EVT VT = Op.getValueType();
2080
2081   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2082   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2083
2084   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2085   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2086   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2087                                 DAG.getRegister(SPU::R1, PtrVT),
2088                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2089   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2090
2091   SDValue result =
2092     DAG.getNode(SPUISD::SHUFB, dl, VT,
2093                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2094                 VecOp,
2095                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2096
2097   return result;
2098 }
2099
2100 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2101                            const TargetLowering &TLI)
2102 {
2103   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2104   DebugLoc dl = Op.getDebugLoc();
2105   EVT ShiftVT = TLI.getShiftAmountTy();
2106
2107   assert(Op.getValueType() == MVT::i8);
2108   switch (Opc) {
2109   default:
2110     llvm_unreachable("Unhandled i8 math operator");
2111     /*NOTREACHED*/
2112     break;
2113   case ISD::ADD: {
2114     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2115     // the result:
2116     SDValue N1 = Op.getOperand(1);
2117     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2118     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2119     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2120                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2121
2122   }
2123
2124   case ISD::SUB: {
2125     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2126     // the result:
2127     SDValue N1 = Op.getOperand(1);
2128     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2129     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2130     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2131                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2132   }
2133   case ISD::ROTR:
2134   case ISD::ROTL: {
2135     SDValue N1 = Op.getOperand(1);
2136     EVT N1VT = N1.getValueType();
2137
2138     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2139     if (!N1VT.bitsEq(ShiftVT)) {
2140       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2141                        ? ISD::ZERO_EXTEND
2142                        : ISD::TRUNCATE;
2143       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2144     }
2145
2146     // Replicate lower 8-bits into upper 8:
2147     SDValue ExpandArg =
2148       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2149                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2150                               N0, DAG.getConstant(8, MVT::i32)));
2151
2152     // Truncate back down to i8
2153     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2154                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2155   }
2156   case ISD::SRL:
2157   case ISD::SHL: {
2158     SDValue N1 = Op.getOperand(1);
2159     EVT N1VT = N1.getValueType();
2160
2161     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2162     if (!N1VT.bitsEq(ShiftVT)) {
2163       unsigned N1Opc = ISD::ZERO_EXTEND;
2164
2165       if (N1.getValueType().bitsGT(ShiftVT))
2166         N1Opc = ISD::TRUNCATE;
2167
2168       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2169     }
2170
2171     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2172                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2173   }
2174   case ISD::SRA: {
2175     SDValue N1 = Op.getOperand(1);
2176     EVT N1VT = N1.getValueType();
2177
2178     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2179     if (!N1VT.bitsEq(ShiftVT)) {
2180       unsigned N1Opc = ISD::SIGN_EXTEND;
2181
2182       if (N1VT.bitsGT(ShiftVT))
2183         N1Opc = ISD::TRUNCATE;
2184       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2185     }
2186
2187     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2188                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2189   }
2190   case ISD::MUL: {
2191     SDValue N1 = Op.getOperand(1);
2192
2193     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2194     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2195     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2196                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2197     break;
2198   }
2199   }
2200
2201   return SDValue();
2202 }
2203
2204 //! Lower byte immediate operations for v16i8 vectors:
2205 static SDValue
2206 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2207   SDValue ConstVec;
2208   SDValue Arg;
2209   EVT VT = Op.getValueType();
2210   DebugLoc dl = Op.getDebugLoc();
2211
2212   ConstVec = Op.getOperand(0);
2213   Arg = Op.getOperand(1);
2214   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2215     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2216       ConstVec = ConstVec.getOperand(0);
2217     } else {
2218       ConstVec = Op.getOperand(1);
2219       Arg = Op.getOperand(0);
2220       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2221         ConstVec = ConstVec.getOperand(0);
2222       }
2223     }
2224   }
2225
2226   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2227     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2228     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2229
2230     APInt APSplatBits, APSplatUndef;
2231     unsigned SplatBitSize;
2232     bool HasAnyUndefs;
2233     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2234
2235     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2236                               HasAnyUndefs, minSplatBits)
2237         && minSplatBits <= SplatBitSize) {
2238       uint64_t SplatBits = APSplatBits.getZExtValue();
2239       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2240
2241       SmallVector<SDValue, 16> tcVec;
2242       tcVec.assign(16, tc);
2243       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2244                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2245     }
2246   }
2247
2248   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2249   // lowered.  Return the operation, rather than a null SDValue.
2250   return Op;
2251 }
2252
2253 //! Custom lowering for CTPOP (count population)
2254 /*!
2255   Custom lowering code that counts the number ones in the input
2256   operand. SPU has such an instruction, but it counts the number of
2257   ones per byte, which then have to be accumulated.
2258 */
2259 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2260   EVT VT = Op.getValueType();
2261   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2262                                VT, (128 / VT.getSizeInBits()));
2263   DebugLoc dl = Op.getDebugLoc();
2264
2265   switch (VT.getSimpleVT().SimpleTy) {
2266   default:
2267     assert(false && "Invalid value type!");
2268   case MVT::i8: {
2269     SDValue N = Op.getOperand(0);
2270     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2271
2272     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2273     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2274
2275     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2276   }
2277
2278   case MVT::i16: {
2279     MachineFunction &MF = DAG.getMachineFunction();
2280     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2281
2282     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2283
2284     SDValue N = Op.getOperand(0);
2285     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2286     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2287     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2288
2289     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2290     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2291
2292     // CNTB_result becomes the chain to which all of the virtual registers
2293     // CNTB_reg, SUM1_reg become associated:
2294     SDValue CNTB_result =
2295       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2296
2297     SDValue CNTB_rescopy =
2298       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2299
2300     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2301
2302     return DAG.getNode(ISD::AND, dl, MVT::i16,
2303                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2304                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2305                                                Tmp1, Shift1),
2306                                    Tmp1),
2307                        Mask0);
2308   }
2309
2310   case MVT::i32: {
2311     MachineFunction &MF = DAG.getMachineFunction();
2312     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2313
2314     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2315     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2316
2317     SDValue N = Op.getOperand(0);
2318     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2319     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2320     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2321     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2322
2323     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2324     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2325
2326     // CNTB_result becomes the chain to which all of the virtual registers
2327     // CNTB_reg, SUM1_reg become associated:
2328     SDValue CNTB_result =
2329       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2330
2331     SDValue CNTB_rescopy =
2332       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2333
2334     SDValue Comp1 =
2335       DAG.getNode(ISD::SRL, dl, MVT::i32,
2336                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2337                   Shift1);
2338
2339     SDValue Sum1 =
2340       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2341                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2342
2343     SDValue Sum1_rescopy =
2344       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2345
2346     SDValue Comp2 =
2347       DAG.getNode(ISD::SRL, dl, MVT::i32,
2348                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2349                   Shift2);
2350     SDValue Sum2 =
2351       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2352                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2353
2354     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2355   }
2356
2357   case MVT::i64:
2358     break;
2359   }
2360
2361   return SDValue();
2362 }
2363
2364 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2365 /*!
2366  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2367  All conversions to i64 are expanded to a libcall.
2368  */
2369 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2370                               SPUTargetLowering &TLI) {
2371   EVT OpVT = Op.getValueType();
2372   SDValue Op0 = Op.getOperand(0);
2373   EVT Op0VT = Op0.getValueType();
2374
2375   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2376       || OpVT == MVT::i64) {
2377     // Convert f32 / f64 to i32 / i64 via libcall.
2378     RTLIB::Libcall LC =
2379             (Op.getOpcode() == ISD::FP_TO_SINT)
2380              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2381              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2382     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2383     SDValue Dummy;
2384     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2385   }
2386
2387   return Op;
2388 }
2389
2390 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2391 /*!
2392  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2393  All conversions from i64 are expanded to a libcall.
2394  */
2395 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2396                               SPUTargetLowering &TLI) {
2397   EVT OpVT = Op.getValueType();
2398   SDValue Op0 = Op.getOperand(0);
2399   EVT Op0VT = Op0.getValueType();
2400
2401   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2402       || Op0VT == MVT::i64) {
2403     // Convert i32, i64 to f64 via libcall:
2404     RTLIB::Libcall LC =
2405             (Op.getOpcode() == ISD::SINT_TO_FP)
2406              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2407              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2408     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2409     SDValue Dummy;
2410     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2411   }
2412
2413   return Op;
2414 }
2415
2416 //! Lower ISD::SETCC
2417 /*!
2418  This handles MVT::f64 (double floating point) condition lowering
2419  */
2420 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2421                           const TargetLowering &TLI) {
2422   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2423   DebugLoc dl = Op.getDebugLoc();
2424   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2425
2426   SDValue lhs = Op.getOperand(0);
2427   SDValue rhs = Op.getOperand(1);
2428   EVT lhsVT = lhs.getValueType();
2429   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2430
2431   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2432   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2433   EVT IntVT(MVT::i64);
2434
2435   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2436   // selected to a NOP:
2437   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2438   SDValue lhsHi32 =
2439           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2440                       DAG.getNode(ISD::SRL, dl, IntVT,
2441                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2442   SDValue lhsHi32abs =
2443           DAG.getNode(ISD::AND, dl, MVT::i32,
2444                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2445   SDValue lhsLo32 =
2446           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2447
2448   // SETO and SETUO only use the lhs operand:
2449   if (CC->get() == ISD::SETO) {
2450     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2451     // SETUO
2452     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2453     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2454                        DAG.getSetCC(dl, ccResultVT,
2455                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2456                                     ISD::SETUO),
2457                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2458   } else if (CC->get() == ISD::SETUO) {
2459     // Evaluates to true if Op0 is [SQ]NaN
2460     return DAG.getNode(ISD::AND, dl, ccResultVT,
2461                        DAG.getSetCC(dl, ccResultVT,
2462                                     lhsHi32abs,
2463                                     DAG.getConstant(0x7ff00000, MVT::i32),
2464                                     ISD::SETGE),
2465                        DAG.getSetCC(dl, ccResultVT,
2466                                     lhsLo32,
2467                                     DAG.getConstant(0, MVT::i32),
2468                                     ISD::SETGT));
2469   }
2470
2471   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2472   SDValue rhsHi32 =
2473           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2474                       DAG.getNode(ISD::SRL, dl, IntVT,
2475                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2476
2477   // If a value is negative, subtract from the sign magnitude constant:
2478   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2479
2480   // Convert the sign-magnitude representation into 2's complement:
2481   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2482                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2483   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2484   SDValue lhsSelect =
2485           DAG.getNode(ISD::SELECT, dl, IntVT,
2486                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2487
2488   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2489                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2490   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2491   SDValue rhsSelect =
2492           DAG.getNode(ISD::SELECT, dl, IntVT,
2493                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2494
2495   unsigned compareOp;
2496
2497   switch (CC->get()) {
2498   case ISD::SETOEQ:
2499   case ISD::SETUEQ:
2500     compareOp = ISD::SETEQ; break;
2501   case ISD::SETOGT:
2502   case ISD::SETUGT:
2503     compareOp = ISD::SETGT; break;
2504   case ISD::SETOGE:
2505   case ISD::SETUGE:
2506     compareOp = ISD::SETGE; break;
2507   case ISD::SETOLT:
2508   case ISD::SETULT:
2509     compareOp = ISD::SETLT; break;
2510   case ISD::SETOLE:
2511   case ISD::SETULE:
2512     compareOp = ISD::SETLE; break;
2513   case ISD::SETUNE:
2514   case ISD::SETONE:
2515     compareOp = ISD::SETNE; break;
2516   default:
2517     llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2518   }
2519
2520   SDValue result =
2521           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2522                        (ISD::CondCode) compareOp);
2523
2524   if ((CC->get() & 0x8) == 0) {
2525     // Ordered comparison:
2526     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2527                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2528                                   ISD::SETO);
2529     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2530                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2531                                   ISD::SETO);
2532     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2533
2534     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2535   }
2536
2537   return result;
2538 }
2539
2540 //! Lower ISD::SELECT_CC
2541 /*!
2542   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2543   SELB instruction.
2544
2545   \note Need to revisit this in the future: if the code path through the true
2546   and false value computations is longer than the latency of a branch (6
2547   cycles), then it would be more advantageous to branch and insert a new basic
2548   block and branch on the condition. However, this code does not make that
2549   assumption, given the simplisitc uses so far.
2550  */
2551
2552 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2553                               const TargetLowering &TLI) {
2554   EVT VT = Op.getValueType();
2555   SDValue lhs = Op.getOperand(0);
2556   SDValue rhs = Op.getOperand(1);
2557   SDValue trueval = Op.getOperand(2);
2558   SDValue falseval = Op.getOperand(3);
2559   SDValue condition = Op.getOperand(4);
2560   DebugLoc dl = Op.getDebugLoc();
2561
2562   // NOTE: SELB's arguments: $rA, $rB, $mask
2563   //
2564   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2565   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2566   // condition was true and 0s where the condition was false. Hence, the
2567   // arguments to SELB get reversed.
2568
2569   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2570   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2571   // with another "cannot select select_cc" assert:
2572
2573   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2574                                 TLI.getSetCCResultType(Op.getValueType()),
2575                                 lhs, rhs, condition);
2576   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2577 }
2578
2579 //! Custom lower ISD::TRUNCATE
2580 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2581 {
2582   // Type to truncate to
2583   EVT VT = Op.getValueType();
2584   MVT simpleVT = VT.getSimpleVT();
2585   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2586                                VT, (128 / VT.getSizeInBits()));
2587   DebugLoc dl = Op.getDebugLoc();
2588
2589   // Type to truncate from
2590   SDValue Op0 = Op.getOperand(0);
2591   EVT Op0VT = Op0.getValueType();
2592
2593   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2594     // Create shuffle mask, least significant doubleword of quadword
2595     unsigned maskHigh = 0x08090a0b;
2596     unsigned maskLow = 0x0c0d0e0f;
2597     // Use a shuffle to perform the truncation
2598     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2599                                    DAG.getConstant(maskHigh, MVT::i32),
2600                                    DAG.getConstant(maskLow, MVT::i32),
2601                                    DAG.getConstant(maskHigh, MVT::i32),
2602                                    DAG.getConstant(maskLow, MVT::i32));
2603
2604     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2605                                        Op0, Op0, shufMask);
2606
2607     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2608   }
2609
2610   return SDValue();             // Leave the truncate unmolested
2611 }
2612
2613 /*!
2614  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2615  * algorithm is to duplicate the sign bit using rotmai to generate at
2616  * least one byte full of sign bits. Then propagate the "sign-byte" into
2617  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2618  *
2619  * @param Op The sext operand
2620  * @param DAG The current DAG
2621  * @return The SDValue with the entire instruction sequence
2622  */
2623 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2624 {
2625   DebugLoc dl = Op.getDebugLoc();
2626
2627   // Type to extend to
2628   MVT OpVT = Op.getValueType().getSimpleVT();
2629   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2630                                OpVT, (128 / OpVT.getSizeInBits()));
2631
2632   // Type to extend from
2633   SDValue Op0 = Op.getOperand(0);
2634   MVT Op0VT = Op0.getValueType().getSimpleVT();
2635
2636   // The type to extend to needs to be a i128 and
2637   // the type to extend from needs to be i64 or i32.
2638   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2639           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2640
2641   // Create shuffle mask
2642   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2643   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2644   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2645   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2646                                  DAG.getConstant(mask1, MVT::i32),
2647                                  DAG.getConstant(mask1, MVT::i32),
2648                                  DAG.getConstant(mask2, MVT::i32),
2649                                  DAG.getConstant(mask3, MVT::i32));
2650
2651   // Word wise arithmetic right shift to generate at least one byte
2652   // that contains sign bits.
2653   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2654   SDValue sraVal = DAG.getNode(ISD::SRA,
2655                  dl,
2656                  mvt,
2657                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2658                  DAG.getConstant(31, MVT::i32));
2659
2660   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2661   // and the input value into the lower 64 bits.
2662   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2663       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2664
2665   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2666 }
2667
2668 //! Custom (target-specific) lowering entry point
2669 /*!
2670   This is where LLVM's DAG selection process calls to do target-specific
2671   lowering of nodes.
2672  */
2673 SDValue
2674 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2675 {
2676   unsigned Opc = (unsigned) Op.getOpcode();
2677   EVT VT = Op.getValueType();
2678
2679   switch (Opc) {
2680   default: {
2681 #ifndef NDEBUG
2682     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2683     errs() << "Op.getOpcode() = " << Opc << "\n";
2684     errs() << "*Op.getNode():\n";
2685     Op.getNode()->dump();
2686 #endif
2687     llvm_unreachable(0);
2688   }
2689   case ISD::LOAD:
2690   case ISD::EXTLOAD:
2691   case ISD::SEXTLOAD:
2692   case ISD::ZEXTLOAD:
2693     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2694   case ISD::STORE:
2695     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2696   case ISD::ConstantPool:
2697     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2698   case ISD::GlobalAddress:
2699     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2700   case ISD::JumpTable:
2701     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2702   case ISD::ConstantFP:
2703     return LowerConstantFP(Op, DAG);
2704
2705   // i8, i64 math ops:
2706   case ISD::ADD:
2707   case ISD::SUB:
2708   case ISD::ROTR:
2709   case ISD::ROTL:
2710   case ISD::SRL:
2711   case ISD::SHL:
2712   case ISD::SRA: {
2713     if (VT == MVT::i8)
2714       return LowerI8Math(Op, DAG, Opc, *this);
2715     break;
2716   }
2717
2718   case ISD::FP_TO_SINT:
2719   case ISD::FP_TO_UINT:
2720     return LowerFP_TO_INT(Op, DAG, *this);
2721
2722   case ISD::SINT_TO_FP:
2723   case ISD::UINT_TO_FP:
2724     return LowerINT_TO_FP(Op, DAG, *this);
2725
2726   // Vector-related lowering.
2727   case ISD::BUILD_VECTOR:
2728     return LowerBUILD_VECTOR(Op, DAG);
2729   case ISD::SCALAR_TO_VECTOR:
2730     return LowerSCALAR_TO_VECTOR(Op, DAG);
2731   case ISD::VECTOR_SHUFFLE:
2732     return LowerVECTOR_SHUFFLE(Op, DAG);
2733   case ISD::EXTRACT_VECTOR_ELT:
2734     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2735   case ISD::INSERT_VECTOR_ELT:
2736     return LowerINSERT_VECTOR_ELT(Op, DAG);
2737
2738   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2739   case ISD::AND:
2740   case ISD::OR:
2741   case ISD::XOR:
2742     return LowerByteImmed(Op, DAG);
2743
2744   // Vector and i8 multiply:
2745   case ISD::MUL:
2746     if (VT == MVT::i8)
2747       return LowerI8Math(Op, DAG, Opc, *this);
2748
2749   case ISD::CTPOP:
2750     return LowerCTPOP(Op, DAG);
2751
2752   case ISD::SELECT_CC:
2753     return LowerSELECT_CC(Op, DAG, *this);
2754
2755   case ISD::SETCC:
2756     return LowerSETCC(Op, DAG, *this);
2757
2758   case ISD::TRUNCATE:
2759     return LowerTRUNCATE(Op, DAG);
2760
2761   case ISD::SIGN_EXTEND:
2762     return LowerSIGN_EXTEND(Op, DAG);
2763   }
2764
2765   return SDValue();
2766 }
2767
2768 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2769                                            SmallVectorImpl<SDValue>&Results,
2770                                            SelectionDAG &DAG)
2771 {
2772 #if 0
2773   unsigned Opc = (unsigned) N->getOpcode();
2774   EVT OpVT = N->getValueType(0);
2775
2776   switch (Opc) {
2777   default: {
2778     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2779     errs() << "Op.getOpcode() = " << Opc << "\n";
2780     errs() << "*Op.getNode():\n";
2781     N->dump();
2782     abort();
2783     /*NOTREACHED*/
2784   }
2785   }
2786 #endif
2787
2788   /* Otherwise, return unchanged */
2789 }
2790
2791 //===----------------------------------------------------------------------===//
2792 // Target Optimization Hooks
2793 //===----------------------------------------------------------------------===//
2794
2795 SDValue
2796 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2797 {
2798 #if 0
2799   TargetMachine &TM = getTargetMachine();
2800 #endif
2801   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2802   SelectionDAG &DAG = DCI.DAG;
2803   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2804   EVT NodeVT = N->getValueType(0);      // The node's value type
2805   EVT Op0VT = Op0.getValueType();       // The first operand's result
2806   SDValue Result;                       // Initially, empty result
2807   DebugLoc dl = N->getDebugLoc();
2808
2809   switch (N->getOpcode()) {
2810   default: break;
2811   case ISD::ADD: {
2812     SDValue Op1 = N->getOperand(1);
2813
2814     if (Op0.getOpcode() == SPUISD::IndirectAddr
2815         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2816       // Normalize the operands to reduce repeated code
2817       SDValue IndirectArg = Op0, AddArg = Op1;
2818
2819       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2820         IndirectArg = Op1;
2821         AddArg = Op0;
2822       }
2823
2824       if (isa<ConstantSDNode>(AddArg)) {
2825         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2826         SDValue IndOp1 = IndirectArg.getOperand(1);
2827
2828         if (CN0->isNullValue()) {
2829           // (add (SPUindirect <arg>, <arg>), 0) ->
2830           // (SPUindirect <arg>, <arg>)
2831
2832 #if !defined(NDEBUG)
2833           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2834             errs() << "\n"
2835                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2836                  << "With:    (SPUindirect <arg>, <arg>)\n";
2837           }
2838 #endif
2839
2840           return IndirectArg;
2841         } else if (isa<ConstantSDNode>(IndOp1)) {
2842           // (add (SPUindirect <arg>, <const>), <const>) ->
2843           // (SPUindirect <arg>, <const + const>)
2844           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2845           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2846           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2847
2848 #if !defined(NDEBUG)
2849           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2850             errs() << "\n"
2851                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2852                  << "), " << CN0->getSExtValue() << ")\n"
2853                  << "With:    (SPUindirect <arg>, "
2854                  << combinedConst << ")\n";
2855           }
2856 #endif
2857
2858           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2859                              IndirectArg, combinedValue);
2860         }
2861       }
2862     }
2863     break;
2864   }
2865   case ISD::SIGN_EXTEND:
2866   case ISD::ZERO_EXTEND:
2867   case ISD::ANY_EXTEND: {
2868     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2869       // (any_extend (SPUextract_elt0 <arg>)) ->
2870       // (SPUextract_elt0 <arg>)
2871       // Types must match, however...
2872 #if !defined(NDEBUG)
2873       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2874         errs() << "\nReplace: ";
2875         N->dump(&DAG);
2876         errs() << "\nWith:    ";
2877         Op0.getNode()->dump(&DAG);
2878         errs() << "\n";
2879       }
2880 #endif
2881
2882       return Op0;
2883     }
2884     break;
2885   }
2886   case SPUISD::IndirectAddr: {
2887     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2888       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2889       if (CN != 0 && CN->getZExtValue() == 0) {
2890         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2891         // (SPUaform <addr>, 0)
2892
2893         DEBUG(errs() << "Replace: ");
2894         DEBUG(N->dump(&DAG));
2895         DEBUG(errs() << "\nWith:    ");
2896         DEBUG(Op0.getNode()->dump(&DAG));
2897         DEBUG(errs() << "\n");
2898
2899         return Op0;
2900       }
2901     } else if (Op0.getOpcode() == ISD::ADD) {
2902       SDValue Op1 = N->getOperand(1);
2903       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2904         // (SPUindirect (add <arg>, <arg>), 0) ->
2905         // (SPUindirect <arg>, <arg>)
2906         if (CN1->isNullValue()) {
2907
2908 #if !defined(NDEBUG)
2909           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2910             errs() << "\n"
2911                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2912                  << "With:    (SPUindirect <arg>, <arg>)\n";
2913           }
2914 #endif
2915
2916           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2917                              Op0.getOperand(0), Op0.getOperand(1));
2918         }
2919       }
2920     }
2921     break;
2922   }
2923   case SPUISD::SHLQUAD_L_BITS:
2924   case SPUISD::SHLQUAD_L_BYTES:
2925   case SPUISD::ROTBYTES_LEFT: {
2926     SDValue Op1 = N->getOperand(1);
2927
2928     // Kill degenerate vector shifts:
2929     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2930       if (CN->isNullValue()) {
2931         Result = Op0;
2932       }
2933     }
2934     break;
2935   }
2936   case SPUISD::PREFSLOT2VEC: {
2937     switch (Op0.getOpcode()) {
2938     default:
2939       break;
2940     case ISD::ANY_EXTEND:
2941     case ISD::ZERO_EXTEND:
2942     case ISD::SIGN_EXTEND: {
2943       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2944       // <arg>
2945       // but only if the SPUprefslot2vec and <arg> types match.
2946       SDValue Op00 = Op0.getOperand(0);
2947       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2948         SDValue Op000 = Op00.getOperand(0);
2949         if (Op000.getValueType() == NodeVT) {
2950           Result = Op000;
2951         }
2952       }
2953       break;
2954     }
2955     case SPUISD::VEC2PREFSLOT: {
2956       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2957       // <arg>
2958       Result = Op0.getOperand(0);
2959       break;
2960     }
2961     }
2962     break;
2963   }
2964   }
2965
2966   // Otherwise, return unchanged.
2967 #ifndef NDEBUG
2968   if (Result.getNode()) {
2969     DEBUG(errs() << "\nReplace.SPU: ");
2970     DEBUG(N->dump(&DAG));
2971     DEBUG(errs() << "\nWith:        ");
2972     DEBUG(Result.getNode()->dump(&DAG));
2973     DEBUG(errs() << "\n");
2974   }
2975 #endif
2976
2977   return Result;
2978 }
2979
2980 //===----------------------------------------------------------------------===//
2981 // Inline Assembly Support
2982 //===----------------------------------------------------------------------===//
2983
2984 /// getConstraintType - Given a constraint letter, return the type of
2985 /// constraint it is for this target.
2986 SPUTargetLowering::ConstraintType
2987 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2988   if (ConstraintLetter.size() == 1) {
2989     switch (ConstraintLetter[0]) {
2990     default: break;
2991     case 'b':
2992     case 'r':
2993     case 'f':
2994     case 'v':
2995     case 'y':
2996       return C_RegisterClass;
2997     }
2998   }
2999   return TargetLowering::getConstraintType(ConstraintLetter);
3000 }
3001
3002 std::pair<unsigned, const TargetRegisterClass*>
3003 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3004                                                 EVT VT) const
3005 {
3006   if (Constraint.size() == 1) {
3007     // GCC RS6000 Constraint Letters
3008     switch (Constraint[0]) {
3009     case 'b':   // R1-R31
3010     case 'r':   // R0-R31
3011       if (VT == MVT::i64)
3012         return std::make_pair(0U, SPU::R64CRegisterClass);
3013       return std::make_pair(0U, SPU::R32CRegisterClass);
3014     case 'f':
3015       if (VT == MVT::f32)
3016         return std::make_pair(0U, SPU::R32FPRegisterClass);
3017       else if (VT == MVT::f64)
3018         return std::make_pair(0U, SPU::R64FPRegisterClass);
3019       break;
3020     case 'v':
3021       return std::make_pair(0U, SPU::GPRCRegisterClass);
3022     }
3023   }
3024
3025   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3026 }
3027
3028 //! Compute used/known bits for a SPU operand
3029 void
3030 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3031                                                   const APInt &Mask,
3032                                                   APInt &KnownZero,
3033                                                   APInt &KnownOne,
3034                                                   const SelectionDAG &DAG,
3035                                                   unsigned Depth ) const {
3036 #if 0
3037   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3038
3039   switch (Op.getOpcode()) {
3040   default:
3041     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3042     break;
3043   case CALL:
3044   case SHUFB:
3045   case SHUFFLE_MASK:
3046   case CNTB:
3047   case SPUISD::PREFSLOT2VEC:
3048   case SPUISD::LDRESULT:
3049   case SPUISD::VEC2PREFSLOT:
3050   case SPUISD::SHLQUAD_L_BITS:
3051   case SPUISD::SHLQUAD_L_BYTES:
3052   case SPUISD::VEC_ROTL:
3053   case SPUISD::VEC_ROTR:
3054   case SPUISD::ROTBYTES_LEFT:
3055   case SPUISD::SELECT_MASK:
3056   case SPUISD::SELB:
3057   }
3058 #endif
3059 }
3060
3061 unsigned
3062 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3063                                                    unsigned Depth) const {
3064   switch (Op.getOpcode()) {
3065   default:
3066     return 1;
3067
3068   case ISD::SETCC: {
3069     EVT VT = Op.getValueType();
3070
3071     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3072       VT = MVT::i32;
3073     }
3074     return VT.getSizeInBits();
3075   }
3076   }
3077 }
3078
3079 // LowerAsmOperandForConstraint
3080 void
3081 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3082                                                 char ConstraintLetter,
3083                                                 bool hasMemory,
3084                                                 std::vector<SDValue> &Ops,
3085                                                 SelectionDAG &DAG) const {
3086   // Default, for the time being, to the base class handler
3087   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3088                                                Ops, DAG);
3089 }
3090
3091 /// isLegalAddressImmediate - Return true if the integer value can be used
3092 /// as the offset of the target addressing mode.
3093 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3094                                                 const Type *Ty) const {
3095   // SPU's addresses are 256K:
3096   return (V > -(1 << 18) && V < (1 << 18) - 1);
3097 }
3098
3099 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3100   return false;
3101 }
3102
3103 bool
3104 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3105   // The SPU target isn't yet aware of offsets.
3106   return false;
3107 }