lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/Constants.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/Intrinsics.h"
  21 #include "llvm/CallingConv.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/MachineFrameInfo.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineInstrBuilder.h"
  26 #include "llvm/CodeGen/MachineRegisterInfo.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/Target/TargetLoweringObjectFile.h"
  29 #include "llvm/Target/TargetOptions.h"
  30 #include "llvm/ADT/VectorExtras.h"
  31 #include "llvm/Support/Debug.h"
  32 #include "llvm/Support/ErrorHandling.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include "llvm/Support/raw_ostream.h"
  35 #include <map>
  36
  37 using namespace llvm;
  38
  39 // Used in getTargetNodeName() below
  40 namespace {
  41   std::map<unsigned, const char *> node_names;
  42
  43   //! EVT mapping to useful data for Cell SPU
  44   struct valtype_map_s {
  45     EVT   valtype;
  46     int   prefslot_byte;
  47   };
  48
  49   const valtype_map_s valtype_map[] = {
  50     { MVT::i1,   3 },
  51     { MVT::i8,   3 },
  52     { MVT::i16,  2 },
  53     { MVT::i32,  0 },
  54     { MVT::f32,  0 },
  55     { MVT::i64,  0 },
  56     { MVT::f64,  0 },
  57     { MVT::i128, 0 }
  58   };
  59
  60   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  61
  62   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  63     const valtype_map_s *retval = 0;
  64
  65     for (size_t i = 0; i < n_valtype_map; ++i) {
  66       if (valtype_map[i].valtype == VT) {
  67         retval = valtype_map + i;
  68         break;
  69       }
  70     }
  71
  72 #ifndef NDEBUG
  73     if (retval == 0) {
  74       std::string msg;
  75       raw_string_ostream Msg(msg);
  76       Msg << "getValueTypeMapEntry returns NULL for "
  77            << VT.getEVTString();
  78       llvm_report_error(Msg.str());
  79     }
  80 #endif
  81
  82     return retval;
  83   }
  84
  85   //! Expand a library call into an actual call DAG node
  86   /*!
  87    \note
  88    This code is taken from SelectionDAGLegalize, since it is not exposed as
  89    part of the LLVM SelectionDAG API.
  90    */
  91
  92   SDValue
  93   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  94                 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  95     // The input chain to this libcall is the entry node of the function.
  96     // Legalizing the call will automatically add the previous call to the
  97     // dependence.
  98     SDValue InChain = DAG.getEntryNode();
  99
 100     TargetLowering::ArgListTy Args;
 101     TargetLowering::ArgListEntry Entry;
 102     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 103       EVT ArgVT = Op.getOperand(i).getValueType();
 104       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 105       Entry.Node = Op.getOperand(i);
 106       Entry.Ty = ArgTy;
 107       Entry.isSExt = isSigned;
 108       Entry.isZExt = !isSigned;
 109       Args.push_back(Entry);
 110     }
 111     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 112                                            TLI.getPointerTy());
 113
 114     // Splice the libcall in wherever FindInputOutputChains tells us to.
 115     const Type *RetTy =
 116                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 117     std::pair<SDValue, SDValue> CallInfo =
 118             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 119                             0, TLI.getLibcallCallingConv(LC), false,
 120                             /*isReturnValueUsed=*/true,
 121                             Callee, Args, DAG,
 122                             Op.getDebugLoc());
 123
 124     return CallInfo.first;
 125   }
 126 }
 127
 128 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 129   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 130     SPUTM(TM) {
 131   // Fold away setcc operations if possible.
 132   setPow2DivIsCheap();
 133
 134   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 135   setUseUnderscoreSetJmp(true);
 136   setUseUnderscoreLongJmp(true);
 137
 138   // Set RTLIB libcall names as used by SPU:
 139   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 140
 141   // Set up the SPU's register classes:
 142   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 143   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 144   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 145   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 146   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 147   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 148   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 149
 150   // SPU has no sign or zero extended loads for i1, i8, i16:
 151   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 152   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 153   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 154
 155   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 156   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 157
 158   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 159   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 160   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 161   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 162
 163   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 164
 165   // SPU constant load actions are custom lowered:
 166   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 167   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 168
 169   // SPU's loads and stores have to be custom lowered:
 170   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 171        ++sctype) {
 172     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 173
 174     setOperationAction(ISD::LOAD,   VT, Custom);
 175     setOperationAction(ISD::STORE,  VT, Custom);
 176     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 177     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 178     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 179
 180     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 181       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 182       setTruncStoreAction(VT, StoreVT, Expand);
 183     }
 184   }
 185
 186   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 187        ++sctype) {
 188     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 189
 190     setOperationAction(ISD::LOAD,   VT, Custom);
 191     setOperationAction(ISD::STORE,  VT, Custom);
 192
 193     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 194       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 195       setTruncStoreAction(VT, StoreVT, Expand);
 196     }
 197   }
 198
 199   // Expand the jumptable branches
 200   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 201   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 202
 203   // Custom lower SELECT_CC for most cases, but expand by default
 204   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 205   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 206   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 207   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 208   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 209
 210   // SPU has no intrinsics for these particular operations:
 211   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 212
 213   // SPU has no division/remainder instructions
 214   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 215   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 216   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 217   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 218   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 219   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 220   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 221   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 222   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 223   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 224   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 225   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 226   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 227   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 228   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 229   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 230   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 231   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 232   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 233   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 234   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 235   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 236   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 237   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 238   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 239   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 240   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 241   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 242   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 243   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 244
 245   // We don't support sin/cos/sqrt/fmod
 246   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 247   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 248   setOperationAction(ISD::FREM , MVT::f64, Expand);
 249   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 250   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 251   setOperationAction(ISD::FREM , MVT::f32, Expand);
 252
 253   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 254   // for f32!)
 255   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 256   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 257
 258   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 259   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 260
 261   // SPU can do rotate right and left, so legalize it... but customize for i8
 262   // because instructions don't exist.
 263
 264   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 265   //        .td files.
 266   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 267   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 268   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 269
 270   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 271   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 272   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 273
 274   // SPU has no native version of shift left/right for i8
 275   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 276   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 277   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 278
 279   // Make these operations legal and handle them during instruction selection:
 280   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 281   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 282   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 283
 284   // Custom lower i8, i32 and i64 multiplications
 285   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 286   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 287   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 288
 289   // Expand double-width multiplication
 290   // FIXME: It would probably be reasonable to support some of these operations
 291   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 292   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 293   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 294   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 295   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 296   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 297   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 298   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 299   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 300   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 301   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 302   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 303   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 304   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 305   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 306   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 307
 308   // Need to custom handle (some) common i8, i64 math ops
 309   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 310   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 311   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 312   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 313
 314   // SPU does not have BSWAP. It does have i32 support CTLZ.
 315   // CTPOP has to be custom lowered.
 316   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 317   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 318
 319   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 320   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 321   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 322   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 323   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 324
 325   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 326   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 327   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 328   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 329   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 330
 331   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 332   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 333   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 334   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 335   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 336
 337   // SPU has a version of select that implements (a&~c)|(b&c), just like
 338   // select ought to work:
 339   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 340   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 341   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 342   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 343
 344   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 345   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 346   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 347   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 348   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 349
 350   // Custom lower i128 -> i64 truncates
 351   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 352
 353   // Custom lower i32/i64 -> i128 sign extend
 354   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 355
 356   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 357   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 358   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 359   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 360   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 361   // to expand to a libcall, hence the custom lowering:
 362   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 363   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 364   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
 365   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 366   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 367   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 368
 369   // FDIV on SPU requires custom lowering
 370   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 371
 372   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 373   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 374   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 375   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 376   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 377   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 378   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 379   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 380   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 381
 382   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 383   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 384   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 385   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 386
 387   // We cannot sextinreg(i1).  Expand to shifts.
 388   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 389
 390   // Support label based line numbers.
 391   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 392   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 393
 394   // We want to legalize GlobalAddress and ConstantPool nodes into the
 395   // appropriate instructions to materialize the address.
 396   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 397        ++sctype) {
 398     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 399
 400     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 401     setOperationAction(ISD::ConstantPool,   VT, Custom);
 402     setOperationAction(ISD::JumpTable,      VT, Custom);
 403   }
 404
 405   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 406   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 407
 408   // Use the default implementation.
 409   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 410   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 411   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 412   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 413   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 414   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 415   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 416
 417   // Cell SPU has instructions for converting between i64 and fp.
 418   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 419   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 420
 421   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 422   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 423
 424   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 425   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 426
 427   // First set operation action for all vector types to expand. Then we
 428   // will selectively turn on ones that can be effectively codegen'd.
 429   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 430   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 431   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 432   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 433   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 434   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 435
 436   // "Odd size" vector classes that we're willing to support:
 437   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 438
 439   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 440        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 441     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 442
 443     // add/sub are legal for all supported vector VT's.
 444     setOperationAction(ISD::ADD,     VT, Legal);
 445     setOperationAction(ISD::SUB,     VT, Legal);
 446     // mul has to be custom lowered.
 447     setOperationAction(ISD::MUL,     VT, Legal);
 448
 449     setOperationAction(ISD::AND,     VT, Legal);
 450     setOperationAction(ISD::OR,      VT, Legal);
 451     setOperationAction(ISD::XOR,     VT, Legal);
 452     setOperationAction(ISD::LOAD,    VT, Legal);
 453     setOperationAction(ISD::SELECT,  VT, Legal);
 454     setOperationAction(ISD::STORE,   VT, Legal);
 455
 456     // These operations need to be expanded:
 457     setOperationAction(ISD::SDIV,    VT, Expand);
 458     setOperationAction(ISD::SREM,    VT, Expand);
 459     setOperationAction(ISD::UDIV,    VT, Expand);
 460     setOperationAction(ISD::UREM,    VT, Expand);
 461
 462     // Custom lower build_vector, constant pool spills, insert and
 463     // extract vector elements:
 464     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 465     setOperationAction(ISD::ConstantPool, VT, Custom);
 466     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 467     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 468     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 469     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 470   }
 471
 472   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 473   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 474   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 475   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 476
 477   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 478
 479   setShiftAmountType(MVT::i32);
 480   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 481
 482   setStackPointerRegisterToSaveRestore(SPU::R1);
 483
 484   // We have target-specific dag combine patterns for the following nodes:
 485   setTargetDAGCombine(ISD::ADD);
 486   setTargetDAGCombine(ISD::ZERO_EXTEND);
 487   setTargetDAGCombine(ISD::SIGN_EXTEND);
 488   setTargetDAGCombine(ISD::ANY_EXTEND);
 489
 490   computeRegisterProperties();
 491
 492   // Set pre-RA register scheduler default to BURR, which produces slightly
 493   // better code than the default (could also be TDRR, but TargetLowering.h
 494   // needs a mod to support that model):
 495   setSchedulingPreference(SchedulingForRegPressure);
 496 }
 497
 498 const char *
 499 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 500 {
 501   if (node_names.empty()) {
 502     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 503     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 504     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 505     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 506     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 507     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 508     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 509     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 510     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 511     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 512     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 513     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 514     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 515     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 516     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 517     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 518     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 519     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 520     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 521             "SPUISD::ROTBYTES_LEFT_BITS";
 522     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 523     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 524     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 525     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 526     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 527   }
 528
 529   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 530
 531   return ((i != node_names.end()) ? i->second : 0);
 532 }
 533
 534 /// getFunctionAlignment - Return the Log2 alignment of this function.
 535 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 536   return 3;
 537 }
 538
 539 //===----------------------------------------------------------------------===//
 540 // Return the Cell SPU's SETCC result type
 541 //===----------------------------------------------------------------------===//
 542
 543 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 544   // i16 and i32 are valid SETCC result types
 545   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 546     VT.getSimpleVT().SimpleTy :
 547     MVT::i32);
 548 }
 549
 550 //===----------------------------------------------------------------------===//
 551 // Calling convention code:
 552 //===----------------------------------------------------------------------===//
 553
 554 #include "SPUGenCallingConv.inc"
 555
 556 //===----------------------------------------------------------------------===//
 557 //  LowerOperation implementation
 558 //===----------------------------------------------------------------------===//
 559
 560 /// Custom lower loads for CellSPU
 561 /*!
 562  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 563  within a 16-byte block, we have to rotate to extract the requested element.
 564
 565  For extending loads, we also want to ensure that the following sequence is
 566  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 567
 568 \verbatim
 569 %1  v16i8,ch = load
 570 %2  v16i8,ch = rotate %1
 571 %3  v4f8, ch = bitconvert %2
 572 %4  f32      = vec2perfslot %3
 573 %5  f64      = fp_extend %4
 574 \endverbatim
 575 */
 576 static SDValue
 577 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 578   LoadSDNode *LN = cast<LoadSDNode>(Op);
 579   SDValue the_chain = LN->getChain();
 580   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 581   EVT InVT = LN->getMemoryVT();
 582   EVT OutVT = Op.getValueType();
 583   ISD::LoadExtType ExtType = LN->getExtensionType();
 584   unsigned alignment = LN->getAlignment();
 585   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 586   DebugLoc dl = Op.getDebugLoc();
 587
 588   switch (LN->getAddressingMode()) {
 589   case ISD::UNINDEXED: {
 590     SDValue result;
 591     SDValue basePtr = LN->getBasePtr();
 592     SDValue rotate;
 593
 594     if (alignment == 16) {
 595       ConstantSDNode *CN;
 596
 597       // Special cases for a known aligned load to simplify the base pointer
 598       // and the rotation amount:
 599       if (basePtr.getOpcode() == ISD::ADD
 600           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 601         // Known offset into basePtr
 602         int64_t offset = CN->getSExtValue();
 603         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 604
 605         if (rotamt < 0)
 606           rotamt += 16;
 607
 608         rotate = DAG.getConstant(rotamt, MVT::i16);
 609
 610         // Simplify the base pointer for this case:
 611         basePtr = basePtr.getOperand(0);
 612         if ((offset & ~0xf) > 0) {
 613           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 614                                 basePtr,
 615                                 DAG.getConstant((offset & ~0xf), PtrVT));
 616         }
 617       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 618                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 619                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 620                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 621         // Plain aligned a-form address: rotate into preferred slot
 622         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 623         int64_t rotamt = -vtm->prefslot_byte;
 624         if (rotamt < 0)
 625           rotamt += 16;
 626         rotate = DAG.getConstant(rotamt, MVT::i16);
 627       } else {
 628         // Offset the rotate amount by the basePtr and the preferred slot
 629         // byte offset
 630         int64_t rotamt = -vtm->prefslot_byte;
 631         if (rotamt < 0)
 632           rotamt += 16;
 633         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 634                              basePtr,
 635                              DAG.getConstant(rotamt, PtrVT));
 636       }
 637     } else {
 638       // Unaligned load: must be more pessimistic about addressing modes:
 639       if (basePtr.getOpcode() == ISD::ADD) {
 640         MachineFunction &MF = DAG.getMachineFunction();
 641         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 642         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 643         SDValue Flag;
 644
 645         SDValue Op0 = basePtr.getOperand(0);
 646         SDValue Op1 = basePtr.getOperand(1);
 647
 648         if (isa<ConstantSDNode>(Op1)) {
 649           // Convert the (add <ptr>, <const>) to an indirect address contained
 650           // in a register. Note that this is done because we need to avoid
 651           // creating a 0(reg) d-form address due to the SPU's block loads.
 652           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 653           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 654           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 655         } else {
 656           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 657           // will likely be lowered as a reg(reg) x-form address.
 658           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 659         }
 660       } else {
 661         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 662                               basePtr,
 663                               DAG.getConstant(0, PtrVT));
 664       }
 665
 666       // Offset the rotate amount by the basePtr and the preferred slot
 667       // byte offset
 668       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 669                            basePtr,
 670                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 671     }
 672
 673     // Re-emit as a v16i8 vector load
 674     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 675                          LN->getSrcValue(), LN->getSrcValueOffset(),
 676                          LN->isVolatile(), 16);
 677
 678     // Update the chain
 679     the_chain = result.getValue(1);
 680
 681     // Rotate into the preferred slot:
 682     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 683                          result.getValue(0), rotate);
 684
 685     // Convert the loaded v16i8 vector to the appropriate vector type
 686     // specified by the operand:
 687     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 688                                  InVT, (128 / InVT.getSizeInBits()));
 689     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 690                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 691
 692     // Handle extending loads by extending the scalar result:
 693     if (ExtType == ISD::SEXTLOAD) {
 694       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 695     } else if (ExtType == ISD::ZEXTLOAD) {
 696       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 697     } else if (ExtType == ISD::EXTLOAD) {
 698       unsigned NewOpc = ISD::ANY_EXTEND;
 699
 700       if (OutVT.isFloatingPoint())
 701         NewOpc = ISD::FP_EXTEND;
 702
 703       result = DAG.getNode(NewOpc, dl, OutVT, result);
 704     }
 705
 706     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 707     SDValue retops[2] = {
 708       result,
 709       the_chain
 710     };
 711
 712     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 713                          retops, sizeof(retops) / sizeof(retops[0]));
 714     return result;
 715   }
 716   case ISD::PRE_INC:
 717   case ISD::PRE_DEC:
 718   case ISD::POST_INC:
 719   case ISD::POST_DEC:
 720   case ISD::LAST_INDEXED_MODE:
 721     {
 722       std::string msg;
 723       raw_string_ostream Msg(msg);
 724       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 725             "UNINDEXED\n";
 726       Msg << (unsigned) LN->getAddressingMode();
 727       llvm_report_error(Msg.str());
 728       /*NOTREACHED*/
 729     }
 730   }
 731
 732   return SDValue();
 733 }
 734
 735 /// Custom lower stores for CellSPU
 736 /*!
 737  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 738  within a 16-byte block, we have to generate a shuffle to insert the
 739  requested element into its place, then store the resulting block.
 740  */
 741 static SDValue
 742 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 743   StoreSDNode *SN = cast<StoreSDNode>(Op);
 744   SDValue Value = SN->getValue();
 745   EVT VT = Value.getValueType();
 746   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 747   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 748   DebugLoc dl = Op.getDebugLoc();
 749   unsigned alignment = SN->getAlignment();
 750
 751   switch (SN->getAddressingMode()) {
 752   case ISD::UNINDEXED: {
 753     // The vector type we really want to load from the 16-byte chunk.
 754     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 755                                  VT, (128 / VT.getSizeInBits())),
 756         stVecVT = EVT::getVectorVT(*DAG.getContext(),
 757                                    StVT, (128 / StVT.getSizeInBits()));
 758
 759     SDValue alignLoadVec;
 760     SDValue basePtr = SN->getBasePtr();
 761     SDValue the_chain = SN->getChain();
 762     SDValue insertEltOffs;
 763
 764     if (alignment == 16) {
 765       ConstantSDNode *CN;
 766
 767       // Special cases for a known aligned load to simplify the base pointer
 768       // and insertion byte:
 769       if (basePtr.getOpcode() == ISD::ADD
 770           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 771         // Known offset into basePtr
 772         int64_t offset = CN->getSExtValue();
 773
 774         // Simplify the base pointer for this case:
 775         basePtr = basePtr.getOperand(0);
 776         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 777                                     basePtr,
 778                                     DAG.getConstant((offset & 0xf), PtrVT));
 779
 780         if ((offset & ~0xf) > 0) {
 781           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 782                                 basePtr,
 783                                 DAG.getConstant((offset & ~0xf), PtrVT));
 784         }
 785       } else {
 786         // Otherwise, assume it's at byte 0 of basePtr
 787         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 788                                     basePtr,
 789                                     DAG.getConstant(0, PtrVT));
 790       }
 791     } else {
 792       // Unaligned load: must be more pessimistic about addressing modes:
 793       if (basePtr.getOpcode() == ISD::ADD) {
 794         MachineFunction &MF = DAG.getMachineFunction();
 795         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 796         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 797         SDValue Flag;
 798
 799         SDValue Op0 = basePtr.getOperand(0);
 800         SDValue Op1 = basePtr.getOperand(1);
 801
 802         if (isa<ConstantSDNode>(Op1)) {
 803           // Convert the (add <ptr>, <const>) to an indirect address contained
 804           // in a register. Note that this is done because we need to avoid
 805           // creating a 0(reg) d-form address due to the SPU's block loads.
 806           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 807           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 808           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 809         } else {
 810           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 811           // will likely be lowered as a reg(reg) x-form address.
 812           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 813         }
 814       } else {
 815         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 816                               basePtr,
 817                               DAG.getConstant(0, PtrVT));
 818       }
 819
 820       // Insertion point is solely determined by basePtr's contents
 821       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 822                                   basePtr,
 823                                   DAG.getConstant(0, PtrVT));
 824     }
 825
 826     // Re-emit as a v16i8 vector load
 827     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 828                                SN->getSrcValue(), SN->getSrcValueOffset(),
 829                                SN->isVolatile(), 16);
 830
 831     // Update the chain
 832     the_chain = alignLoadVec.getValue(1);
 833
 834     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 835     SDValue theValue = SN->getValue();
 836     SDValue result;
 837
 838     if (StVT != VT
 839         && (theValue.getOpcode() == ISD::AssertZext
 840             || theValue.getOpcode() == ISD::AssertSext)) {
 841       // Drill down and get the value for zero- and sign-extended
 842       // quantities
 843       theValue = theValue.getOperand(0);
 844     }
 845
 846     // If the base pointer is already a D-form address, then just create
 847     // a new D-form address with a slot offset and the orignal base pointer.
 848     // Otherwise generate a D-form address with the slot offset relative
 849     // to the stack pointer, which is always aligned.
 850 #if !defined(NDEBUG)
 851       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 852         errs() << "CellSPU LowerSTORE: basePtr = ";
 853         basePtr.getNode()->dump(&DAG);
 854         errs() << "\n";
 855       }
 856 #endif
 857
 858     SDValue insertEltOp =
 859             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 860     SDValue vectorizeOp =
 861             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 862
 863     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 864                          vectorizeOp, alignLoadVec,
 865                          DAG.getNode(ISD::BIT_CONVERT, dl,
 866                                      MVT::v4i32, insertEltOp));
 867
 868     result = DAG.getStore(the_chain, dl, result, basePtr,
 869                           LN->getSrcValue(), LN->getSrcValueOffset(),
 870                           LN->isVolatile(), LN->getAlignment());
 871
 872 #if 0 && !defined(NDEBUG)
 873     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 874       const SDValue &currentRoot = DAG.getRoot();
 875
 876       DAG.setRoot(result);
 877       errs() << "------- CellSPU:LowerStore result:\n";
 878       DAG.dump();
 879       errs() << "-------\n";
 880       DAG.setRoot(currentRoot);
 881     }
 882 #endif
 883
 884     return result;
 885     /*UNREACHED*/
 886   }
 887   case ISD::PRE_INC:
 888   case ISD::PRE_DEC:
 889   case ISD::POST_INC:
 890   case ISD::POST_DEC:
 891   case ISD::LAST_INDEXED_MODE:
 892     {
 893       std::string msg;
 894       raw_string_ostream Msg(msg);
 895       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 896             "UNINDEXED\n";
 897       Msg << (unsigned) SN->getAddressingMode();
 898       llvm_report_error(Msg.str());
 899       /*NOTREACHED*/
 900     }
 901   }
 902
 903   return SDValue();
 904 }
 905
 906 //! Generate the address of a constant pool entry.
 907 static SDValue
 908 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 909   EVT PtrVT = Op.getValueType();
 910   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 911   Constant *C = CP->getConstVal();
 912   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 913   SDValue Zero = DAG.getConstant(0, PtrVT);
 914   const TargetMachine &TM = DAG.getTarget();
 915   // FIXME there is no actual debug info here
 916   DebugLoc dl = Op.getDebugLoc();
 917
 918   if (TM.getRelocationModel() == Reloc::Static) {
 919     if (!ST->usingLargeMem()) {
 920       // Just return the SDValue with the constant pool address in it.
 921       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 922     } else {
 923       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 924       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 925       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 926     }
 927   }
 928
 929   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 930                    " not supported.");
 931   return SDValue();
 932 }
 933
 934 //! Alternate entry point for generating the address of a constant pool entry
 935 SDValue
 936 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 937   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 938 }
 939
 940 static SDValue
 941 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 942   EVT PtrVT = Op.getValueType();
 943   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 944   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 945   SDValue Zero = DAG.getConstant(0, PtrVT);
 946   const TargetMachine &TM = DAG.getTarget();
 947   // FIXME there is no actual debug info here
 948   DebugLoc dl = Op.getDebugLoc();
 949
 950   if (TM.getRelocationModel() == Reloc::Static) {
 951     if (!ST->usingLargeMem()) {
 952       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 953     } else {
 954       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 955       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 956       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 957     }
 958   }
 959
 960   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 961                    " not supported.");
 962   return SDValue();
 963 }
 964
 965 static SDValue
 966 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 967   EVT PtrVT = Op.getValueType();
 968   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 969   GlobalValue *GV = GSDN->getGlobal();
 970   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 971   const TargetMachine &TM = DAG.getTarget();
 972   SDValue Zero = DAG.getConstant(0, PtrVT);
 973   // FIXME there is no actual debug info here
 974   DebugLoc dl = Op.getDebugLoc();
 975
 976   if (TM.getRelocationModel() == Reloc::Static) {
 977     if (!ST->usingLargeMem()) {
 978       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 979     } else {
 980       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 981       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 982       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 983     }
 984   } else {
 985     llvm_report_error("LowerGlobalAddress: Relocation model other than static"
 986                       "not supported.");
 987     /*NOTREACHED*/
 988   }
 989
 990   return SDValue();
 991 }
 992
 993 //! Custom lower double precision floating point constants
 994 static SDValue
 995 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 996   EVT VT = Op.getValueType();
 997   // FIXME there is no actual debug info here
 998   DebugLoc dl = Op.getDebugLoc();
 999
1000   if (VT == MVT::f64) {
1001     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
1002
1003     assert((FP != 0) &&
1004            "LowerConstantFP: Node is not ConstantFPSDNode");
1005
1006     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1007     SDValue T = DAG.getConstant(dbits, MVT::i64);
1008     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1009     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1010                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
1011   }
1012
1013   return SDValue();
1014 }
1015
1016 SDValue
1017 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1018                                         CallingConv::ID CallConv, bool isVarArg,
1019                                         const SmallVectorImpl<ISD::InputArg>
1020                                           &Ins,
1021                                         DebugLoc dl, SelectionDAG &DAG,
1022                                         SmallVectorImpl<SDValue> &InVals) {
1023
1024   MachineFunction &MF = DAG.getMachineFunction();
1025   MachineFrameInfo *MFI = MF.getFrameInfo();
1026   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1027
1028   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1029   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1030
1031   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1032   unsigned ArgRegIdx = 0;
1033   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1034
1035   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1036
1037   // Add DAG nodes to load the arguments or copy them out of registers.
1038   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1039     EVT ObjectVT = Ins[ArgNo].VT;
1040     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1041     SDValue ArgVal;
1042
1043     if (ArgRegIdx < NumArgRegs) {
1044       const TargetRegisterClass *ArgRegClass;
1045
1046       switch (ObjectVT.getSimpleVT().SimpleTy) {
1047       default: {
1048         std::string msg;
1049         raw_string_ostream Msg(msg);
1050         Msg << "LowerFormalArguments Unhandled argument type: "
1051              << ObjectVT.getEVTString();
1052         llvm_report_error(Msg.str());
1053       }
1054       case MVT::i8:
1055         ArgRegClass = &SPU::R8CRegClass;
1056         break;
1057       case MVT::i16:
1058         ArgRegClass = &SPU::R16CRegClass;
1059         break;
1060       case MVT::i32:
1061         ArgRegClass = &SPU::R32CRegClass;
1062         break;
1063       case MVT::i64:
1064         ArgRegClass = &SPU::R64CRegClass;
1065         break;
1066       case MVT::i128:
1067         ArgRegClass = &SPU::GPRCRegClass;
1068         break;
1069       case MVT::f32:
1070         ArgRegClass = &SPU::R32FPRegClass;
1071         break;
1072       case MVT::f64:
1073         ArgRegClass = &SPU::R64FPRegClass;
1074         break;
1075       case MVT::v2f64:
1076       case MVT::v4f32:
1077       case MVT::v2i64:
1078       case MVT::v4i32:
1079       case MVT::v8i16:
1080       case MVT::v16i8:
1081         ArgRegClass = &SPU::VECREGRegClass;
1082         break;
1083       }
1084
1085       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1086       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1087       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1088       ++ArgRegIdx;
1089     } else {
1090       // We need to load the argument to a virtual register if we determined
1091       // above that we ran out of physical registers of the appropriate type
1092       // or we're forced to do vararg
1093       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
1094       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1095       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
1096       ArgOffset += StackSlotSize;
1097     }
1098
1099     InVals.push_back(ArgVal);
1100     // Update the chain
1101     Chain = ArgVal.getOperand(0);
1102   }
1103
1104   // vararg handling:
1105   if (isVarArg) {
1106     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1107     // We will spill (79-3)+1 registers to the stack
1108     SmallVector<SDValue, 79-3+1> MemOps;
1109
1110     // Create the frame slot
1111
1112     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1113       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
1114                                                  true, false);
1115       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1116       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1117       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
1118       Chain = Store.getOperand(0);
1119       MemOps.push_back(Store);
1120
1121       // Increment address by stack slot size for the next stored argument
1122       ArgOffset += StackSlotSize;
1123     }
1124     if (!MemOps.empty())
1125       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1126                           &MemOps[0], MemOps.size());
1127   }
1128
1129   return Chain;
1130 }
1131
1132 /// isLSAAddress - Return the immediate to use if the specified
1133 /// value is representable as a LSA address.
1134 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1135   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1136   if (!C) return 0;
1137
1138   int Addr = C->getZExtValue();
1139   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1140       (Addr << 14 >> 14) != Addr)
1141     return 0;  // Top 14 bits have to be sext of immediate.
1142
1143   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1144 }
1145
1146 SDValue
1147 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1148                              CallingConv::ID CallConv, bool isVarArg,
1149                              bool isTailCall,
1150                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1151                              const SmallVectorImpl<ISD::InputArg> &Ins,
1152                              DebugLoc dl, SelectionDAG &DAG,
1153                              SmallVectorImpl<SDValue> &InVals) {
1154
1155   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1156   unsigned NumOps     = Outs.size();
1157   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1158   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1159   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1160
1161   // Handy pointer type
1162   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1163
1164   // Accumulate how many bytes are to be pushed on the stack, including the
1165   // linkage area, and parameter passing area.  According to the SPU ABI,
1166   // we minimally need space for [LR] and [SP]
1167   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1168
1169   // Set up a copy of the stack pointer for use loading and storing any
1170   // arguments that may not fit in the registers available for argument
1171   // passing.
1172   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1173
1174   // Figure out which arguments are going to go in registers, and which in
1175   // memory.
1176   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1177   unsigned ArgRegIdx = 0;
1178
1179   // Keep track of registers passing arguments
1180   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1181   // And the arguments passed on the stack
1182   SmallVector<SDValue, 8> MemOpChains;
1183
1184   for (unsigned i = 0; i != NumOps; ++i) {
1185     SDValue Arg = Outs[i].Val;
1186
1187     // PtrOff will be used to store the current argument to the stack if a
1188     // register cannot be found for it.
1189     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1190     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1191
1192     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1193     default: llvm_unreachable("Unexpected ValueType for argument!");
1194     case MVT::i8:
1195     case MVT::i16:
1196     case MVT::i32:
1197     case MVT::i64:
1198     case MVT::i128:
1199       if (ArgRegIdx != NumArgRegs) {
1200         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1201       } else {
1202         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1203         ArgOffset += StackSlotSize;
1204       }
1205       break;
1206     case MVT::f32:
1207     case MVT::f64:
1208       if (ArgRegIdx != NumArgRegs) {
1209         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1210       } else {
1211         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1212         ArgOffset += StackSlotSize;
1213       }
1214       break;
1215     case MVT::v2i64:
1216     case MVT::v2f64:
1217     case MVT::v4f32:
1218     case MVT::v4i32:
1219     case MVT::v8i16:
1220     case MVT::v16i8:
1221       if (ArgRegIdx != NumArgRegs) {
1222         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1223       } else {
1224         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1225         ArgOffset += StackSlotSize;
1226       }
1227       break;
1228     }
1229   }
1230
1231   // Update number of stack bytes actually used, insert a call sequence start
1232   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1233   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1234                                                             true));
1235
1236   if (!MemOpChains.empty()) {
1237     // Adjust the stack pointer for the stack arguments.
1238     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1239                         &MemOpChains[0], MemOpChains.size());
1240   }
1241
1242   // Build a sequence of copy-to-reg nodes chained together with token chain
1243   // and flag operands which copy the outgoing args into the appropriate regs.
1244   SDValue InFlag;
1245   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1246     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1247                              RegsToPass[i].second, InFlag);
1248     InFlag = Chain.getValue(1);
1249   }
1250
1251   SmallVector<SDValue, 8> Ops;
1252   unsigned CallOpc = SPUISD::CALL;
1253
1254   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1255   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1256   // node so that legalize doesn't hack it.
1257   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1258     GlobalValue *GV = G->getGlobal();
1259     EVT CalleeVT = Callee.getValueType();
1260     SDValue Zero = DAG.getConstant(0, PtrVT);
1261     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1262
1263     if (!ST->usingLargeMem()) {
1264       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1265       // style calls, otherwise, external symbols are BRASL calls. This assumes
1266       // that declared/defined symbols are in the same compilation unit and can
1267       // be reached through PC-relative jumps.
1268       //
1269       // NOTE:
1270       // This may be an unsafe assumption for JIT and really large compilation
1271       // units.
1272       if (GV->isDeclaration()) {
1273         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1274       } else {
1275         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1276       }
1277     } else {
1278       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1279       // address pairs:
1280       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1281     }
1282   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1283     EVT CalleeVT = Callee.getValueType();
1284     SDValue Zero = DAG.getConstant(0, PtrVT);
1285     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1286         Callee.getValueType());
1287
1288     if (!ST->usingLargeMem()) {
1289       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1290     } else {
1291       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1292     }
1293   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1294     // If this is an absolute destination address that appears to be a legal
1295     // local store address, use the munged value.
1296     Callee = SDValue(Dest, 0);
1297   }
1298
1299   Ops.push_back(Chain);
1300   Ops.push_back(Callee);
1301
1302   // Add argument registers to the end of the list so that they are known live
1303   // into the call.
1304   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1305     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1306                                   RegsToPass[i].second.getValueType()));
1307
1308   if (InFlag.getNode())
1309     Ops.push_back(InFlag);
1310   // Returns a chain and a flag for retval copy to use.
1311   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1312                       &Ops[0], Ops.size());
1313   InFlag = Chain.getValue(1);
1314
1315   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1316                              DAG.getIntPtrConstant(0, true), InFlag);
1317   if (!Ins.empty())
1318     InFlag = Chain.getValue(1);
1319
1320   // If the function returns void, just return the chain.
1321   if (Ins.empty())
1322     return Chain;
1323
1324   // If the call has results, copy the values out of the ret val registers.
1325   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1326   default: llvm_unreachable("Unexpected ret value!");
1327   case MVT::Other: break;
1328   case MVT::i32:
1329     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1330       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1331                                  MVT::i32, InFlag).getValue(1);
1332       InVals.push_back(Chain.getValue(0));
1333       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1334                                  Chain.getValue(2)).getValue(1);
1335       InVals.push_back(Chain.getValue(0));
1336     } else {
1337       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1338                                  InFlag).getValue(1);
1339       InVals.push_back(Chain.getValue(0));
1340     }
1341     break;
1342   case MVT::i64:
1343     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1344                                InFlag).getValue(1);
1345     InVals.push_back(Chain.getValue(0));
1346     break;
1347   case MVT::i128:
1348     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1349                                InFlag).getValue(1);
1350     InVals.push_back(Chain.getValue(0));
1351     break;
1352   case MVT::f32:
1353   case MVT::f64:
1354     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1355                                InFlag).getValue(1);
1356     InVals.push_back(Chain.getValue(0));
1357     break;
1358   case MVT::v2f64:
1359   case MVT::v2i64:
1360   case MVT::v4f32:
1361   case MVT::v4i32:
1362   case MVT::v8i16:
1363   case MVT::v16i8:
1364     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1365                                    InFlag).getValue(1);
1366     InVals.push_back(Chain.getValue(0));
1367     break;
1368   }
1369
1370   return Chain;
1371 }
1372
1373 SDValue
1374 SPUTargetLowering::LowerReturn(SDValue Chain,
1375                                CallingConv::ID CallConv, bool isVarArg,
1376                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1377                                DebugLoc dl, SelectionDAG &DAG) {
1378
1379   SmallVector<CCValAssign, 16> RVLocs;
1380   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1381                  RVLocs, *DAG.getContext());
1382   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1383
1384   // If this is the first return lowered for this function, add the regs to the
1385   // liveout set for the function.
1386   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1387     for (unsigned i = 0; i != RVLocs.size(); ++i)
1388       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1389   }
1390
1391   SDValue Flag;
1392
1393   // Copy the result values into the output registers.
1394   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1395     CCValAssign &VA = RVLocs[i];
1396     assert(VA.isRegLoc() && "Can only return in registers!");
1397     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1398                              Outs[i].Val, Flag);
1399     Flag = Chain.getValue(1);
1400   }
1401
1402   if (Flag.getNode())
1403     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1404   else
1405     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1406 }
1407
1408
1409 //===----------------------------------------------------------------------===//
1410 // Vector related lowering:
1411 //===----------------------------------------------------------------------===//
1412
1413 static ConstantSDNode *
1414 getVecImm(SDNode *N) {
1415   SDValue OpVal(0, 0);
1416
1417   // Check to see if this buildvec has a single non-undef value in its elements.
1418   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1419     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1420     if (OpVal.getNode() == 0)
1421       OpVal = N->getOperand(i);
1422     else if (OpVal != N->getOperand(i))
1423       return 0;
1424   }
1425
1426   if (OpVal.getNode() != 0) {
1427     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1428       return CN;
1429     }
1430   }
1431
1432   return 0;
1433 }
1434
1435 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1436 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1437 /// constant
1438 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1439                               EVT ValueType) {
1440   if (ConstantSDNode *CN = getVecImm(N)) {
1441     uint64_t Value = CN->getZExtValue();
1442     if (ValueType == MVT::i64) {
1443       uint64_t UValue = CN->getZExtValue();
1444       uint32_t upper = uint32_t(UValue >> 32);
1445       uint32_t lower = uint32_t(UValue);
1446       if (upper != lower)
1447         return SDValue();
1448       Value = Value >> 32;
1449     }
1450     if (Value <= 0x3ffff)
1451       return DAG.getTargetConstant(Value, ValueType);
1452   }
1453
1454   return SDValue();
1455 }
1456
1457 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1458 /// and the value fits into a signed 16-bit constant, and if so, return the
1459 /// constant
1460 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1461                               EVT ValueType) {
1462   if (ConstantSDNode *CN = getVecImm(N)) {
1463     int64_t Value = CN->getSExtValue();
1464     if (ValueType == MVT::i64) {
1465       uint64_t UValue = CN->getZExtValue();
1466       uint32_t upper = uint32_t(UValue >> 32);
1467       uint32_t lower = uint32_t(UValue);
1468       if (upper != lower)
1469         return SDValue();
1470       Value = Value >> 32;
1471     }
1472     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1473       return DAG.getTargetConstant(Value, ValueType);
1474     }
1475   }
1476
1477   return SDValue();
1478 }
1479
1480 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1481 /// and the value fits into a signed 10-bit constant, and if so, return the
1482 /// constant
1483 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1484                               EVT ValueType) {
1485   if (ConstantSDNode *CN = getVecImm(N)) {
1486     int64_t Value = CN->getSExtValue();
1487     if (ValueType == MVT::i64) {
1488       uint64_t UValue = CN->getZExtValue();
1489       uint32_t upper = uint32_t(UValue >> 32);
1490       uint32_t lower = uint32_t(UValue);
1491       if (upper != lower)
1492         return SDValue();
1493       Value = Value >> 32;
1494     }
1495     if (isS10Constant(Value))
1496       return DAG.getTargetConstant(Value, ValueType);
1497   }
1498
1499   return SDValue();
1500 }
1501
1502 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1503 /// and the value fits into a signed 8-bit constant, and if so, return the
1504 /// constant.
1505 ///
1506 /// @note: The incoming vector is v16i8 because that's the only way we can load
1507 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1508 /// same value.
1509 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1510                              EVT ValueType) {
1511   if (ConstantSDNode *CN = getVecImm(N)) {
1512     int Value = (int) CN->getZExtValue();
1513     if (ValueType == MVT::i16
1514         && Value <= 0xffff                 /* truncated from uint64_t */
1515         && ((short) Value >> 8) == ((short) Value & 0xff))
1516       return DAG.getTargetConstant(Value & 0xff, ValueType);
1517     else if (ValueType == MVT::i8
1518              && (Value & 0xff) == Value)
1519       return DAG.getTargetConstant(Value, ValueType);
1520   }
1521
1522   return SDValue();
1523 }
1524
1525 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1526 /// and the value fits into a signed 16-bit constant, and if so, return the
1527 /// constant
1528 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1529                                EVT ValueType) {
1530   if (ConstantSDNode *CN = getVecImm(N)) {
1531     uint64_t Value = CN->getZExtValue();
1532     if ((ValueType == MVT::i32
1533           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1534         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1535       return DAG.getTargetConstant(Value >> 16, ValueType);
1536   }
1537
1538   return SDValue();
1539 }
1540
1541 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1542 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1543   if (ConstantSDNode *CN = getVecImm(N)) {
1544     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1545   }
1546
1547   return SDValue();
1548 }
1549
1550 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1551 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1552   if (ConstantSDNode *CN = getVecImm(N)) {
1553     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1554   }
1555
1556   return SDValue();
1557 }
1558
1559 //! Lower a BUILD_VECTOR instruction creatively:
1560 static SDValue
1561 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1562   EVT VT = Op.getValueType();
1563   EVT EltVT = VT.getVectorElementType();
1564   DebugLoc dl = Op.getDebugLoc();
1565   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1566   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1567   unsigned minSplatBits = EltVT.getSizeInBits();
1568
1569   if (minSplatBits < 16)
1570     minSplatBits = 16;
1571
1572   APInt APSplatBits, APSplatUndef;
1573   unsigned SplatBitSize;
1574   bool HasAnyUndefs;
1575
1576   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1577                             HasAnyUndefs, minSplatBits)
1578       || minSplatBits < SplatBitSize)
1579     return SDValue();   // Wasn't a constant vector or splat exceeded min
1580
1581   uint64_t SplatBits = APSplatBits.getZExtValue();
1582
1583   switch (VT.getSimpleVT().SimpleTy) {
1584   default: {
1585     std::string msg;
1586     raw_string_ostream Msg(msg);
1587     Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1588          << VT.getEVTString();
1589     llvm_report_error(Msg.str());
1590     /*NOTREACHED*/
1591   }
1592   case MVT::v4f32: {
1593     uint32_t Value32 = uint32_t(SplatBits);
1594     assert(SplatBitSize == 32
1595            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1596     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1597     SDValue T = DAG.getConstant(Value32, MVT::i32);
1598     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1599                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1600     break;
1601   }
1602   case MVT::v2f64: {
1603     uint64_t f64val = uint64_t(SplatBits);
1604     assert(SplatBitSize == 64
1605            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1606     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1607     SDValue T = DAG.getConstant(f64val, MVT::i64);
1608     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1609                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1610     break;
1611   }
1612   case MVT::v16i8: {
1613    // 8-bit constants have to be expanded to 16-bits
1614    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1615    SmallVector<SDValue, 8> Ops;
1616
1617    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1618    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1619                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1620   }
1621   case MVT::v8i16: {
1622     unsigned short Value16 = SplatBits;
1623     SDValue T = DAG.getConstant(Value16, EltVT);
1624     SmallVector<SDValue, 8> Ops;
1625
1626     Ops.assign(8, T);
1627     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1628   }
1629   case MVT::v4i32: {
1630     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1631     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1632   }
1633   case MVT::v2i32: {
1634     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1635     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1636   }
1637   case MVT::v2i64: {
1638     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1639   }
1640   }
1641
1642   return SDValue();
1643 }
1644
1645 /*!
1646  */
1647 SDValue
1648 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1649                      DebugLoc dl) {
1650   uint32_t upper = uint32_t(SplatVal >> 32);
1651   uint32_t lower = uint32_t(SplatVal);
1652
1653   if (upper == lower) {
1654     // Magic constant that can be matched by IL, ILA, et. al.
1655     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1656     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1657                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1658                                    Val, Val, Val, Val));
1659   } else {
1660     bool upper_special, lower_special;
1661
1662     // NOTE: This code creates common-case shuffle masks that can be easily
1663     // detected as common expressions. It is not attempting to create highly
1664     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1665
1666     // Detect if the upper or lower half is a special shuffle mask pattern:
1667     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1668     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1669
1670     // Both upper and lower are special, lower to a constant pool load:
1671     if (lower_special && upper_special) {
1672       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1673       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1674                          SplatValCN, SplatValCN);
1675     }
1676
1677     SDValue LO32;
1678     SDValue HI32;
1679     SmallVector<SDValue, 16> ShufBytes;
1680     SDValue Result;
1681
1682     // Create lower vector if not a special pattern
1683     if (!lower_special) {
1684       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1685       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1686                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1687                                      LO32C, LO32C, LO32C, LO32C));
1688     }
1689
1690     // Create upper vector if not a special pattern
1691     if (!upper_special) {
1692       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1693       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1694                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1695                                      HI32C, HI32C, HI32C, HI32C));
1696     }
1697
1698     // If either upper or lower are special, then the two input operands are
1699     // the same (basically, one of them is a "don't care")
1700     if (lower_special)
1701       LO32 = HI32;
1702     if (upper_special)
1703       HI32 = LO32;
1704
1705     for (int i = 0; i < 4; ++i) {
1706       uint64_t val = 0;
1707       for (int j = 0; j < 4; ++j) {
1708         SDValue V;
1709         bool process_upper, process_lower;
1710         val <<= 8;
1711         process_upper = (upper_special && (i & 1) == 0);
1712         process_lower = (lower_special && (i & 1) == 1);
1713
1714         if (process_upper || process_lower) {
1715           if ((process_upper && upper == 0)
1716                   || (process_lower && lower == 0))
1717             val |= 0x80;
1718           else if ((process_upper && upper == 0xffffffff)
1719                   || (process_lower && lower == 0xffffffff))
1720             val |= 0xc0;
1721           else if ((process_upper && upper == 0x80000000)
1722                   || (process_lower && lower == 0x80000000))
1723             val |= (j == 0 ? 0xe0 : 0x80);
1724         } else
1725           val |= i * 4 + j + ((i & 1) * 16);
1726       }
1727
1728       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1729     }
1730
1731     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1732                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1733                                    &ShufBytes[0], ShufBytes.size()));
1734   }
1735 }
1736
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
///
/// In this implementation the permutation (called V3 above) is read from the
/// ShuffleVectorSDNode's mask, and one of three forms is produced:
///  - exactly one element comes from V2 and the rest are in order:
///    SHUFB(V2, V1, SHUFFLE_MASK);
///  - the mask is recognized as a rotation of V1: ROTBYTES_LEFT(V1, amount);
///  - otherwise: SHUFB(V1, V2, mask), with the element mask expanded to a
///    v16i8 BUILD_VECTOR of byte indices.
/// \note
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
/// control word for byte/halfword/word insertion. This takes care of a single
/// element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  // An undefined second input is replaced by the first one.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being taken from V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  EVT VecVT = V1.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;      // Number of mask entries that select from V2
  unsigned V2Elt = 0;           // Operand for SHUFFLE_MASK (first V2 entry)
  unsigned V2EltIdx0 = 0;       // Mask index at which V2's elements begin
  unsigned CurrElt = 0;         // Next expected V1 element while monotonic
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;         // Last source element seen by the rotate test
  unsigned V0Elt = 0;           // Mask position at which source element 0 sits
  bool monotonic = true;
  bool rotate = true;

  // Mask indices >= the per-vector element count refer to V2.
  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");

  // Classify the mask in a single pass; undefined (negative) entries are
  // skipped entirely.
  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        // Only the first element taken from V2 records V2Elt.
        // NOTE(review): SrcElt >= V2EltIdx0 here, so this unsigned
        // subtraction wraps unless the two are equal -- confirm the operand
        // order is intended.
        if (1 >= (++EltsFromV2)) {
          V2Elt = (V2EltIdx0 - SrcElt) << 2;
        }
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }

      // NOTE(review): CurrElt is not advanced for skipped (undefined)
      // entries, so it counts defined entries rather than mask positions.
      ++CurrElt;
    }

    if (rotate) {
      // NOTE(review): PrevElt == 0 doubles as the "nothing seen yet" marker,
      // so a previous source element of 0 is indistinguishable from the
      // initial state -- confirm this is acceptable.
      if (PrevElt > 0 && SrcElt < MaxElts) {
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          PrevElt = SrcElt;
          if (SrcElt == 0)
            V0Elt = i;
        } else {
          rotate = false;
        }
      } else if (PrevElt == 0) {
        // First time through, need to keep track of previous element
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation, takes elements from vector 2
        rotate = false;
      }
    }
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in SHUFFLE_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction (note the operand
    // order: V2 first, then V1):
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
  } else if (rotate) {
    // Rotation amount in bytes: (elements - position of source element 0)
    // times the element size in bytes.
    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;

    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
      // Undefined mask entries are treated as source element 0.
      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);

      // Each element expands to BytesPerElement consecutive byte indices.
      for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}
1862
1863 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1864   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1865   DebugLoc dl = Op.getDebugLoc();
1866
1867   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1868     // For a constant, build the appropriate constant vector, which will
1869     // eventually simplify to a vector register load.
1870
1871     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1872     SmallVector<SDValue, 16> ConstVecValues;
1873     EVT VT;
1874     size_t n_copies;
1875
1876     // Create a constant vector:
1877     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1878     default: llvm_unreachable("Unexpected constant value type in "
1879                               "LowerSCALAR_TO_VECTOR");
1880     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1881     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1882     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1883     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1884     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1885     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1886     }
1887
1888     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1889     for (size_t j = 0; j < n_copies; ++j)
1890       ConstVecValues.push_back(CValue);
1891
1892     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1893                        &ConstVecValues[0], ConstVecValues.size());
1894   } else {
1895     // Otherwise, copy the value from one register to another:
1896     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1897     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1898     case MVT::i8:
1899     case MVT::i16:
1900     case MVT::i32:
1901     case MVT::i64:
1902     case MVT::f32:
1903     case MVT::f64:
1904       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1905     }
1906   }
1907
1908   return SDValue();
1909 }
1910
1911 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1912   EVT VT = Op.getValueType();
1913   SDValue N = Op.getOperand(0);
1914   SDValue Elt = Op.getOperand(1);
1915   DebugLoc dl = Op.getDebugLoc();
1916   SDValue retval;
1917
1918   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1919     // Constant argument:
1920     int EltNo = (int) C->getZExtValue();
1921
1922     // sanity checks:
1923     if (VT == MVT::i8 && EltNo >= 16)
1924       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1925     else if (VT == MVT::i16 && EltNo >= 8)
1926       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1927     else if (VT == MVT::i32 && EltNo >= 4)
1928       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1929     else if (VT == MVT::i64 && EltNo >= 2)
1930       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1931
1932     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1933       // i32 and i64: Element 0 is the preferred slot
1934       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1935     }
1936
1937     // Need to generate shuffle mask and extract:
1938     int prefslot_begin = -1, prefslot_end = -1;
1939     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1940
1941     switch (VT.getSimpleVT().SimpleTy) {
1942     default:
1943       assert(false && "Invalid value type!");
1944     case MVT::i8: {
1945       prefslot_begin = prefslot_end = 3;
1946       break;
1947     }
1948     case MVT::i16: {
1949       prefslot_begin = 2; prefslot_end = 3;
1950       break;
1951     }
1952     case MVT::i32:
1953     case MVT::f32: {
1954       prefslot_begin = 0; prefslot_end = 3;
1955       break;
1956     }
1957     case MVT::i64:
1958     case MVT::f64: {
1959       prefslot_begin = 0; prefslot_end = 7;
1960       break;
1961     }
1962     }
1963
1964     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1965            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1966
1967     unsigned int ShufBytes[16] = {
1968       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1969     };
1970     for (int i = 0; i < 16; ++i) {
1971       // zero fill uppper part of preferred slot, don't care about the
1972       // other slots:
1973       unsigned int mask_val;
1974       if (i <= prefslot_end) {
1975         mask_val =
1976           ((i < prefslot_begin)
1977            ? 0x80
1978            : elt_byte + (i - prefslot_begin));
1979
1980         ShufBytes[i] = mask_val;
1981       } else
1982         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1983     }
1984
1985     SDValue ShufMask[4];
1986     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1987       unsigned bidx = i * 4;
1988       unsigned int bits = ((ShufBytes[bidx] << 24) |
1989                            (ShufBytes[bidx+1] << 16) |
1990                            (ShufBytes[bidx+2] << 8) |
1991                            ShufBytes[bidx+3]);
1992       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1993     }
1994
1995     SDValue ShufMaskVec =
1996       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1997                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1998
1999     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2000                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2001                                      N, N, ShufMaskVec));
2002   } else {
2003     // Variable index: Rotate the requested element into slot 0, then replicate
2004     // slot 0 across the vector
2005     EVT VecVT = N.getValueType();
2006     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2007       llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2008                         "vector type!");
2009     }
2010
2011     // Make life easier by making sure the index is zero-extended to i32
2012     if (Elt.getValueType() != MVT::i32)
2013       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2014
2015     // Scale the index to a bit/byte shift quantity
2016     APInt scaleFactor =
2017             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2018     unsigned scaleShift = scaleFactor.logBase2();
2019     SDValue vecShift;
2020
2021     if (scaleShift > 0) {
2022       // Scale the shift factor:
2023       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2024                         DAG.getConstant(scaleShift, MVT::i32));
2025     }
2026
2027     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2028
2029     // Replicate the bytes starting at byte 0 across the entire vector (for
2030     // consistency with the notion of a unified register set)
2031     SDValue replicate;
2032
2033     switch (VT.getSimpleVT().SimpleTy) {
2034     default:
2035       llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2036                         "type");
2037       /*NOTREACHED*/
2038     case MVT::i8: {
2039       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2040       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2041                               factor, factor, factor, factor);
2042       break;
2043     }
2044     case MVT::i16: {
2045       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2046       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2047                               factor, factor, factor, factor);
2048       break;
2049     }
2050     case MVT::i32:
2051     case MVT::f32: {
2052       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2053       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2054                               factor, factor, factor, factor);
2055       break;
2056     }
2057     case MVT::i64:
2058     case MVT::f64: {
2059       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2060       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2061       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2062                               loFactor, hiFactor, loFactor, hiFactor);
2063       break;
2064     }
2065     }
2066
2067     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2068                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2069                                      vecShift, vecShift, replicate));
2070   }
2071
2072   return retval;
2073 }
2074
2075 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2076   SDValue VecOp = Op.getOperand(0);
2077   SDValue ValOp = Op.getOperand(1);
2078   SDValue IdxOp = Op.getOperand(2);
2079   DebugLoc dl = Op.getDebugLoc();
2080   EVT VT = Op.getValueType();
2081
2082   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2083   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2084
2085   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2086   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2087   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2088                                 DAG.getRegister(SPU::R1, PtrVT),
2089                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2090   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2091
2092   SDValue result =
2093     DAG.getNode(SPUISD::SHUFB, dl, VT,
2094                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2095                 VecOp,
2096                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2097
2098   return result;
2099 }
2100
2101 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2102                            const TargetLowering &TLI)
2103 {
2104   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2105   DebugLoc dl = Op.getDebugLoc();
2106   EVT ShiftVT = TLI.getShiftAmountTy();
2107
2108   assert(Op.getValueType() == MVT::i8);
2109   switch (Opc) {
2110   default:
2111     llvm_unreachable("Unhandled i8 math operator");
2112     /*NOTREACHED*/
2113     break;
2114   case ISD::ADD: {
2115     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2116     // the result:
2117     SDValue N1 = Op.getOperand(1);
2118     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2119     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2120     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2121                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2122
2123   }
2124
2125   case ISD::SUB: {
2126     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2127     // the result:
2128     SDValue N1 = Op.getOperand(1);
2129     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2130     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2131     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2132                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2133   }
2134   case ISD::ROTR:
2135   case ISD::ROTL: {
2136     SDValue N1 = Op.getOperand(1);
2137     EVT N1VT = N1.getValueType();
2138
2139     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2140     if (!N1VT.bitsEq(ShiftVT)) {
2141       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2142                        ? ISD::ZERO_EXTEND
2143                        : ISD::TRUNCATE;
2144       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2145     }
2146
2147     // Replicate lower 8-bits into upper 8:
2148     SDValue ExpandArg =
2149       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2150                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2151                               N0, DAG.getConstant(8, MVT::i32)));
2152
2153     // Truncate back down to i8
2154     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2155                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2156   }
2157   case ISD::SRL:
2158   case ISD::SHL: {
2159     SDValue N1 = Op.getOperand(1);
2160     EVT N1VT = N1.getValueType();
2161
2162     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2163     if (!N1VT.bitsEq(ShiftVT)) {
2164       unsigned N1Opc = ISD::ZERO_EXTEND;
2165
2166       if (N1.getValueType().bitsGT(ShiftVT))
2167         N1Opc = ISD::TRUNCATE;
2168
2169       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2170     }
2171
2172     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2173                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2174   }
2175   case ISD::SRA: {
2176     SDValue N1 = Op.getOperand(1);
2177     EVT N1VT = N1.getValueType();
2178
2179     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2180     if (!N1VT.bitsEq(ShiftVT)) {
2181       unsigned N1Opc = ISD::SIGN_EXTEND;
2182
2183       if (N1VT.bitsGT(ShiftVT))
2184         N1Opc = ISD::TRUNCATE;
2185       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2186     }
2187
2188     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2189                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2190   }
2191   case ISD::MUL: {
2192     SDValue N1 = Op.getOperand(1);
2193
2194     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2195     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2196     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2197                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2198     break;
2199   }
2200   }
2201
2202   return SDValue();
2203 }
2204
2205 //! Lower byte immediate operations for v16i8 vectors:
2206 static SDValue
2207 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2208   SDValue ConstVec;
2209   SDValue Arg;
2210   EVT VT = Op.getValueType();
2211   DebugLoc dl = Op.getDebugLoc();
2212
2213   ConstVec = Op.getOperand(0);
2214   Arg = Op.getOperand(1);
2215   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2216     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2217       ConstVec = ConstVec.getOperand(0);
2218     } else {
2219       ConstVec = Op.getOperand(1);
2220       Arg = Op.getOperand(0);
2221       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2222         ConstVec = ConstVec.getOperand(0);
2223       }
2224     }
2225   }
2226
2227   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2228     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2229     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2230
2231     APInt APSplatBits, APSplatUndef;
2232     unsigned SplatBitSize;
2233     bool HasAnyUndefs;
2234     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2235
2236     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2237                               HasAnyUndefs, minSplatBits)
2238         && minSplatBits <= SplatBitSize) {
2239       uint64_t SplatBits = APSplatBits.getZExtValue();
2240       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2241
2242       SmallVector<SDValue, 16> tcVec;
2243       tcVec.assign(16, tc);
2244       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2245                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2246     }
2247   }
2248
2249   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2250   // lowered.  Return the operation, rather than a null SDValue.
2251   return Op;
2252 }
2253
2254 //! Custom lowering for CTPOP (count population)
2255 /*!
  Custom lowering code that counts the number of ones in the input
2257   operand. SPU has such an instruction, but it counts the number of
2258   ones per byte, which then have to be accumulated.
2259 */
2260 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2261   EVT VT = Op.getValueType();
2262   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2263                                VT, (128 / VT.getSizeInBits()));
2264   DebugLoc dl = Op.getDebugLoc();
2265
2266   switch (VT.getSimpleVT().SimpleTy) {
2267   default:
2268     assert(false && "Invalid value type!");
2269   case MVT::i8: {
2270     SDValue N = Op.getOperand(0);
2271     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2272
2273     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2274     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2275
2276     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2277   }
2278
2279   case MVT::i16: {
2280     MachineFunction &MF = DAG.getMachineFunction();
2281     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2282
2283     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2284
2285     SDValue N = Op.getOperand(0);
2286     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2287     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2288     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2289
2290     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2291     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2292
2293     // CNTB_result becomes the chain to which all of the virtual registers
2294     // CNTB_reg, SUM1_reg become associated:
2295     SDValue CNTB_result =
2296       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2297
2298     SDValue CNTB_rescopy =
2299       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2300
2301     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2302
2303     return DAG.getNode(ISD::AND, dl, MVT::i16,
2304                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2305                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2306                                                Tmp1, Shift1),
2307                                    Tmp1),
2308                        Mask0);
2309   }
2310
2311   case MVT::i32: {
2312     MachineFunction &MF = DAG.getMachineFunction();
2313     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2314
2315     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2316     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2317
2318     SDValue N = Op.getOperand(0);
2319     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2320     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2321     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2322     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2323
2324     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2325     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2326
2327     // CNTB_result becomes the chain to which all of the virtual registers
2328     // CNTB_reg, SUM1_reg become associated:
2329     SDValue CNTB_result =
2330       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2331
2332     SDValue CNTB_rescopy =
2333       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2334
2335     SDValue Comp1 =
2336       DAG.getNode(ISD::SRL, dl, MVT::i32,
2337                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2338                   Shift1);
2339
2340     SDValue Sum1 =
2341       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2342                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2343
2344     SDValue Sum1_rescopy =
2345       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2346
2347     SDValue Comp2 =
2348       DAG.getNode(ISD::SRL, dl, MVT::i32,
2349                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2350                   Shift2);
2351     SDValue Sum2 =
2352       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2353                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2354
2355     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2356   }
2357
2358   case MVT::i64:
2359     break;
2360   }
2361
2362   return SDValue();
2363 }
2364
2365 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2366 /*!
2367  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2368  All conversions to i64 are expanded to a libcall.
2369  */
2370 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2371                               SPUTargetLowering &TLI) {
2372   EVT OpVT = Op.getValueType();
2373   SDValue Op0 = Op.getOperand(0);
2374   EVT Op0VT = Op0.getValueType();
2375
2376   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2377       || OpVT == MVT::i64) {
2378     // Convert f32 / f64 to i32 / i64 via libcall.
2379     RTLIB::Libcall LC =
2380             (Op.getOpcode() == ISD::FP_TO_SINT)
2381              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2382              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2383     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2384     SDValue Dummy;
2385     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2386   }
2387
2388   return Op;
2389 }
2390
2391 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2392 /*!
2393  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2394  All conversions from i64 are expanded to a libcall.
2395  */
2396 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2397                               SPUTargetLowering &TLI) {
2398   EVT OpVT = Op.getValueType();
2399   SDValue Op0 = Op.getOperand(0);
2400   EVT Op0VT = Op0.getValueType();
2401
2402   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2403       || Op0VT == MVT::i64) {
2404     // Convert i32, i64 to f64 via libcall:
2405     RTLIB::Libcall LC =
2406             (Op.getOpcode() == ISD::SINT_TO_FP)
2407              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2408              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2409     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2410     SDValue Dummy;
2411     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2412   }
2413
2414   return Op;
2415 }
2416
2417 //! Lower ISD::SETCC
2418 /*!
2419  This handles MVT::f64 (double floating point) condition lowering
2420  */
2421 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2422                           const TargetLowering &TLI) {
2423   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2424   DebugLoc dl = Op.getDebugLoc();
2425   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2426
2427   SDValue lhs = Op.getOperand(0);
2428   SDValue rhs = Op.getOperand(1);
2429   EVT lhsVT = lhs.getValueType();
2430   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2431
2432   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2433   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2434   EVT IntVT(MVT::i64);
2435
2436   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2437   // selected to a NOP:
2438   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2439   SDValue lhsHi32 =
2440           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2441                       DAG.getNode(ISD::SRL, dl, IntVT,
2442                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2443   SDValue lhsHi32abs =
2444           DAG.getNode(ISD::AND, dl, MVT::i32,
2445                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2446   SDValue lhsLo32 =
2447           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2448
2449   // SETO and SETUO only use the lhs operand:
2450   if (CC->get() == ISD::SETO) {
2451     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2452     // SETUO
2453     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2454     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2455                        DAG.getSetCC(dl, ccResultVT,
2456                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2457                                     ISD::SETUO),
2458                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2459   } else if (CC->get() == ISD::SETUO) {
2460     // Evaluates to true if Op0 is [SQ]NaN
2461     return DAG.getNode(ISD::AND, dl, ccResultVT,
2462                        DAG.getSetCC(dl, ccResultVT,
2463                                     lhsHi32abs,
2464                                     DAG.getConstant(0x7ff00000, MVT::i32),
2465                                     ISD::SETGE),
2466                        DAG.getSetCC(dl, ccResultVT,
2467                                     lhsLo32,
2468                                     DAG.getConstant(0, MVT::i32),
2469                                     ISD::SETGT));
2470   }
2471
2472   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2473   SDValue rhsHi32 =
2474           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2475                       DAG.getNode(ISD::SRL, dl, IntVT,
2476                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2477
2478   // If a value is negative, subtract from the sign magnitude constant:
2479   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2480
2481   // Convert the sign-magnitude representation into 2's complement:
2482   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2483                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2484   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2485   SDValue lhsSelect =
2486           DAG.getNode(ISD::SELECT, dl, IntVT,
2487                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2488
2489   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2490                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2491   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2492   SDValue rhsSelect =
2493           DAG.getNode(ISD::SELECT, dl, IntVT,
2494                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2495
2496   unsigned compareOp;
2497
2498   switch (CC->get()) {
2499   case ISD::SETOEQ:
2500   case ISD::SETUEQ:
2501     compareOp = ISD::SETEQ; break;
2502   case ISD::SETOGT:
2503   case ISD::SETUGT:
2504     compareOp = ISD::SETGT; break;
2505   case ISD::SETOGE:
2506   case ISD::SETUGE:
2507     compareOp = ISD::SETGE; break;
2508   case ISD::SETOLT:
2509   case ISD::SETULT:
2510     compareOp = ISD::SETLT; break;
2511   case ISD::SETOLE:
2512   case ISD::SETULE:
2513     compareOp = ISD::SETLE; break;
2514   case ISD::SETUNE:
2515   case ISD::SETONE:
2516     compareOp = ISD::SETNE; break;
2517   default:
2518     llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2519   }
2520
2521   SDValue result =
2522           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2523                        (ISD::CondCode) compareOp);
2524
2525   if ((CC->get() & 0x8) == 0) {
2526     // Ordered comparison:
2527     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2528                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2529                                   ISD::SETO);
2530     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2531                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2532                                   ISD::SETO);
2533     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2534
2535     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2536   }
2537
2538   return result;
2539 }
2540
2541 //! Lower ISD::SELECT_CC
2542 /*!
2543   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2544   SELB instruction.
2545
2546   \note Need to revisit this in the future: if the code path through the true
2547   and false value computations is longer than the latency of a branch (6
2548   cycles), then it would be more advantageous to branch and insert a new basic
2549   block and branch on the condition. However, this code does not make that
2550   assumption, given the simplisitc uses so far.
2551  */
2552
2553 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2554                               const TargetLowering &TLI) {
2555   EVT VT = Op.getValueType();
2556   SDValue lhs = Op.getOperand(0);
2557   SDValue rhs = Op.getOperand(1);
2558   SDValue trueval = Op.getOperand(2);
2559   SDValue falseval = Op.getOperand(3);
2560   SDValue condition = Op.getOperand(4);
2561   DebugLoc dl = Op.getDebugLoc();
2562
2563   // NOTE: SELB's arguments: $rA, $rB, $mask
2564   //
2565   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2566   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2567   // condition was true and 0s where the condition was false. Hence, the
2568   // arguments to SELB get reversed.
2569
2570   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2571   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2572   // with another "cannot select select_cc" assert:
2573
2574   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2575                                 TLI.getSetCCResultType(Op.getValueType()),
2576                                 lhs, rhs, condition);
2577   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2578 }
2579
2580 //! Custom lower ISD::TRUNCATE
2581 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2582 {
2583   // Type to truncate to
2584   EVT VT = Op.getValueType();
2585   MVT simpleVT = VT.getSimpleVT();
2586   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2587                                VT, (128 / VT.getSizeInBits()));
2588   DebugLoc dl = Op.getDebugLoc();
2589
2590   // Type to truncate from
2591   SDValue Op0 = Op.getOperand(0);
2592   EVT Op0VT = Op0.getValueType();
2593
2594   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2595     // Create shuffle mask, least significant doubleword of quadword
2596     unsigned maskHigh = 0x08090a0b;
2597     unsigned maskLow = 0x0c0d0e0f;
2598     // Use a shuffle to perform the truncation
2599     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2600                                    DAG.getConstant(maskHigh, MVT::i32),
2601                                    DAG.getConstant(maskLow, MVT::i32),
2602                                    DAG.getConstant(maskHigh, MVT::i32),
2603                                    DAG.getConstant(maskLow, MVT::i32));
2604
2605     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2606                                        Op0, Op0, shufMask);
2607
2608     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2609   }
2610
2611   return SDValue();             // Leave the truncate unmolested
2612 }
2613
2614 /*!
2615  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2616  * algorithm is to duplicate the sign bit using rotmai to generate at
2617  * least one byte full of sign bits. Then propagate the "sign-byte" into
2618  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2619  *
2620  * @param Op The sext operand
2621  * @param DAG The current DAG
2622  * @return The SDValue with the entire instruction sequence
2623  */
2624 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2625 {
2626   DebugLoc dl = Op.getDebugLoc();
2627
2628   // Type to extend to
2629   MVT OpVT = Op.getValueType().getSimpleVT();
2630   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2631                                OpVT, (128 / OpVT.getSizeInBits()));
2632
2633   // Type to extend from
2634   SDValue Op0 = Op.getOperand(0);
2635   MVT Op0VT = Op0.getValueType().getSimpleVT();
2636
2637   // The type to extend to needs to be a i128 and
2638   // the type to extend from needs to be i64 or i32.
2639   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2640           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2641
2642   // Create shuffle mask
2643   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2644   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2645   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2646   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2647                                  DAG.getConstant(mask1, MVT::i32),
2648                                  DAG.getConstant(mask1, MVT::i32),
2649                                  DAG.getConstant(mask2, MVT::i32),
2650                                  DAG.getConstant(mask3, MVT::i32));
2651
2652   // Word wise arithmetic right shift to generate at least one byte
2653   // that contains sign bits.
2654   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2655   SDValue sraVal = DAG.getNode(ISD::SRA,
2656                  dl,
2657                  mvt,
2658                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2659                  DAG.getConstant(31, MVT::i32));
2660
2661   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2662   // and the input value into the lower 64 bits.
2663   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2664       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2665
2666   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2667 }
2668
2669 //! Custom (target-specific) lowering entry point
2670 /*!
2671   This is where LLVM's DAG selection process calls to do target-specific
2672   lowering of nodes.
2673  */
2674 SDValue
2675 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2676 {
2677   unsigned Opc = (unsigned) Op.getOpcode();
2678   EVT VT = Op.getValueType();
2679
2680   switch (Opc) {
2681   default: {
2682 #ifndef NDEBUG
2683     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2684     errs() << "Op.getOpcode() = " << Opc << "\n";
2685     errs() << "*Op.getNode():\n";
2686     Op.getNode()->dump();
2687 #endif
2688     llvm_unreachable(0);
2689   }
2690   case ISD::LOAD:
2691   case ISD::EXTLOAD:
2692   case ISD::SEXTLOAD:
2693   case ISD::ZEXTLOAD:
2694     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2695   case ISD::STORE:
2696     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2697   case ISD::ConstantPool:
2698     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2699   case ISD::GlobalAddress:
2700     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2701   case ISD::JumpTable:
2702     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2703   case ISD::ConstantFP:
2704     return LowerConstantFP(Op, DAG);
2705
2706   // i8, i64 math ops:
2707   case ISD::ADD:
2708   case ISD::SUB:
2709   case ISD::ROTR:
2710   case ISD::ROTL:
2711   case ISD::SRL:
2712   case ISD::SHL:
2713   case ISD::SRA: {
2714     if (VT == MVT::i8)
2715       return LowerI8Math(Op, DAG, Opc, *this);
2716     break;
2717   }
2718
2719   case ISD::FP_TO_SINT:
2720   case ISD::FP_TO_UINT:
2721     return LowerFP_TO_INT(Op, DAG, *this);
2722
2723   case ISD::SINT_TO_FP:
2724   case ISD::UINT_TO_FP:
2725     return LowerINT_TO_FP(Op, DAG, *this);
2726
2727   // Vector-related lowering.
2728   case ISD::BUILD_VECTOR:
2729     return LowerBUILD_VECTOR(Op, DAG);
2730   case ISD::SCALAR_TO_VECTOR:
2731     return LowerSCALAR_TO_VECTOR(Op, DAG);
2732   case ISD::VECTOR_SHUFFLE:
2733     return LowerVECTOR_SHUFFLE(Op, DAG);
2734   case ISD::EXTRACT_VECTOR_ELT:
2735     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2736   case ISD::INSERT_VECTOR_ELT:
2737     return LowerINSERT_VECTOR_ELT(Op, DAG);
2738
2739   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2740   case ISD::AND:
2741   case ISD::OR:
2742   case ISD::XOR:
2743     return LowerByteImmed(Op, DAG);
2744
2745   // Vector and i8 multiply:
2746   case ISD::MUL:
2747     if (VT == MVT::i8)
2748       return LowerI8Math(Op, DAG, Opc, *this);
2749
2750   case ISD::CTPOP:
2751     return LowerCTPOP(Op, DAG);
2752
2753   case ISD::SELECT_CC:
2754     return LowerSELECT_CC(Op, DAG, *this);
2755
2756   case ISD::SETCC:
2757     return LowerSETCC(Op, DAG, *this);
2758
2759   case ISD::TRUNCATE:
2760     return LowerTRUNCATE(Op, DAG);
2761
2762   case ISD::SIGN_EXTEND:
2763     return LowerSIGN_EXTEND(Op, DAG);
2764   }
2765
2766   return SDValue();
2767 }
2768
2769 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2770                                            SmallVectorImpl<SDValue>&Results,
2771                                            SelectionDAG &DAG)
2772 {
2773 #if 0
2774   unsigned Opc = (unsigned) N->getOpcode();
2775   EVT OpVT = N->getValueType(0);
2776
2777   switch (Opc) {
2778   default: {
2779     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2780     errs() << "Op.getOpcode() = " << Opc << "\n";
2781     errs() << "*Op.getNode():\n";
2782     N->dump();
2783     abort();
2784     /*NOTREACHED*/
2785   }
2786   }
2787 #endif
2788
2789   /* Otherwise, return unchanged */
2790 }
2791
2792 //===----------------------------------------------------------------------===//
2793 // Target Optimization Hooks
2794 //===----------------------------------------------------------------------===//
2795
2796 SDValue
2797 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2798 {
2799 #if 0
2800   TargetMachine &TM = getTargetMachine();
2801 #endif
2802   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2803   SelectionDAG &DAG = DCI.DAG;
2804   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2805   EVT NodeVT = N->getValueType(0);      // The node's value type
2806   EVT Op0VT = Op0.getValueType();       // The first operand's result
2807   SDValue Result;                       // Initially, empty result
2808   DebugLoc dl = N->getDebugLoc();
2809
2810   switch (N->getOpcode()) {
2811   default: break;
2812   case ISD::ADD: {
2813     SDValue Op1 = N->getOperand(1);
2814
2815     if (Op0.getOpcode() == SPUISD::IndirectAddr
2816         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2817       // Normalize the operands to reduce repeated code
2818       SDValue IndirectArg = Op0, AddArg = Op1;
2819
2820       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2821         IndirectArg = Op1;
2822         AddArg = Op0;
2823       }
2824
2825       if (isa<ConstantSDNode>(AddArg)) {
2826         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2827         SDValue IndOp1 = IndirectArg.getOperand(1);
2828
2829         if (CN0->isNullValue()) {
2830           // (add (SPUindirect <arg>, <arg>), 0) ->
2831           // (SPUindirect <arg>, <arg>)
2832
2833 #if !defined(NDEBUG)
2834           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2835             errs() << "\n"
2836                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2837                  << "With:    (SPUindirect <arg>, <arg>)\n";
2838           }
2839 #endif
2840
2841           return IndirectArg;
2842         } else if (isa<ConstantSDNode>(IndOp1)) {
2843           // (add (SPUindirect <arg>, <const>), <const>) ->
2844           // (SPUindirect <arg>, <const + const>)
2845           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2846           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2847           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2848
2849 #if !defined(NDEBUG)
2850           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2851             errs() << "\n"
2852                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2853                  << "), " << CN0->getSExtValue() << ")\n"
2854                  << "With:    (SPUindirect <arg>, "
2855                  << combinedConst << ")\n";
2856           }
2857 #endif
2858
2859           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2860                              IndirectArg, combinedValue);
2861         }
2862       }
2863     }
2864     break;
2865   }
2866   case ISD::SIGN_EXTEND:
2867   case ISD::ZERO_EXTEND:
2868   case ISD::ANY_EXTEND: {
2869     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2870       // (any_extend (SPUextract_elt0 <arg>)) ->
2871       // (SPUextract_elt0 <arg>)
2872       // Types must match, however...
2873 #if !defined(NDEBUG)
2874       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2875         errs() << "\nReplace: ";
2876         N->dump(&DAG);
2877         errs() << "\nWith:    ";
2878         Op0.getNode()->dump(&DAG);
2879         errs() << "\n";
2880       }
2881 #endif
2882
2883       return Op0;
2884     }
2885     break;
2886   }
2887   case SPUISD::IndirectAddr: {
2888     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2889       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2890       if (CN != 0 && CN->getZExtValue() == 0) {
2891         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2892         // (SPUaform <addr>, 0)
2893
2894         DEBUG(errs() << "Replace: ");
2895         DEBUG(N->dump(&DAG));
2896         DEBUG(errs() << "\nWith:    ");
2897         DEBUG(Op0.getNode()->dump(&DAG));
2898         DEBUG(errs() << "\n");
2899
2900         return Op0;
2901       }
2902     } else if (Op0.getOpcode() == ISD::ADD) {
2903       SDValue Op1 = N->getOperand(1);
2904       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2905         // (SPUindirect (add <arg>, <arg>), 0) ->
2906         // (SPUindirect <arg>, <arg>)
2907         if (CN1->isNullValue()) {
2908
2909 #if !defined(NDEBUG)
2910           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2911             errs() << "\n"
2912                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2913                  << "With:    (SPUindirect <arg>, <arg>)\n";
2914           }
2915 #endif
2916
2917           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2918                              Op0.getOperand(0), Op0.getOperand(1));
2919         }
2920       }
2921     }
2922     break;
2923   }
2924   case SPUISD::SHLQUAD_L_BITS:
2925   case SPUISD::SHLQUAD_L_BYTES:
2926   case SPUISD::ROTBYTES_LEFT: {
2927     SDValue Op1 = N->getOperand(1);
2928
2929     // Kill degenerate vector shifts:
2930     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2931       if (CN->isNullValue()) {
2932         Result = Op0;
2933       }
2934     }
2935     break;
2936   }
2937   case SPUISD::PREFSLOT2VEC: {
2938     switch (Op0.getOpcode()) {
2939     default:
2940       break;
2941     case ISD::ANY_EXTEND:
2942     case ISD::ZERO_EXTEND:
2943     case ISD::SIGN_EXTEND: {
2944       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2945       // <arg>
2946       // but only if the SPUprefslot2vec and <arg> types match.
2947       SDValue Op00 = Op0.getOperand(0);
2948       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2949         SDValue Op000 = Op00.getOperand(0);
2950         if (Op000.getValueType() == NodeVT) {
2951           Result = Op000;
2952         }
2953       }
2954       break;
2955     }
2956     case SPUISD::VEC2PREFSLOT: {
2957       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2958       // <arg>
2959       Result = Op0.getOperand(0);
2960       break;
2961     }
2962     }
2963     break;
2964   }
2965   }
2966
2967   // Otherwise, return unchanged.
2968 #ifndef NDEBUG
2969   if (Result.getNode()) {
2970     DEBUG(errs() << "\nReplace.SPU: ");
2971     DEBUG(N->dump(&DAG));
2972     DEBUG(errs() << "\nWith:        ");
2973     DEBUG(Result.getNode()->dump(&DAG));
2974     DEBUG(errs() << "\n");
2975   }
2976 #endif
2977
2978   return Result;
2979 }
2980
2981 //===----------------------------------------------------------------------===//
2982 // Inline Assembly Support
2983 //===----------------------------------------------------------------------===//
2984
2985 /// getConstraintType - Given a constraint letter, return the type of
2986 /// constraint it is for this target.
2987 SPUTargetLowering::ConstraintType
2988 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2989   if (ConstraintLetter.size() == 1) {
2990     switch (ConstraintLetter[0]) {
2991     default: break;
2992     case 'b':
2993     case 'r':
2994     case 'f':
2995     case 'v':
2996     case 'y':
2997       return C_RegisterClass;
2998     }
2999   }
3000   return TargetLowering::getConstraintType(ConstraintLetter);
3001 }
3002
3003 std::pair<unsigned, const TargetRegisterClass*>
3004 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3005                                                 EVT VT) const
3006 {
3007   if (Constraint.size() == 1) {
3008     // GCC RS6000 Constraint Letters
3009     switch (Constraint[0]) {
3010     case 'b':   // R1-R31
3011     case 'r':   // R0-R31
3012       if (VT == MVT::i64)
3013         return std::make_pair(0U, SPU::R64CRegisterClass);
3014       return std::make_pair(0U, SPU::R32CRegisterClass);
3015     case 'f':
3016       if (VT == MVT::f32)
3017         return std::make_pair(0U, SPU::R32FPRegisterClass);
3018       else if (VT == MVT::f64)
3019         return std::make_pair(0U, SPU::R64FPRegisterClass);
3020       break;
3021     case 'v':
3022       return std::make_pair(0U, SPU::GPRCRegisterClass);
3023     }
3024   }
3025
3026   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3027 }
3028
3029 //! Compute used/known bits for a SPU operand
3030 void
3031 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3032                                                   const APInt &Mask,
3033                                                   APInt &KnownZero,
3034                                                   APInt &KnownOne,
3035                                                   const SelectionDAG &DAG,
3036                                                   unsigned Depth ) const {
3037 #if 0
3038   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3039
3040   switch (Op.getOpcode()) {
3041   default:
3042     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3043     break;
3044   case CALL:
3045   case SHUFB:
3046   case SHUFFLE_MASK:
3047   case CNTB:
3048   case SPUISD::PREFSLOT2VEC:
3049   case SPUISD::LDRESULT:
3050   case SPUISD::VEC2PREFSLOT:
3051   case SPUISD::SHLQUAD_L_BITS:
3052   case SPUISD::SHLQUAD_L_BYTES:
3053   case SPUISD::VEC_ROTL:
3054   case SPUISD::VEC_ROTR:
3055   case SPUISD::ROTBYTES_LEFT:
3056   case SPUISD::SELECT_MASK:
3057   case SPUISD::SELB:
3058   }
3059 #endif
3060 }
3061
3062 unsigned
3063 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3064                                                    unsigned Depth) const {
3065   switch (Op.getOpcode()) {
3066   default:
3067     return 1;
3068
3069   case ISD::SETCC: {
3070     EVT VT = Op.getValueType();
3071
3072     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3073       VT = MVT::i32;
3074     }
3075     return VT.getSizeInBits();
3076   }
3077   }
3078 }
3079
3080 // LowerAsmOperandForConstraint
3081 void
3082 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3083                                                 char ConstraintLetter,
3084                                                 bool hasMemory,
3085                                                 std::vector<SDValue> &Ops,
3086                                                 SelectionDAG &DAG) const {
3087   // Default, for the time being, to the base class handler
3088   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3089                                                Ops, DAG);
3090 }
3091
3092 /// isLegalAddressImmediate - Return true if the integer value can be used
3093 /// as the offset of the target addressing mode.
3094 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3095                                                 const Type *Ty) const {
3096   // SPU's addresses are 256K:
3097   return (V > -(1 << 18) && V < (1 << 18) - 1);
3098 }
3099
3100 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3101   return false;
3102 }
3103
3104 bool
3105 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3106   // The SPU target isn't yet aware of offsets.
3107   return false;
3108 }