lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "SPUMachineFunction.h"
  19 #include "llvm/Constants.h"
  20 #include "llvm/Function.h"
  21 #include "llvm/Intrinsics.h"
  22 #include "llvm/CallingConv.h"
  23 #include "llvm/CodeGen/CallingConvLower.h"
  24 #include "llvm/CodeGen/MachineFrameInfo.h"
  25 #include "llvm/CodeGen/MachineFunction.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/CodeGen/SelectionDAG.h"
  29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  30 #include "llvm/Target/TargetOptions.h"
  31 #include "llvm/ADT/VectorExtras.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Support/MathExtras.h"
  35 #include "llvm/Support/raw_ostream.h"
  36 #include <map>
  37
  38 using namespace llvm;
  39
  40 // Used in getTargetNodeName() below
  41 namespace {
  42   std::map<unsigned, const char *> node_names;
  43
  44   //! EVT mapping to useful data for Cell SPU
  45   struct valtype_map_s {
  46     EVT   valtype;
  47     int   prefslot_byte;
  48   };
  49
  50   const valtype_map_s valtype_map[] = {
  51     { MVT::i1,   3 },
  52     { MVT::i8,   3 },
  53     { MVT::i16,  2 },
  54     { MVT::i32,  0 },
  55     { MVT::f32,  0 },
  56     { MVT::i64,  0 },
  57     { MVT::f64,  0 },
  58     { MVT::i128, 0 }
  59   };
  60
  61   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  62
  63   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  64     const valtype_map_s *retval = 0;
  65
  66     for (size_t i = 0; i < n_valtype_map; ++i) {
  67       if (valtype_map[i].valtype == VT) {
  68         retval = valtype_map + i;
  69         break;
  70       }
  71     }
  72
  73 #ifndef NDEBUG
  74     if (retval == 0) {
  75       report_fatal_error("getValueTypeMapEntry returns NULL for " +
  76                          Twine(VT.getEVTString()));
  77     }
  78 #endif
  79
  80     return retval;
  81   }
  82
  83   //! Expand a library call into an actual call DAG node
  84   /*!
  85    \note
  86    This code is taken from SelectionDAGLegalize, since it is not exposed as
  87    part of the LLVM SelectionDAG API.
  88    */
  89
  90   SDValue
  91   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  92                 bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
  93     // The input chain to this libcall is the entry node of the function.
  94     // Legalizing the call will automatically add the previous call to the
  95     // dependence.
  96     SDValue InChain = DAG.getEntryNode();
  97
  98     TargetLowering::ArgListTy Args;
  99     TargetLowering::ArgListEntry Entry;
 100     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 101       EVT ArgVT = Op.getOperand(i).getValueType();
 102       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 103       Entry.Node = Op.getOperand(i);
 104       Entry.Ty = ArgTy;
 105       Entry.isSExt = isSigned;
 106       Entry.isZExt = !isSigned;
 107       Args.push_back(Entry);
 108     }
 109     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 110                                            TLI.getPointerTy());
 111
 112     // Splice the libcall in wherever FindInputOutputChains tells us to.
 113     const Type *RetTy =
 114                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 115     std::pair<SDValue, SDValue> CallInfo =
 116             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 117                             0, TLI.getLibcallCallingConv(LC), false,
 118                             /*isReturnValueUsed=*/true,
 119                             Callee, Args, DAG, Op.getDebugLoc());
 120
 121     return CallInfo.first;
 122   }
 123 }
 124
 125 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 126   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 127     SPUTM(TM) {
 128   // Fold away setcc operations if possible.
 129   setPow2DivIsCheap();
 130
 131   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 132   setUseUnderscoreSetJmp(true);
 133   setUseUnderscoreLongJmp(true);
 134
 135   // Set RTLIB libcall names as used by SPU:
 136   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 137
 138   // Set up the SPU's register classes:
 139   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 140   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 141   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 142   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 143   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 144   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 145   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 146
 147   // SPU has no sign or zero extended loads for i1, i8, i16:
 148   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 149   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 150   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 151
 152   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 153   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 154
 155   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 156   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 157   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 158   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 159
 160   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 161
 162   // SPU constant load actions are custom lowered:
 163   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 164   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 165
 166   // SPU's loads and stores have to be custom lowered:
 167   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 168        ++sctype) {
 169     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 170
 171     setOperationAction(ISD::LOAD,   VT, Custom);
 172     setOperationAction(ISD::STORE,  VT, Custom);
 173     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 174     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 175     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 176
 177     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 178       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 179       setTruncStoreAction(VT, StoreVT, Expand);
 180     }
 181   }
 182
 183   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 184        ++sctype) {
 185     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 186
 187     setOperationAction(ISD::LOAD,   VT, Custom);
 188     setOperationAction(ISD::STORE,  VT, Custom);
 189
 190     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 191       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 192       setTruncStoreAction(VT, StoreVT, Expand);
 193     }
 194   }
 195
 196   // Expand the jumptable branches
 197   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 198   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 199
 200   // Custom lower SELECT_CC for most cases, but expand by default
 201   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 202   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 203   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 204   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 205   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 206
 207   // SPU has no intrinsics for these particular operations:
 208   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 209
 210   // SPU has no division/remainder instructions
 211   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 212   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 213   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 214   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 215   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 216   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 217   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 218   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 219   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 220   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 221   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 222   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 223   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 224   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 225   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 226   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 227   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 228   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 229   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 230   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 231   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 232   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 233   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 234   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 235   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 236   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 237   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 238   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 239   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 240   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 241
 242   // We don't support sin/cos/sqrt/fmod
 243   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 244   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 245   setOperationAction(ISD::FREM , MVT::f64, Expand);
 246   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 247   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 248   setOperationAction(ISD::FREM , MVT::f32, Expand);
 249
 250   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 251   // for f32!)
 252   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 253   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 254
 255   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 256   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 257
 258   // SPU can do rotate right and left, so legalize it... but customize for i8
 259   // because instructions don't exist.
 260
 261   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 262   //        .td files.
 263   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 264   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 265   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 266
 267   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 268   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 269   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 270
 271   // SPU has no native version of shift left/right for i8
 272   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 273   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 274   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 275
 276   // Make these operations legal and handle them during instruction selection:
 277   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 278   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 279   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 280
 281   // Custom lower i8, i32 and i64 multiplications
 282   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 283   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 284   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 285
 286   // Expand double-width multiplication
 287   // FIXME: It would probably be reasonable to support some of these operations
 288   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 289   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 290   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 291   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 292   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 293   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 294   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 295   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 296   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 297   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 298   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 299   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 300   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 301   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 302   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 303   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 304
 305   // Need to custom handle (some) common i8, i64 math ops
 306   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 307   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 308   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 309   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 310
 311   // SPU does not have BSWAP. It does have i32 support CTLZ.
 312   // CTPOP has to be custom lowered.
 313   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 314   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 315
 316   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 317   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 318   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 319   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 320   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 321
 322   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 323   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 324   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 325   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 326   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 327
 328   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 329   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 330   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 331   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 332   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 333
 334   // SPU has a version of select that implements (a&~c)|(b&c), just like
 335   // select ought to work:
 336   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 337   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 338   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 339   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 340
 341   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 342   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 343   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 344   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 345   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 346
 347   // Custom lower i128 -> i64 truncates
 348   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 349
 350   // Custom lower i32/i64 -> i128 sign extend
 351   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 352
 353   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 354   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 355   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 356   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 357   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 358   // to expand to a libcall, hence the custom lowering:
 359   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 360   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 361   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
 362   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 363   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 364   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 365
 366   // FDIV on SPU requires custom lowering
 367   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 368
 369   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 370   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 371   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 372   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 373   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 374   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 375   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 376   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 377   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 378
 379   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 380   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 381   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 382   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 383
 384   // We cannot sextinreg(i1).  Expand to shifts.
 385   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 386
 387   // We want to legalize GlobalAddress and ConstantPool nodes into the
 388   // appropriate instructions to materialize the address.
 389   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 390        ++sctype) {
 391     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 392
 393     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 394     setOperationAction(ISD::ConstantPool,   VT, Custom);
 395     setOperationAction(ISD::JumpTable,      VT, Custom);
 396   }
 397
 398   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 399   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 400
 401   // Use the default implementation.
 402   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 403   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 404   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 405   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 406   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 407   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 408   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 409
 410   // Cell SPU has instructions for converting between i64 and fp.
 411   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 412   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 413
 414   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 415   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 416
 417   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 418   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 419
 420   // First set operation action for all vector types to expand. Then we
 421   // will selectively turn on ones that can be effectively codegen'd.
 422   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 423   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 424   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 425   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 426   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 427   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 428
 429   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 430        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 431     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 432
 433     // add/sub are legal for all supported vector VT's.
 434     setOperationAction(ISD::ADD,     VT, Legal);
 435     setOperationAction(ISD::SUB,     VT, Legal);
 436     // mul has to be custom lowered.
 437     setOperationAction(ISD::MUL,     VT, Legal);
 438
 439     setOperationAction(ISD::AND,     VT, Legal);
 440     setOperationAction(ISD::OR,      VT, Legal);
 441     setOperationAction(ISD::XOR,     VT, Legal);
 442     setOperationAction(ISD::LOAD,    VT, Legal);
 443     setOperationAction(ISD::SELECT,  VT, Legal);
 444     setOperationAction(ISD::STORE,   VT, Legal);
 445
 446     // These operations need to be expanded:
 447     setOperationAction(ISD::SDIV,    VT, Expand);
 448     setOperationAction(ISD::SREM,    VT, Expand);
 449     setOperationAction(ISD::UDIV,    VT, Expand);
 450     setOperationAction(ISD::UREM,    VT, Expand);
 451
 452     // Custom lower build_vector, constant pool spills, insert and
 453     // extract vector elements:
 454     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 455     setOperationAction(ISD::ConstantPool, VT, Custom);
 456     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 457     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 458     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 459     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 460   }
 461
 462   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 463   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 464   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 465   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 466
 467   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 468
 469   setShiftAmountType(MVT::i32);
 470   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 471
 472   setStackPointerRegisterToSaveRestore(SPU::R1);
 473
 474   // We have target-specific dag combine patterns for the following nodes:
 475   setTargetDAGCombine(ISD::ADD);
 476   setTargetDAGCombine(ISD::ZERO_EXTEND);
 477   setTargetDAGCombine(ISD::SIGN_EXTEND);
 478   setTargetDAGCombine(ISD::ANY_EXTEND);
 479
 480   computeRegisterProperties();
 481
 482   // Set pre-RA register scheduler default to BURR, which produces slightly
 483   // better code than the default (could also be TDRR, but TargetLowering.h
 484   // needs a mod to support that model):
 485   setSchedulingPreference(Sched::RegPressure);
 486 }
 487
 488 const char *
 489 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 490 {
 491   if (node_names.empty()) {
 492     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 493     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 494     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 495     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 496     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 497     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 498     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 499     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 500     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 501     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 502     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 503     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 504     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 505     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 506     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 507     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 508     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 509     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 510     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 511             "SPUISD::ROTBYTES_LEFT_BITS";
 512     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 513     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 514     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 515     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 516     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 517   }
 518
 519   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 520
 521   return ((i != node_names.end()) ? i->second : 0);
 522 }
 523
 524 /// getFunctionAlignment - Return the Log2 alignment of this function.
 525 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 526   return 3;
 527 }
 528
 529 //===----------------------------------------------------------------------===//
 530 // Return the Cell SPU's SETCC result type
 531 //===----------------------------------------------------------------------===//
 532
 533 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 534   // i16 and i32 are valid SETCC result types
 535   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 536     VT.getSimpleVT().SimpleTy :
 537     MVT::i32);
 538 }
 539
 540 //===----------------------------------------------------------------------===//
 541 // Calling convention code:
 542 //===----------------------------------------------------------------------===//
 543
 544 #include "SPUGenCallingConv.inc"
 545
 546 //===----------------------------------------------------------------------===//
 547 //  LowerOperation implementation
 548 //===----------------------------------------------------------------------===//
 549
 550 /// Custom lower loads for CellSPU
 551 /*!
 552  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 553  within a 16-byte block, we have to rotate to extract the requested element.
 554
 555  For extending loads, we also want to ensure that the following sequence is
 556  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 557
 558 \verbatim
 559 %1  v16i8,ch = load
 560 %2  v16i8,ch = rotate %1
 561 %3  v4f8, ch = bitconvert %2
 562 %4  f32      = vec2perfslot %3
 563 %5  f64      = fp_extend %4
 564 \endverbatim
 565 */
 566 static SDValue
 567 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 568   LoadSDNode *LN = cast<LoadSDNode>(Op);
 569   SDValue the_chain = LN->getChain();
 570   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 571   EVT InVT = LN->getMemoryVT();
 572   EVT OutVT = Op.getValueType();
 573   ISD::LoadExtType ExtType = LN->getExtensionType();
 574   unsigned alignment = LN->getAlignment();
 575   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 576   DebugLoc dl = Op.getDebugLoc();
 577
 578   switch (LN->getAddressingMode()) {
 579   case ISD::UNINDEXED: {
 580     SDValue result;
 581     SDValue basePtr = LN->getBasePtr();
 582     SDValue rotate;
 583
 584     if (alignment == 16) {
 585       ConstantSDNode *CN;
 586
 587       // Special cases for a known aligned load to simplify the base pointer
 588       // and the rotation amount:
 589       if (basePtr.getOpcode() == ISD::ADD
 590           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 591         // Known offset into basePtr
 592         int64_t offset = CN->getSExtValue();
 593         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 594
 595         if (rotamt < 0)
 596           rotamt += 16;
 597
 598         rotate = DAG.getConstant(rotamt, MVT::i16);
 599
 600         // Simplify the base pointer for this case:
 601         basePtr = basePtr.getOperand(0);
 602         if ((offset & ~0xf) > 0) {
 603           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 604                                 basePtr,
 605                                 DAG.getConstant((offset & ~0xf), PtrVT));
 606         }
 607       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 608                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 609                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 610                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 611         // Plain aligned a-form address: rotate into preferred slot
 612         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 613         int64_t rotamt = -vtm->prefslot_byte;
 614         if (rotamt < 0)
 615           rotamt += 16;
 616         rotate = DAG.getConstant(rotamt, MVT::i16);
 617       } else {
 618         // Offset the rotate amount by the basePtr and the preferred slot
 619         // byte offset
 620         int64_t rotamt = -vtm->prefslot_byte;
 621         if (rotamt < 0)
 622           rotamt += 16;
 623         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 624                              basePtr,
 625                              DAG.getConstant(rotamt, PtrVT));
 626       }
 627     } else {
 628       // Unaligned load: must be more pessimistic about addressing modes:
 629       if (basePtr.getOpcode() == ISD::ADD) {
 630         MachineFunction &MF = DAG.getMachineFunction();
 631         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 632         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 633         SDValue Flag;
 634
 635         SDValue Op0 = basePtr.getOperand(0);
 636         SDValue Op1 = basePtr.getOperand(1);
 637
 638         if (isa<ConstantSDNode>(Op1)) {
 639           // Convert the (add <ptr>, <const>) to an indirect address contained
 640           // in a register. Note that this is done because we need to avoid
 641           // creating a 0(reg) d-form address due to the SPU's block loads.
 642           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 643           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 644           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 645         } else {
 646           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 647           // will likely be lowered as a reg(reg) x-form address.
 648           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 649         }
 650       } else {
 651         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 652                               basePtr,
 653                               DAG.getConstant(0, PtrVT));
 654       }
 655
 656       // Offset the rotate amount by the basePtr and the preferred slot
 657       // byte offset
 658       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 659                            basePtr,
 660                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 661     }
 662
 663     // Re-emit as a v16i8 vector load
 664     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 665                          LN->getSrcValue(), LN->getSrcValueOffset(),
 666                          LN->isVolatile(), LN->isNonTemporal(), 16);
 667
 668     // Update the chain
 669     the_chain = result.getValue(1);
 670
 671     // Rotate into the preferred slot:
 672     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 673                          result.getValue(0), rotate);
 674
 675     // Convert the loaded v16i8 vector to the appropriate vector type
 676     // specified by the operand:
 677     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 678                                  InVT, (128 / InVT.getSizeInBits()));
 679     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 680                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 681
 682     // Handle extending loads by extending the scalar result:
 683     if (ExtType == ISD::SEXTLOAD) {
 684       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 685     } else if (ExtType == ISD::ZEXTLOAD) {
 686       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 687     } else if (ExtType == ISD::EXTLOAD) {
 688       unsigned NewOpc = ISD::ANY_EXTEND;
 689
 690       if (OutVT.isFloatingPoint())
 691         NewOpc = ISD::FP_EXTEND;
 692
 693       result = DAG.getNode(NewOpc, dl, OutVT, result);
 694     }
 695
 696     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 697     SDValue retops[2] = {
 698       result,
 699       the_chain
 700     };
 701
 702     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 703                          retops, sizeof(retops) / sizeof(retops[0]));
 704     return result;
 705   }
 706   case ISD::PRE_INC:
 707   case ISD::PRE_DEC:
 708   case ISD::POST_INC:
 709   case ISD::POST_DEC:
 710   case ISD::LAST_INDEXED_MODE:
 711     {
 712       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 713                          "than UNINDEXED\n" +
 714                          Twine((unsigned)LN->getAddressingMode()));
 715       /*NOTREACHED*/
 716     }
 717   }
 718
 719   return SDValue();
 720 }
 721
 722 /// Custom lower stores for CellSPU
 723 /*!
 724  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 725  within a 16-byte block, we have to generate a shuffle to insert the
 726  requested element into its place, then store the resulting block.
 727  */
 728 static SDValue
 729 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 730   StoreSDNode *SN = cast<StoreSDNode>(Op);
 731   SDValue Value = SN->getValue();
 732   EVT VT = Value.getValueType();
 733   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 734   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 735   DebugLoc dl = Op.getDebugLoc();
 736   unsigned alignment = SN->getAlignment();
 737
 738   switch (SN->getAddressingMode()) {
 739   case ISD::UNINDEXED: {
 740     // The vector type we really want to load from the 16-byte chunk.
 741     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 742                                  VT, (128 / VT.getSizeInBits()));
 743
 744     SDValue alignLoadVec;
 745     SDValue basePtr = SN->getBasePtr();
 746     SDValue the_chain = SN->getChain();
 747     SDValue insertEltOffs;
 748
 749     if (alignment == 16) {
 750       ConstantSDNode *CN;
 751       // Special cases for a known aligned load to simplify the base pointer
 752       // and insertion byte:
 753       if (basePtr.getOpcode() == ISD::ADD
 754           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 755         // Known offset into basePtr
 756         int64_t offset = CN->getSExtValue();
 757
 758         // Simplify the base pointer for this case:
 759         basePtr = basePtr.getOperand(0);
 760         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 761                                     basePtr,
 762                                     DAG.getConstant((offset & 0xf), PtrVT));
 763
 764         if ((offset & ~0xf) > 0) {
 765           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 766                                 basePtr,
 767                                 DAG.getConstant((offset & ~0xf), PtrVT));
 768         }
 769       } else {
 770         // Otherwise, assume it's at byte 0 of basePtr
 771         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 772                                     basePtr,
 773                                     DAG.getConstant(0, PtrVT));
 774         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 775                                     basePtr,
 776                                     DAG.getConstant(0, PtrVT));
 777       }
 778     } else {
 779       // Unaligned load: must be more pessimistic about addressing modes:
 780       if (basePtr.getOpcode() == ISD::ADD) {
 781         MachineFunction &MF = DAG.getMachineFunction();
 782         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 783         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 784         SDValue Flag;
 785
 786         SDValue Op0 = basePtr.getOperand(0);
 787         SDValue Op1 = basePtr.getOperand(1);
 788
 789         if (isa<ConstantSDNode>(Op1)) {
 790           // Convert the (add <ptr>, <const>) to an indirect address contained
 791           // in a register. Note that this is done because we need to avoid
 792           // creating a 0(reg) d-form address due to the SPU's block loads.
 793           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 794           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 795           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 796         } else {
 797           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 798           // will likely be lowered as a reg(reg) x-form address.
 799           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 800         }
 801       } else {
 802         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 803                               basePtr,
 804                               DAG.getConstant(0, PtrVT));
 805       }
 806
 807       // Insertion point is solely determined by basePtr's contents
 808       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 809                                   basePtr,
 810                                   DAG.getConstant(0, PtrVT));
 811     }
 812
 813     // Load the memory to which to store.
 814     alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
 815                                SN->getSrcValue(), SN->getSrcValueOffset(),
 816                                SN->isVolatile(), SN->isNonTemporal(), 16);
 817
 818     // Update the chain
 819     the_chain = alignLoadVec.getValue(1);
 820
 821     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 822     SDValue theValue = SN->getValue();
 823     SDValue result;
 824
 825     if (StVT != VT
 826         && (theValue.getOpcode() == ISD::AssertZext
 827             || theValue.getOpcode() == ISD::AssertSext)) {
 828       // Drill down and get the value for zero- and sign-extended
 829       // quantities
 830       theValue = theValue.getOperand(0);
 831     }
 832
 833     // If the base pointer is already a D-form address, then just create
 834     // a new D-form address with a slot offset and the orignal base pointer.
 835     // Otherwise generate a D-form address with the slot offset relative
 836     // to the stack pointer, which is always aligned.
 837 #if !defined(NDEBUG)
 838       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 839         errs() << "CellSPU LowerSTORE: basePtr = ";
 840         basePtr.getNode()->dump(&DAG);
 841         errs() << "\n";
 842       }
 843 #endif
 844
 845     SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
 846                                       insertEltOffs);
 847     SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
 848                                       theValue);
 849
 850     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 851                          vectorizeOp, alignLoadVec,
 852                          DAG.getNode(ISD::BIT_CONVERT, dl,
 853                                      MVT::v4i32, insertEltOp));
 854
 855     result = DAG.getStore(the_chain, dl, result, basePtr,
 856                           LN->getSrcValue(), LN->getSrcValueOffset(),
 857                           LN->isVolatile(), LN->isNonTemporal(),
 858                           LN->getAlignment());
 859
 860 #if 0 && !defined(NDEBUG)
 861     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 862       const SDValue &currentRoot = DAG.getRoot();
 863
 864       DAG.setRoot(result);
 865       errs() << "------- CellSPU:LowerStore result:\n";
 866       DAG.dump();
 867       errs() << "-------\n";
 868       DAG.setRoot(currentRoot);
 869     }
 870 #endif
 871
 872     return result;
 873     /*UNREACHED*/
 874   }
 875   case ISD::PRE_INC:
 876   case ISD::PRE_DEC:
 877   case ISD::POST_INC:
 878   case ISD::POST_DEC:
 879   case ISD::LAST_INDEXED_MODE:
 880     {
 881       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 882                          "than UNINDEXED\n" +
 883                          Twine((unsigned)SN->getAddressingMode()));
 884       /*NOTREACHED*/
 885     }
 886   }
 887
 888   return SDValue();
 889 }
 890
 891 //! Generate the address of a constant pool entry.
 892 static SDValue
 893 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 894   EVT PtrVT = Op.getValueType();
 895   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 896   const Constant *C = CP->getConstVal();
 897   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 898   SDValue Zero = DAG.getConstant(0, PtrVT);
 899   const TargetMachine &TM = DAG.getTarget();
 900   // FIXME there is no actual debug info here
 901   DebugLoc dl = Op.getDebugLoc();
 902
 903   if (TM.getRelocationModel() == Reloc::Static) {
 904     if (!ST->usingLargeMem()) {
 905       // Just return the SDValue with the constant pool address in it.
 906       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 907     } else {
 908       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 909       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 910       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 911     }
 912   }
 913
 914   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 915                    " not supported.");
 916   return SDValue();
 917 }
 918
 919 //! Alternate entry point for generating the address of a constant pool entry
 920 SDValue
 921 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 922   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 923 }
 924
 925 static SDValue
 926 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 927   EVT PtrVT = Op.getValueType();
 928   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 929   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 930   SDValue Zero = DAG.getConstant(0, PtrVT);
 931   const TargetMachine &TM = DAG.getTarget();
 932   // FIXME there is no actual debug info here
 933   DebugLoc dl = Op.getDebugLoc();
 934
 935   if (TM.getRelocationModel() == Reloc::Static) {
 936     if (!ST->usingLargeMem()) {
 937       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 938     } else {
 939       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 940       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 941       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 942     }
 943   }
 944
 945   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 946                    " not supported.");
 947   return SDValue();
 948 }
 949
 950 static SDValue
 951 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 952   EVT PtrVT = Op.getValueType();
 953   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 954   const GlobalValue *GV = GSDN->getGlobal();
 955   SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
 956                                           PtrVT, GSDN->getOffset());
 957   const TargetMachine &TM = DAG.getTarget();
 958   SDValue Zero = DAG.getConstant(0, PtrVT);
 959   // FIXME there is no actual debug info here
 960   DebugLoc dl = Op.getDebugLoc();
 961
 962   if (TM.getRelocationModel() == Reloc::Static) {
 963     if (!ST->usingLargeMem()) {
 964       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 965     } else {
 966       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 967       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 968       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 969     }
 970   } else {
 971     report_fatal_error("LowerGlobalAddress: Relocation model other than static"
 972                       "not supported.");
 973     /*NOTREACHED*/
 974   }
 975
 976   return SDValue();
 977 }
 978
 979 //! Custom lower double precision floating point constants
 980 static SDValue
 981 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 982   EVT VT = Op.getValueType();
 983   // FIXME there is no actual debug info here
 984   DebugLoc dl = Op.getDebugLoc();
 985
 986   if (VT == MVT::f64) {
 987     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 988
 989     assert((FP != 0) &&
 990            "LowerConstantFP: Node is not ConstantFPSDNode");
 991
 992     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 993     SDValue T = DAG.getConstant(dbits, MVT::i64);
 994     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
 995     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
 996                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
 997   }
 998
 999   return SDValue();
1000 }
1001
1002 SDValue
1003 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1004                                         CallingConv::ID CallConv, bool isVarArg,
1005                                         const SmallVectorImpl<ISD::InputArg>
1006                                           &Ins,
1007                                         DebugLoc dl, SelectionDAG &DAG,
1008                                         SmallVectorImpl<SDValue> &InVals)
1009                                           const {
1010
1011   MachineFunction &MF = DAG.getMachineFunction();
1012   MachineFrameInfo *MFI = MF.getFrameInfo();
1013   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1014   SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
1015
1016   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1017   unsigned ArgRegIdx = 0;
1018   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1019
1020   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1021
1022   SmallVector<CCValAssign, 16> ArgLocs;
1023   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1024                  *DAG.getContext());
1025   // FIXME: allow for other calling conventions
1026   CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
1027
1028   // Add DAG nodes to load the arguments or copy them out of registers.
1029   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1030     EVT ObjectVT = Ins[ArgNo].VT;
1031     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1032     SDValue ArgVal;
1033     CCValAssign &VA = ArgLocs[ArgNo];
1034
1035     if (VA.isRegLoc()) {
1036       const TargetRegisterClass *ArgRegClass;
1037
1038       switch (ObjectVT.getSimpleVT().SimpleTy) {
1039       default:
1040         report_fatal_error("LowerFormalArguments Unhandled argument type: " +
1041                            Twine(ObjectVT.getEVTString()));
1042       case MVT::i8:
1043         ArgRegClass = &SPU::R8CRegClass;
1044         break;
1045       case MVT::i16:
1046         ArgRegClass = &SPU::R16CRegClass;
1047         break;
1048       case MVT::i32:
1049         ArgRegClass = &SPU::R32CRegClass;
1050         break;
1051       case MVT::i64:
1052         ArgRegClass = &SPU::R64CRegClass;
1053         break;
1054       case MVT::i128:
1055         ArgRegClass = &SPU::GPRCRegClass;
1056         break;
1057       case MVT::f32:
1058         ArgRegClass = &SPU::R32FPRegClass;
1059         break;
1060       case MVT::f64:
1061         ArgRegClass = &SPU::R64FPRegClass;
1062         break;
1063       case MVT::v2f64:
1064       case MVT::v4f32:
1065       case MVT::v2i64:
1066       case MVT::v4i32:
1067       case MVT::v8i16:
1068       case MVT::v16i8:
1069         ArgRegClass = &SPU::VECREGRegClass;
1070         break;
1071       }
1072
1073       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1074       RegInfo.addLiveIn(VA.getLocReg(), VReg);
1075       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1076       ++ArgRegIdx;
1077     } else {
1078       // We need to load the argument to a virtual register if we determined
1079       // above that we ran out of physical registers of the appropriate type
1080       // or we're forced to do vararg
1081       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
1082       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1083       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
1084       ArgOffset += StackSlotSize;
1085     }
1086
1087     InVals.push_back(ArgVal);
1088     // Update the chain
1089     Chain = ArgVal.getOperand(0);
1090   }
1091
1092   // vararg handling:
1093   if (isVarArg) {
1094     // FIXME: we should be able to query the argument registers from
1095     //        tablegen generated code.
1096     static const unsigned ArgRegs[] = {
1097       SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
1098       SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
1099       SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
1100       SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
1101       SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
1102       SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
1103       SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
1104       SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
1105       SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
1106       SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
1107       SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
1108     };
1109     // size of ArgRegs array
1110     unsigned NumArgRegs = 77;
1111
1112     // We will spill (79-3)+1 registers to the stack
1113     SmallVector<SDValue, 79-3+1> MemOps;
1114
1115     // Create the frame slot
1116     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1117       FuncInfo->setVarArgsFrameIndex(
1118         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
1119       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1120       unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
1121       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1122       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
1123                                    false, false, 0);
1124       Chain = Store.getOperand(0);
1125       MemOps.push_back(Store);
1126
1127       // Increment address by stack slot size for the next stored argument
1128       ArgOffset += StackSlotSize;
1129     }
1130     if (!MemOps.empty())
1131       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1132                           &MemOps[0], MemOps.size());
1133   }
1134
1135   return Chain;
1136 }
1137
1138 /// isLSAAddress - Return the immediate to use if the specified
1139 /// value is representable as a LSA address.
1140 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1141   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1142   if (!C) return 0;
1143
1144   int Addr = C->getZExtValue();
1145   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1146       (Addr << 14 >> 14) != Addr)
1147     return 0;  // Top 14 bits have to be sext of immediate.
1148
1149   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1150 }
1151
1152 SDValue
1153 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1154                              CallingConv::ID CallConv, bool isVarArg,
1155                              bool &isTailCall,
1156                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1157                              const SmallVectorImpl<SDValue> &OutVals,
1158                              const SmallVectorImpl<ISD::InputArg> &Ins,
1159                              DebugLoc dl, SelectionDAG &DAG,
1160                              SmallVectorImpl<SDValue> &InVals) const {
1161   // CellSPU target does not yet support tail call optimization.
1162   isTailCall = false;
1163
1164   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1165   unsigned NumOps     = Outs.size();
1166   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1167
1168   SmallVector<CCValAssign, 16> ArgLocs;
1169   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1170                  *DAG.getContext());
1171   // FIXME: allow for other calling conventions
1172   CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
1173
1174   const unsigned NumArgRegs = ArgLocs.size();
1175
1176
1177   // Handy pointer type
1178   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1179
1180   // Set up a copy of the stack pointer for use loading and storing any
1181   // arguments that may not fit in the registers available for argument
1182   // passing.
1183   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1184
1185   // Figure out which arguments are going to go in registers, and which in
1186   // memory.
1187   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1188   unsigned ArgRegIdx = 0;
1189
1190   // Keep track of registers passing arguments
1191   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1192   // And the arguments passed on the stack
1193   SmallVector<SDValue, 8> MemOpChains;
1194
1195   for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
1196     SDValue Arg = OutVals[ArgRegIdx];
1197     CCValAssign &VA = ArgLocs[ArgRegIdx];
1198
1199     // PtrOff will be used to store the current argument to the stack if a
1200     // register cannot be found for it.
1201     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1202     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1203
1204     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1205     default: llvm_unreachable("Unexpected ValueType for argument!");
1206     case MVT::i8:
1207     case MVT::i16:
1208     case MVT::i32:
1209     case MVT::i64:
1210     case MVT::i128:
1211     case MVT::f32:
1212     case MVT::f64:
1213     case MVT::v2i64:
1214     case MVT::v2f64:
1215     case MVT::v4f32:
1216     case MVT::v4i32:
1217     case MVT::v8i16:
1218     case MVT::v16i8:
1219       if (ArgRegIdx != NumArgRegs) {
1220         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1221       } else {
1222         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1223                                            false, false, 0));
1224         ArgOffset += StackSlotSize;
1225       }
1226       break;
1227     }
1228   }
1229
1230   // Accumulate how many bytes are to be pushed on the stack, including the
1231   // linkage area, and parameter passing area.  According to the SPU ABI,
1232   // we minimally need space for [LR] and [SP].
1233   unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
1234
1235   // Insert a call sequence start
1236   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1237                                                             true));
1238
1239   if (!MemOpChains.empty()) {
1240     // Adjust the stack pointer for the stack arguments.
1241     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1242                         &MemOpChains[0], MemOpChains.size());
1243   }
1244
1245   // Build a sequence of copy-to-reg nodes chained together with token chain
1246   // and flag operands which copy the outgoing args into the appropriate regs.
1247   SDValue InFlag;
1248   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1249     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1250                              RegsToPass[i].second, InFlag);
1251     InFlag = Chain.getValue(1);
1252   }
1253
1254   SmallVector<SDValue, 8> Ops;
1255   unsigned CallOpc = SPUISD::CALL;
1256
1257   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1258   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1259   // node so that legalize doesn't hack it.
1260   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1261     const GlobalValue *GV = G->getGlobal();
1262     EVT CalleeVT = Callee.getValueType();
1263     SDValue Zero = DAG.getConstant(0, PtrVT);
1264     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
1265
1266     if (!ST->usingLargeMem()) {
1267       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1268       // style calls, otherwise, external symbols are BRASL calls. This assumes
1269       // that declared/defined symbols are in the same compilation unit and can
1270       // be reached through PC-relative jumps.
1271       //
1272       // NOTE:
1273       // This may be an unsafe assumption for JIT and really large compilation
1274       // units.
1275       if (GV->isDeclaration()) {
1276         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1277       } else {
1278         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1279       }
1280     } else {
1281       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1282       // address pairs:
1283       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1284     }
1285   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1286     EVT CalleeVT = Callee.getValueType();
1287     SDValue Zero = DAG.getConstant(0, PtrVT);
1288     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1289         Callee.getValueType());
1290
1291     if (!ST->usingLargeMem()) {
1292       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1293     } else {
1294       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1295     }
1296   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1297     // If this is an absolute destination address that appears to be a legal
1298     // local store address, use the munged value.
1299     Callee = SDValue(Dest, 0);
1300   }
1301
1302   Ops.push_back(Chain);
1303   Ops.push_back(Callee);
1304
1305   // Add argument registers to the end of the list so that they are known live
1306   // into the call.
1307   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1308     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1309                                   RegsToPass[i].second.getValueType()));
1310
1311   if (InFlag.getNode())
1312     Ops.push_back(InFlag);
1313   // Returns a chain and a flag for retval copy to use.
1314   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1315                       &Ops[0], Ops.size());
1316   InFlag = Chain.getValue(1);
1317
1318   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1319                              DAG.getIntPtrConstant(0, true), InFlag);
1320   if (!Ins.empty())
1321     InFlag = Chain.getValue(1);
1322
1323   // If the function returns void, just return the chain.
1324   if (Ins.empty())
1325     return Chain;
1326
1327   // Now handle the return value(s)
1328   SmallVector<CCValAssign, 16> RVLocs;
1329   CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
1330                     RVLocs, *DAG.getContext());
1331   CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
1332
1333
1334   // If the call has results, copy the values out of the ret val registers.
1335   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1336     CCValAssign VA = RVLocs[i];
1337
1338     SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1339                                      InFlag);
1340     Chain = Val.getValue(1);
1341     InFlag = Val.getValue(2);
1342     InVals.push_back(Val);
1343    }
1344
1345   return Chain;
1346 }
1347
1348 SDValue
1349 SPUTargetLowering::LowerReturn(SDValue Chain,
1350                                CallingConv::ID CallConv, bool isVarArg,
1351                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1352                                const SmallVectorImpl<SDValue> &OutVals,
1353                                DebugLoc dl, SelectionDAG &DAG) const {
1354
1355   SmallVector<CCValAssign, 16> RVLocs;
1356   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1357                  RVLocs, *DAG.getContext());
1358   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1359
1360   // If this is the first return lowered for this function, add the regs to the
1361   // liveout set for the function.
1362   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1363     for (unsigned i = 0; i != RVLocs.size(); ++i)
1364       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1365   }
1366
1367   SDValue Flag;
1368
1369   // Copy the result values into the output registers.
1370   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1371     CCValAssign &VA = RVLocs[i];
1372     assert(VA.isRegLoc() && "Can only return in registers!");
1373     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1374                              OutVals[i], Flag);
1375     Flag = Chain.getValue(1);
1376   }
1377
1378   if (Flag.getNode())
1379     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1380   else
1381     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1382 }
1383
1384
1385 //===----------------------------------------------------------------------===//
1386 // Vector related lowering:
1387 //===----------------------------------------------------------------------===//
1388
1389 static ConstantSDNode *
1390 getVecImm(SDNode *N) {
1391   SDValue OpVal(0, 0);
1392
1393   // Check to see if this buildvec has a single non-undef value in its elements.
1394   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1395     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1396     if (OpVal.getNode() == 0)
1397       OpVal = N->getOperand(i);
1398     else if (OpVal != N->getOperand(i))
1399       return 0;
1400   }
1401
1402   if (OpVal.getNode() != 0) {
1403     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1404       return CN;
1405     }
1406   }
1407
1408   return 0;
1409 }
1410
1411 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1412 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1413 /// constant
1414 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1415                               EVT ValueType) {
1416   if (ConstantSDNode *CN = getVecImm(N)) {
1417     uint64_t Value = CN->getZExtValue();
1418     if (ValueType == MVT::i64) {
1419       uint64_t UValue = CN->getZExtValue();
1420       uint32_t upper = uint32_t(UValue >> 32);
1421       uint32_t lower = uint32_t(UValue);
1422       if (upper != lower)
1423         return SDValue();
1424       Value = Value >> 32;
1425     }
1426     if (Value <= 0x3ffff)
1427       return DAG.getTargetConstant(Value, ValueType);
1428   }
1429
1430   return SDValue();
1431 }
1432
1433 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1434 /// and the value fits into a signed 16-bit constant, and if so, return the
1435 /// constant
1436 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1437                               EVT ValueType) {
1438   if (ConstantSDNode *CN = getVecImm(N)) {
1439     int64_t Value = CN->getSExtValue();
1440     if (ValueType == MVT::i64) {
1441       uint64_t UValue = CN->getZExtValue();
1442       uint32_t upper = uint32_t(UValue >> 32);
1443       uint32_t lower = uint32_t(UValue);
1444       if (upper != lower)
1445         return SDValue();
1446       Value = Value >> 32;
1447     }
1448     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1449       return DAG.getTargetConstant(Value, ValueType);
1450     }
1451   }
1452
1453   return SDValue();
1454 }
1455
1456 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1457 /// and the value fits into a signed 10-bit constant, and if so, return the
1458 /// constant
1459 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1460                               EVT ValueType) {
1461   if (ConstantSDNode *CN = getVecImm(N)) {
1462     int64_t Value = CN->getSExtValue();
1463     if (ValueType == MVT::i64) {
1464       uint64_t UValue = CN->getZExtValue();
1465       uint32_t upper = uint32_t(UValue >> 32);
1466       uint32_t lower = uint32_t(UValue);
1467       if (upper != lower)
1468         return SDValue();
1469       Value = Value >> 32;
1470     }
1471     if (isInt<10>(Value))
1472       return DAG.getTargetConstant(Value, ValueType);
1473   }
1474
1475   return SDValue();
1476 }
1477
1478 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1479 /// and the value fits into a signed 8-bit constant, and if so, return the
1480 /// constant.
1481 ///
1482 /// @note: The incoming vector is v16i8 because that's the only way we can load
1483 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1484 /// same value.
1485 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1486                              EVT ValueType) {
1487   if (ConstantSDNode *CN = getVecImm(N)) {
1488     int Value = (int) CN->getZExtValue();
1489     if (ValueType == MVT::i16
1490         && Value <= 0xffff                 /* truncated from uint64_t */
1491         && ((short) Value >> 8) == ((short) Value & 0xff))
1492       return DAG.getTargetConstant(Value & 0xff, ValueType);
1493     else if (ValueType == MVT::i8
1494              && (Value & 0xff) == Value)
1495       return DAG.getTargetConstant(Value, ValueType);
1496   }
1497
1498   return SDValue();
1499 }
1500
1501 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1502 /// and the value fits into a signed 16-bit constant, and if so, return the
1503 /// constant
1504 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1505                                EVT ValueType) {
1506   if (ConstantSDNode *CN = getVecImm(N)) {
1507     uint64_t Value = CN->getZExtValue();
1508     if ((ValueType == MVT::i32
1509           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1510         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1511       return DAG.getTargetConstant(Value >> 16, ValueType);
1512   }
1513
1514   return SDValue();
1515 }
1516
1517 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1518 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1519   if (ConstantSDNode *CN = getVecImm(N)) {
1520     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1521   }
1522
1523   return SDValue();
1524 }
1525
1526 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1527 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1528   if (ConstantSDNode *CN = getVecImm(N)) {
1529     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1530   }
1531
1532   return SDValue();
1533 }
1534
1535 //! Lower a BUILD_VECTOR instruction creatively:
1536 static SDValue
1537 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1538   EVT VT = Op.getValueType();
1539   EVT EltVT = VT.getVectorElementType();
1540   DebugLoc dl = Op.getDebugLoc();
1541   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1542   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1543   unsigned minSplatBits = EltVT.getSizeInBits();
1544
1545   if (minSplatBits < 16)
1546     minSplatBits = 16;
1547
1548   APInt APSplatBits, APSplatUndef;
1549   unsigned SplatBitSize;
1550   bool HasAnyUndefs;
1551
1552   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1553                             HasAnyUndefs, minSplatBits)
1554       || minSplatBits < SplatBitSize)
1555     return SDValue();   // Wasn't a constant vector or splat exceeded min
1556
1557   uint64_t SplatBits = APSplatBits.getZExtValue();
1558
1559   switch (VT.getSimpleVT().SimpleTy) {
1560   default:
1561     report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1562                        Twine(VT.getEVTString()));
1563     /*NOTREACHED*/
1564   case MVT::v4f32: {
1565     uint32_t Value32 = uint32_t(SplatBits);
1566     assert(SplatBitSize == 32
1567            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1568     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1569     SDValue T = DAG.getConstant(Value32, MVT::i32);
1570     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1571                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1572     break;
1573   }
1574   case MVT::v2f64: {
1575     uint64_t f64val = uint64_t(SplatBits);
1576     assert(SplatBitSize == 64
1577            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1578     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1579     SDValue T = DAG.getConstant(f64val, MVT::i64);
1580     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1581                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1582     break;
1583   }
1584   case MVT::v16i8: {
1585    // 8-bit constants have to be expanded to 16-bits
1586    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1587    SmallVector<SDValue, 8> Ops;
1588
1589    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1590    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1591                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1592   }
1593   case MVT::v8i16: {
1594     unsigned short Value16 = SplatBits;
1595     SDValue T = DAG.getConstant(Value16, EltVT);
1596     SmallVector<SDValue, 8> Ops;
1597
1598     Ops.assign(8, T);
1599     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1600   }
1601   case MVT::v4i32: {
1602     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1603     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1604   }
1605   case MVT::v2i64: {
1606     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1607   }
1608   }
1609
1610   return SDValue();
1611 }
1612
1613 /*!
1614  */
1615 SDValue
1616 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1617                      DebugLoc dl) {
1618   uint32_t upper = uint32_t(SplatVal >> 32);
1619   uint32_t lower = uint32_t(SplatVal);
1620
1621   if (upper == lower) {
1622     // Magic constant that can be matched by IL, ILA, et. al.
1623     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1624     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1625                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1626                                    Val, Val, Val, Val));
1627   } else {
1628     bool upper_special, lower_special;
1629
1630     // NOTE: This code creates common-case shuffle masks that can be easily
1631     // detected as common expressions. It is not attempting to create highly
1632     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1633
1634     // Detect if the upper or lower half is a special shuffle mask pattern:
1635     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1636     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1637
1638     // Both upper and lower are special, lower to a constant pool load:
1639     if (lower_special && upper_special) {
1640       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1641       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1642                          SplatValCN, SplatValCN);
1643     }
1644
1645     SDValue LO32;
1646     SDValue HI32;
1647     SmallVector<SDValue, 16> ShufBytes;
1648     SDValue Result;
1649
1650     // Create lower vector if not a special pattern
1651     if (!lower_special) {
1652       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1653       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1654                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1655                                      LO32C, LO32C, LO32C, LO32C));
1656     }
1657
1658     // Create upper vector if not a special pattern
1659     if (!upper_special) {
1660       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1661       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1662                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1663                                      HI32C, HI32C, HI32C, HI32C));
1664     }
1665
1666     // If either upper or lower are special, then the two input operands are
1667     // the same (basically, one of them is a "don't care")
1668     if (lower_special)
1669       LO32 = HI32;
1670     if (upper_special)
1671       HI32 = LO32;
1672
1673     for (int i = 0; i < 4; ++i) {
1674       uint64_t val = 0;
1675       for (int j = 0; j < 4; ++j) {
1676         SDValue V;
1677         bool process_upper, process_lower;
1678         val <<= 8;
1679         process_upper = (upper_special && (i & 1) == 0);
1680         process_lower = (lower_special && (i & 1) == 1);
1681
1682         if (process_upper || process_lower) {
1683           if ((process_upper && upper == 0)
1684                   || (process_lower && lower == 0))
1685             val |= 0x80;
1686           else if ((process_upper && upper == 0xffffffff)
1687                   || (process_lower && lower == 0xffffffff))
1688             val |= 0xc0;
1689           else if ((process_upper && upper == 0x80000000)
1690                   || (process_lower && lower == 0x80000000))
1691             val |= (j == 0 ? 0xe0 : 0x80);
1692         } else
1693           val |= i * 4 + j + ((i & 1) * 16);
1694       }
1695
1696       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1697     }
1698
1699     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1700                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1701                                    &ShufBytes[0], ShufBytes.size()));
1702   }
1703 }
1704
1705 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1706 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1707 /// permutation vector, V3, is monotonically increasing with one "exception"
1708 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1709 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1710 /// In either case, the net result is going to eventually invoke SHUFB to
1711 /// permute/shuffle the bytes from V1 and V2.
1712 /// \note
1713 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1714 /// control word for byte/halfword/word insertion. This takes care of a single
1715 /// element move from V2 into V1.
1716 /// \note
1717 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1718 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1719   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1720   SDValue V1 = Op.getOperand(0);
1721   SDValue V2 = Op.getOperand(1);
1722   DebugLoc dl = Op.getDebugLoc();
1723
1724   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1725
1726   // If we have a single element being moved from V1 to V2, this can be handled
1727   // using the C*[DX] compute mask instructions, but the vector elements have
1728   // to be monotonically increasing with one exception element, and the source
1729   // slot of the element to move must be the same as the destination.
1730   EVT VecVT = V1.getValueType();
1731   EVT EltVT = VecVT.getVectorElementType();
1732   unsigned EltsFromV2 = 0;
1733   unsigned V2EltOffset = 0;
1734   unsigned V2EltIdx0 = 0;
1735   unsigned CurrElt = 0;
1736   unsigned MaxElts = VecVT.getVectorNumElements();
1737   unsigned PrevElt = 0;
1738   unsigned V0Elt = 0;
1739   bool monotonic = true;
1740   bool rotate = true;
1741   EVT maskVT;             // which of the c?d instructions to use
1742
1743   if (EltVT == MVT::i8) {
1744     V2EltIdx0 = 16;
1745     maskVT = MVT::v16i8;
1746   } else if (EltVT == MVT::i16) {
1747     V2EltIdx0 = 8;
1748     maskVT = MVT::v8i16;
1749   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1750     V2EltIdx0 = 4;
1751     maskVT = MVT::v4i32;
1752   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1753     V2EltIdx0 = 2;
1754     maskVT = MVT::v2i64;
1755   } else
1756     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1757
1758   for (unsigned i = 0; i != MaxElts; ++i) {
1759     if (SVN->getMaskElt(i) < 0)
1760       continue;
1761
1762     unsigned SrcElt = SVN->getMaskElt(i);
1763
1764     if (monotonic) {
1765       if (SrcElt >= V2EltIdx0) {
1766         // TODO: optimize for the monotonic case when several consecutive
1767         // elements are taken form V2. Do we ever get such a case?
1768         if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
1769           V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
1770         else
1771           monotonic = false;
1772         ++EltsFromV2;
1773       } else if (CurrElt != SrcElt) {
1774         monotonic = false;
1775       }
1776
1777       ++CurrElt;
1778     }
1779
1780     if (rotate) {
1781       if (PrevElt > 0 && SrcElt < MaxElts) {
1782         if ((PrevElt == SrcElt - 1)
1783             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1784           PrevElt = SrcElt;
1785           if (SrcElt == 0)
1786             V0Elt = i;
1787         } else {
1788           rotate = false;
1789         }
1790       } else if (i == 0) {
1791         // First time through, need to keep track of previous element
1792         PrevElt = SrcElt;
1793       } else {
1794         // This isn't a rotation, takes elements from vector 2
1795         rotate = false;
1796       }
1797     }
1798   }
1799
1800   if (EltsFromV2 == 1 && monotonic) {
1801     // Compute mask and shuffle
1802     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1803
1804     // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1805     // R1 ($sp) is used here only as it is guaranteed to have last bits zero
1806     SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1807                                 DAG.getRegister(SPU::R1, PtrVT),
1808                                 DAG.getConstant(V2EltOffset, MVT::i32));
1809     SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1810                                      maskVT, Pointer);
1811
1812     // Use shuffle mask in SHUFB synthetic instruction:
1813     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1814                        ShufMaskOp);
1815   } else if (rotate) {
1816     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1817
1818     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1819                        V1, DAG.getConstant(rotamt, MVT::i16));
1820   } else {
1821    // Convert the SHUFFLE_VECTOR mask's input element units to the
1822    // actual bytes.
1823     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1824
1825     SmallVector<SDValue, 16> ResultMask;
1826     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1827       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1828
1829       for (unsigned j = 0; j < BytesPerElement; ++j)
1830         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1831     }
1832     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1833                                     &ResultMask[0], ResultMask.size());
1834     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1835   }
1836 }
1837
1838 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1839   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1840   DebugLoc dl = Op.getDebugLoc();
1841
1842   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1843     // For a constant, build the appropriate constant vector, which will
1844     // eventually simplify to a vector register load.
1845
1846     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1847     SmallVector<SDValue, 16> ConstVecValues;
1848     EVT VT;
1849     size_t n_copies;
1850
1851     // Create a constant vector:
1852     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1853     default: llvm_unreachable("Unexpected constant value type in "
1854                               "LowerSCALAR_TO_VECTOR");
1855     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1856     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1857     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1858     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1859     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1860     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1861     }
1862
1863     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1864     for (size_t j = 0; j < n_copies; ++j)
1865       ConstVecValues.push_back(CValue);
1866
1867     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1868                        &ConstVecValues[0], ConstVecValues.size());
1869   } else {
1870     // Otherwise, copy the value from one register to another:
1871     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1872     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1873     case MVT::i8:
1874     case MVT::i16:
1875     case MVT::i32:
1876     case MVT::i64:
1877     case MVT::f32:
1878     case MVT::f64:
1879       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1880     }
1881   }
1882
1883   return SDValue();
1884 }
1885
1886 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1887   EVT VT = Op.getValueType();
1888   SDValue N = Op.getOperand(0);
1889   SDValue Elt = Op.getOperand(1);
1890   DebugLoc dl = Op.getDebugLoc();
1891   SDValue retval;
1892
1893   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1894     // Constant argument:
1895     int EltNo = (int) C->getZExtValue();
1896
1897     // sanity checks:
1898     if (VT == MVT::i8 && EltNo >= 16)
1899       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1900     else if (VT == MVT::i16 && EltNo >= 8)
1901       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1902     else if (VT == MVT::i32 && EltNo >= 4)
1903       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1904     else if (VT == MVT::i64 && EltNo >= 2)
1905       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1906
1907     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1908       // i32 and i64: Element 0 is the preferred slot
1909       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1910     }
1911
1912     // Need to generate shuffle mask and extract:
1913     int prefslot_begin = -1, prefslot_end = -1;
1914     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1915
1916     switch (VT.getSimpleVT().SimpleTy) {
1917     default:
1918       assert(false && "Invalid value type!");
1919     case MVT::i8: {
1920       prefslot_begin = prefslot_end = 3;
1921       break;
1922     }
1923     case MVT::i16: {
1924       prefslot_begin = 2; prefslot_end = 3;
1925       break;
1926     }
1927     case MVT::i32:
1928     case MVT::f32: {
1929       prefslot_begin = 0; prefslot_end = 3;
1930       break;
1931     }
1932     case MVT::i64:
1933     case MVT::f64: {
1934       prefslot_begin = 0; prefslot_end = 7;
1935       break;
1936     }
1937     }
1938
1939     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1940            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1941
1942     unsigned int ShufBytes[16] = {
1943       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1944     };
1945     for (int i = 0; i < 16; ++i) {
1946       // zero fill uppper part of preferred slot, don't care about the
1947       // other slots:
1948       unsigned int mask_val;
1949       if (i <= prefslot_end) {
1950         mask_val =
1951           ((i < prefslot_begin)
1952            ? 0x80
1953            : elt_byte + (i - prefslot_begin));
1954
1955         ShufBytes[i] = mask_val;
1956       } else
1957         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1958     }
1959
1960     SDValue ShufMask[4];
1961     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1962       unsigned bidx = i * 4;
1963       unsigned int bits = ((ShufBytes[bidx] << 24) |
1964                            (ShufBytes[bidx+1] << 16) |
1965                            (ShufBytes[bidx+2] << 8) |
1966                            ShufBytes[bidx+3]);
1967       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1968     }
1969
1970     SDValue ShufMaskVec =
1971       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1972                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1973
1974     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1975                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1976                                      N, N, ShufMaskVec));
1977   } else {
1978     // Variable index: Rotate the requested element into slot 0, then replicate
1979     // slot 0 across the vector
1980     EVT VecVT = N.getValueType();
1981     if (!VecVT.isSimple() || !VecVT.isVector()) {
1982       report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
1983                         "vector type!");
1984     }
1985
1986     // Make life easier by making sure the index is zero-extended to i32
1987     if (Elt.getValueType() != MVT::i32)
1988       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
1989
1990     // Scale the index to a bit/byte shift quantity
1991     APInt scaleFactor =
1992             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1993     unsigned scaleShift = scaleFactor.logBase2();
1994     SDValue vecShift;
1995
1996     if (scaleShift > 0) {
1997       // Scale the shift factor:
1998       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
1999                         DAG.getConstant(scaleShift, MVT::i32));
2000     }
2001
2002     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2003
2004     // Replicate the bytes starting at byte 0 across the entire vector (for
2005     // consistency with the notion of a unified register set)
2006     SDValue replicate;
2007
2008     switch (VT.getSimpleVT().SimpleTy) {
2009     default:
2010       report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2011                         "type");
2012       /*NOTREACHED*/
2013     case MVT::i8: {
2014       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2015       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2016                               factor, factor, factor, factor);
2017       break;
2018     }
2019     case MVT::i16: {
2020       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2021       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2022                               factor, factor, factor, factor);
2023       break;
2024     }
2025     case MVT::i32:
2026     case MVT::f32: {
2027       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2028       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2029                               factor, factor, factor, factor);
2030       break;
2031     }
2032     case MVT::i64:
2033     case MVT::f64: {
2034       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2035       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2036       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2037                               loFactor, hiFactor, loFactor, hiFactor);
2038       break;
2039     }
2040     }
2041
2042     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2043                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2044                                      vecShift, vecShift, replicate));
2045   }
2046
2047   return retval;
2048 }
2049
2050 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2051   SDValue VecOp = Op.getOperand(0);
2052   SDValue ValOp = Op.getOperand(1);
2053   SDValue IdxOp = Op.getOperand(2);
2054   DebugLoc dl = Op.getDebugLoc();
2055   EVT VT = Op.getValueType();
2056
2057   // use 0 when the lane to insert to is 'undef'
2058   int64_t Idx=0;
2059   if (IdxOp.getOpcode() != ISD::UNDEF) {
2060     ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2061     assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2062     Idx = (CN->getSExtValue());
2063   }
2064
2065   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2066   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2067   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2068                                 DAG.getRegister(SPU::R1, PtrVT),
2069                                 DAG.getConstant(Idx, PtrVT));
2070   // widen the mask when dealing with half vectors
2071   EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
2072                                 128/ VT.getVectorElementType().getSizeInBits());
2073   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
2074
2075   SDValue result =
2076     DAG.getNode(SPUISD::SHUFB, dl, VT,
2077                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2078                 VecOp,
2079                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2080
2081   return result;
2082 }
2083
2084 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2085                            const TargetLowering &TLI)
2086 {
2087   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2088   DebugLoc dl = Op.getDebugLoc();
2089   EVT ShiftVT = TLI.getShiftAmountTy();
2090
2091   assert(Op.getValueType() == MVT::i8);
2092   switch (Opc) {
2093   default:
2094     llvm_unreachable("Unhandled i8 math operator");
2095     /*NOTREACHED*/
2096     break;
2097   case ISD::ADD: {
2098     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2099     // the result:
2100     SDValue N1 = Op.getOperand(1);
2101     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2102     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2103     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2104                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2105
2106   }
2107
2108   case ISD::SUB: {
2109     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2110     // the result:
2111     SDValue N1 = Op.getOperand(1);
2112     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2113     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2114     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2115                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2116   }
2117   case ISD::ROTR:
2118   case ISD::ROTL: {
2119     SDValue N1 = Op.getOperand(1);
2120     EVT N1VT = N1.getValueType();
2121
2122     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2123     if (!N1VT.bitsEq(ShiftVT)) {
2124       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2125                        ? ISD::ZERO_EXTEND
2126                        : ISD::TRUNCATE;
2127       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2128     }
2129
2130     // Replicate lower 8-bits into upper 8:
2131     SDValue ExpandArg =
2132       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2133                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2134                               N0, DAG.getConstant(8, MVT::i32)));
2135
2136     // Truncate back down to i8
2137     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2138                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2139   }
2140   case ISD::SRL:
2141   case ISD::SHL: {
2142     SDValue N1 = Op.getOperand(1);
2143     EVT N1VT = N1.getValueType();
2144
2145     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2146     if (!N1VT.bitsEq(ShiftVT)) {
2147       unsigned N1Opc = ISD::ZERO_EXTEND;
2148
2149       if (N1.getValueType().bitsGT(ShiftVT))
2150         N1Opc = ISD::TRUNCATE;
2151
2152       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2153     }
2154
2155     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2156                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2157   }
2158   case ISD::SRA: {
2159     SDValue N1 = Op.getOperand(1);
2160     EVT N1VT = N1.getValueType();
2161
2162     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2163     if (!N1VT.bitsEq(ShiftVT)) {
2164       unsigned N1Opc = ISD::SIGN_EXTEND;
2165
2166       if (N1VT.bitsGT(ShiftVT))
2167         N1Opc = ISD::TRUNCATE;
2168       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2169     }
2170
2171     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2172                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2173   }
2174   case ISD::MUL: {
2175     SDValue N1 = Op.getOperand(1);
2176
2177     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2178     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2179     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2180                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2181     break;
2182   }
2183   }
2184
2185   return SDValue();
2186 }
2187
2188 //! Lower byte immediate operations for v16i8 vectors:
2189 static SDValue
2190 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2191   SDValue ConstVec;
2192   SDValue Arg;
2193   EVT VT = Op.getValueType();
2194   DebugLoc dl = Op.getDebugLoc();
2195
2196   ConstVec = Op.getOperand(0);
2197   Arg = Op.getOperand(1);
2198   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2199     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2200       ConstVec = ConstVec.getOperand(0);
2201     } else {
2202       ConstVec = Op.getOperand(1);
2203       Arg = Op.getOperand(0);
2204       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2205         ConstVec = ConstVec.getOperand(0);
2206       }
2207     }
2208   }
2209
2210   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2211     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2212     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2213
2214     APInt APSplatBits, APSplatUndef;
2215     unsigned SplatBitSize;
2216     bool HasAnyUndefs;
2217     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2218
2219     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2220                               HasAnyUndefs, minSplatBits)
2221         && minSplatBits <= SplatBitSize) {
2222       uint64_t SplatBits = APSplatBits.getZExtValue();
2223       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2224
2225       SmallVector<SDValue, 16> tcVec;
2226       tcVec.assign(16, tc);
2227       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2228                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2229     }
2230   }
2231
2232   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2233   // lowered.  Return the operation, rather than a null SDValue.
2234   return Op;
2235 }
2236
2237 //! Custom lowering for CTPOP (count population)
2238 /*!
2239   Custom lowering code that counts the number ones in the input
2240   operand. SPU has such an instruction, but it counts the number of
2241   ones per byte, which then have to be accumulated.
2242 */
2243 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2244   EVT VT = Op.getValueType();
2245   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2246                                VT, (128 / VT.getSizeInBits()));
2247   DebugLoc dl = Op.getDebugLoc();
2248
2249   switch (VT.getSimpleVT().SimpleTy) {
2250   default:
2251     assert(false && "Invalid value type!");
2252   case MVT::i8: {
2253     SDValue N = Op.getOperand(0);
2254     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2255
2256     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2257     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2258
2259     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2260   }
2261
2262   case MVT::i16: {
2263     MachineFunction &MF = DAG.getMachineFunction();
2264     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2265
2266     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2267
2268     SDValue N = Op.getOperand(0);
2269     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2270     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2271     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2272
2273     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2274     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2275
2276     // CNTB_result becomes the chain to which all of the virtual registers
2277     // CNTB_reg, SUM1_reg become associated:
2278     SDValue CNTB_result =
2279       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2280
2281     SDValue CNTB_rescopy =
2282       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2283
2284     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2285
2286     return DAG.getNode(ISD::AND, dl, MVT::i16,
2287                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2288                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2289                                                Tmp1, Shift1),
2290                                    Tmp1),
2291                        Mask0);
2292   }
2293
2294   case MVT::i32: {
2295     MachineFunction &MF = DAG.getMachineFunction();
2296     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2297
2298     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2299     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2300
2301     SDValue N = Op.getOperand(0);
2302     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2303     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2304     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2305     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2306
2307     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2308     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2309
2310     // CNTB_result becomes the chain to which all of the virtual registers
2311     // CNTB_reg, SUM1_reg become associated:
2312     SDValue CNTB_result =
2313       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2314
2315     SDValue CNTB_rescopy =
2316       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2317
2318     SDValue Comp1 =
2319       DAG.getNode(ISD::SRL, dl, MVT::i32,
2320                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2321                   Shift1);
2322
2323     SDValue Sum1 =
2324       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2325                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2326
2327     SDValue Sum1_rescopy =
2328       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2329
2330     SDValue Comp2 =
2331       DAG.getNode(ISD::SRL, dl, MVT::i32,
2332                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2333                   Shift2);
2334     SDValue Sum2 =
2335       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2336                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2337
2338     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2339   }
2340
2341   case MVT::i64:
2342     break;
2343   }
2344
2345   return SDValue();
2346 }
2347
2348 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2349 /*!
2350  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2351  All conversions to i64 are expanded to a libcall.
2352  */
2353 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2354                               const SPUTargetLowering &TLI) {
2355   EVT OpVT = Op.getValueType();
2356   SDValue Op0 = Op.getOperand(0);
2357   EVT Op0VT = Op0.getValueType();
2358
2359   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2360       || OpVT == MVT::i64) {
2361     // Convert f32 / f64 to i32 / i64 via libcall.
2362     RTLIB::Libcall LC =
2363             (Op.getOpcode() == ISD::FP_TO_SINT)
2364              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2365              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2366     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2367     SDValue Dummy;
2368     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2369   }
2370
2371   return Op;
2372 }
2373
2374 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2375 /*!
2376  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2377  All conversions from i64 are expanded to a libcall.
2378  */
2379 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2380                               const SPUTargetLowering &TLI) {
2381   EVT OpVT = Op.getValueType();
2382   SDValue Op0 = Op.getOperand(0);
2383   EVT Op0VT = Op0.getValueType();
2384
2385   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2386       || Op0VT == MVT::i64) {
2387     // Convert i32, i64 to f64 via libcall:
2388     RTLIB::Libcall LC =
2389             (Op.getOpcode() == ISD::SINT_TO_FP)
2390              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2391              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2392     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2393     SDValue Dummy;
2394     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2395   }
2396
2397   return Op;
2398 }
2399
2400 //! Lower ISD::SETCC
2401 /*!
2402  This handles MVT::f64 (double floating point) condition lowering
2403  */
2404 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2405                           const TargetLowering &TLI) {
2406   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2407   DebugLoc dl = Op.getDebugLoc();
2408   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2409
2410   SDValue lhs = Op.getOperand(0);
2411   SDValue rhs = Op.getOperand(1);
2412   EVT lhsVT = lhs.getValueType();
2413   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2414
2415   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2416   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2417   EVT IntVT(MVT::i64);
2418
2419   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2420   // selected to a NOP:
2421   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2422   SDValue lhsHi32 =
2423           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2424                       DAG.getNode(ISD::SRL, dl, IntVT,
2425                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2426   SDValue lhsHi32abs =
2427           DAG.getNode(ISD::AND, dl, MVT::i32,
2428                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2429   SDValue lhsLo32 =
2430           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2431
2432   // SETO and SETUO only use the lhs operand:
2433   if (CC->get() == ISD::SETO) {
2434     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2435     // SETUO
2436     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2437     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2438                        DAG.getSetCC(dl, ccResultVT,
2439                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2440                                     ISD::SETUO),
2441                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2442   } else if (CC->get() == ISD::SETUO) {
2443     // Evaluates to true if Op0 is [SQ]NaN
2444     return DAG.getNode(ISD::AND, dl, ccResultVT,
2445                        DAG.getSetCC(dl, ccResultVT,
2446                                     lhsHi32abs,
2447                                     DAG.getConstant(0x7ff00000, MVT::i32),
2448                                     ISD::SETGE),
2449                        DAG.getSetCC(dl, ccResultVT,
2450                                     lhsLo32,
2451                                     DAG.getConstant(0, MVT::i32),
2452                                     ISD::SETGT));
2453   }
2454
2455   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2456   SDValue rhsHi32 =
2457           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2458                       DAG.getNode(ISD::SRL, dl, IntVT,
2459                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2460
2461   // If a value is negative, subtract from the sign magnitude constant:
2462   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2463
2464   // Convert the sign-magnitude representation into 2's complement:
2465   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2466                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2467   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2468   SDValue lhsSelect =
2469           DAG.getNode(ISD::SELECT, dl, IntVT,
2470                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2471
2472   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2473                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2474   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2475   SDValue rhsSelect =
2476           DAG.getNode(ISD::SELECT, dl, IntVT,
2477                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2478
2479   unsigned compareOp;
2480
2481   switch (CC->get()) {
2482   case ISD::SETOEQ:
2483   case ISD::SETUEQ:
2484     compareOp = ISD::SETEQ; break;
2485   case ISD::SETOGT:
2486   case ISD::SETUGT:
2487     compareOp = ISD::SETGT; break;
2488   case ISD::SETOGE:
2489   case ISD::SETUGE:
2490     compareOp = ISD::SETGE; break;
2491   case ISD::SETOLT:
2492   case ISD::SETULT:
2493     compareOp = ISD::SETLT; break;
2494   case ISD::SETOLE:
2495   case ISD::SETULE:
2496     compareOp = ISD::SETLE; break;
2497   case ISD::SETUNE:
2498   case ISD::SETONE:
2499     compareOp = ISD::SETNE; break;
2500   default:
2501     report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2502   }
2503
2504   SDValue result =
2505           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2506                        (ISD::CondCode) compareOp);
2507
2508   if ((CC->get() & 0x8) == 0) {
2509     // Ordered comparison:
2510     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2511                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2512                                   ISD::SETO);
2513     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2514                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2515                                   ISD::SETO);
2516     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2517
2518     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2519   }
2520
2521   return result;
2522 }
2523
2524 //! Lower ISD::SELECT_CC
2525 /*!
2526   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2527   SELB instruction.
2528
2529   \note Need to revisit this in the future: if the code path through the true
2530   and false value computations is longer than the latency of a branch (6
2531   cycles), then it would be more advantageous to branch and insert a new basic
2532   block and branch on the condition. However, this code does not make that
2533   assumption, given the simplisitc uses so far.
2534  */
2535
2536 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2537                               const TargetLowering &TLI) {
2538   EVT VT = Op.getValueType();
2539   SDValue lhs = Op.getOperand(0);
2540   SDValue rhs = Op.getOperand(1);
2541   SDValue trueval = Op.getOperand(2);
2542   SDValue falseval = Op.getOperand(3);
2543   SDValue condition = Op.getOperand(4);
2544   DebugLoc dl = Op.getDebugLoc();
2545
2546   // NOTE: SELB's arguments: $rA, $rB, $mask
2547   //
2548   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2549   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2550   // condition was true and 0s where the condition was false. Hence, the
2551   // arguments to SELB get reversed.
2552
2553   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2554   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2555   // with another "cannot select select_cc" assert:
2556
2557   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2558                                 TLI.getSetCCResultType(Op.getValueType()),
2559                                 lhs, rhs, condition);
2560   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2561 }
2562
2563 //! Custom lower ISD::TRUNCATE
2564 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2565 {
2566   // Type to truncate to
2567   EVT VT = Op.getValueType();
2568   MVT simpleVT = VT.getSimpleVT();
2569   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2570                                VT, (128 / VT.getSizeInBits()));
2571   DebugLoc dl = Op.getDebugLoc();
2572
2573   // Type to truncate from
2574   SDValue Op0 = Op.getOperand(0);
2575   EVT Op0VT = Op0.getValueType();
2576
2577   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2578     // Create shuffle mask, least significant doubleword of quadword
2579     unsigned maskHigh = 0x08090a0b;
2580     unsigned maskLow = 0x0c0d0e0f;
2581     // Use a shuffle to perform the truncation
2582     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2583                                    DAG.getConstant(maskHigh, MVT::i32),
2584                                    DAG.getConstant(maskLow, MVT::i32),
2585                                    DAG.getConstant(maskHigh, MVT::i32),
2586                                    DAG.getConstant(maskLow, MVT::i32));
2587
2588     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2589                                        Op0, Op0, shufMask);
2590
2591     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2592   }
2593
2594   return SDValue();             // Leave the truncate unmolested
2595 }
2596
2597 /*!
2598  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2599  * algorithm is to duplicate the sign bit using rotmai to generate at
2600  * least one byte full of sign bits. Then propagate the "sign-byte" into
2601  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2602  *
2603  * @param Op The sext operand
2604  * @param DAG The current DAG
2605  * @return The SDValue with the entire instruction sequence
2606  */
2607 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2608 {
2609   DebugLoc dl = Op.getDebugLoc();
2610
2611   // Type to extend to
2612   MVT OpVT = Op.getValueType().getSimpleVT();
2613
2614   // Type to extend from
2615   SDValue Op0 = Op.getOperand(0);
2616   MVT Op0VT = Op0.getValueType().getSimpleVT();
2617
2618   // The type to extend to needs to be a i128 and
2619   // the type to extend from needs to be i64 or i32.
2620   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2621           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2622
2623   // Create shuffle mask
2624   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2625   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2626   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2627   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2628                                  DAG.getConstant(mask1, MVT::i32),
2629                                  DAG.getConstant(mask1, MVT::i32),
2630                                  DAG.getConstant(mask2, MVT::i32),
2631                                  DAG.getConstant(mask3, MVT::i32));
2632
2633   // Word wise arithmetic right shift to generate at least one byte
2634   // that contains sign bits.
2635   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2636   SDValue sraVal = DAG.getNode(ISD::SRA,
2637                  dl,
2638                  mvt,
2639                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2640                  DAG.getConstant(31, MVT::i32));
2641
2642   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2643   // and the input value into the lower 64 bits.
2644   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2645       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2646
2647   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2648 }
2649
2650 //! Custom (target-specific) lowering entry point
2651 /*!
2652   This is where LLVM's DAG selection process calls to do target-specific
2653   lowering of nodes.
2654  */
2655 SDValue
2656 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2657 {
2658   unsigned Opc = (unsigned) Op.getOpcode();
2659   EVT VT = Op.getValueType();
2660
2661   switch (Opc) {
2662   default: {
2663 #ifndef NDEBUG
2664     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2665     errs() << "Op.getOpcode() = " << Opc << "\n";
2666     errs() << "*Op.getNode():\n";
2667     Op.getNode()->dump();
2668 #endif
2669     llvm_unreachable(0);
2670   }
2671   case ISD::LOAD:
2672   case ISD::EXTLOAD:
2673   case ISD::SEXTLOAD:
2674   case ISD::ZEXTLOAD:
2675     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2676   case ISD::STORE:
2677     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2678   case ISD::ConstantPool:
2679     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2680   case ISD::GlobalAddress:
2681     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2682   case ISD::JumpTable:
2683     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2684   case ISD::ConstantFP:
2685     return LowerConstantFP(Op, DAG);
2686
2687   // i8, i64 math ops:
2688   case ISD::ADD:
2689   case ISD::SUB:
2690   case ISD::ROTR:
2691   case ISD::ROTL:
2692   case ISD::SRL:
2693   case ISD::SHL:
2694   case ISD::SRA: {
2695     if (VT == MVT::i8)
2696       return LowerI8Math(Op, DAG, Opc, *this);
2697     break;
2698   }
2699
2700   case ISD::FP_TO_SINT:
2701   case ISD::FP_TO_UINT:
2702     return LowerFP_TO_INT(Op, DAG, *this);
2703
2704   case ISD::SINT_TO_FP:
2705   case ISD::UINT_TO_FP:
2706     return LowerINT_TO_FP(Op, DAG, *this);
2707
2708   // Vector-related lowering.
2709   case ISD::BUILD_VECTOR:
2710     return LowerBUILD_VECTOR(Op, DAG);
2711   case ISD::SCALAR_TO_VECTOR:
2712     return LowerSCALAR_TO_VECTOR(Op, DAG);
2713   case ISD::VECTOR_SHUFFLE:
2714     return LowerVECTOR_SHUFFLE(Op, DAG);
2715   case ISD::EXTRACT_VECTOR_ELT:
2716     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2717   case ISD::INSERT_VECTOR_ELT:
2718     return LowerINSERT_VECTOR_ELT(Op, DAG);
2719
2720   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2721   case ISD::AND:
2722   case ISD::OR:
2723   case ISD::XOR:
2724     return LowerByteImmed(Op, DAG);
2725
2726   // Vector and i8 multiply:
2727   case ISD::MUL:
2728     if (VT == MVT::i8)
2729       return LowerI8Math(Op, DAG, Opc, *this);
2730
2731   case ISD::CTPOP:
2732     return LowerCTPOP(Op, DAG);
2733
2734   case ISD::SELECT_CC:
2735     return LowerSELECT_CC(Op, DAG, *this);
2736
2737   case ISD::SETCC:
2738     return LowerSETCC(Op, DAG, *this);
2739
2740   case ISD::TRUNCATE:
2741     return LowerTRUNCATE(Op, DAG);
2742
2743   case ISD::SIGN_EXTEND:
2744     return LowerSIGN_EXTEND(Op, DAG);
2745   }
2746
2747   return SDValue();
2748 }
2749
2750 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2751                                            SmallVectorImpl<SDValue>&Results,
2752                                            SelectionDAG &DAG) const
2753 {
2754 #if 0
2755   unsigned Opc = (unsigned) N->getOpcode();
2756   EVT OpVT = N->getValueType(0);
2757
2758   switch (Opc) {
2759   default: {
2760     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2761     errs() << "Op.getOpcode() = " << Opc << "\n";
2762     errs() << "*Op.getNode():\n";
2763     N->dump();
2764     abort();
2765     /*NOTREACHED*/
2766   }
2767   }
2768 #endif
2769
2770   /* Otherwise, return unchanged */
2771 }
2772
2773 //===----------------------------------------------------------------------===//
2774 // Target Optimization Hooks
2775 //===----------------------------------------------------------------------===//
2776
2777 SDValue
2778 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2779 {
2780 #if 0
2781   TargetMachine &TM = getTargetMachine();
2782 #endif
2783   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2784   SelectionDAG &DAG = DCI.DAG;
2785   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2786   EVT NodeVT = N->getValueType(0);      // The node's value type
2787   EVT Op0VT = Op0.getValueType();       // The first operand's result
2788   SDValue Result;                       // Initially, empty result
2789   DebugLoc dl = N->getDebugLoc();
2790
2791   switch (N->getOpcode()) {
2792   default: break;
2793   case ISD::ADD: {
2794     SDValue Op1 = N->getOperand(1);
2795
2796     if (Op0.getOpcode() == SPUISD::IndirectAddr
2797         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2798       // Normalize the operands to reduce repeated code
2799       SDValue IndirectArg = Op0, AddArg = Op1;
2800
2801       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2802         IndirectArg = Op1;
2803         AddArg = Op0;
2804       }
2805
2806       if (isa<ConstantSDNode>(AddArg)) {
2807         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2808         SDValue IndOp1 = IndirectArg.getOperand(1);
2809
2810         if (CN0->isNullValue()) {
2811           // (add (SPUindirect <arg>, <arg>), 0) ->
2812           // (SPUindirect <arg>, <arg>)
2813
2814 #if !defined(NDEBUG)
2815           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2816             errs() << "\n"
2817                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2818                  << "With:    (SPUindirect <arg>, <arg>)\n";
2819           }
2820 #endif
2821
2822           return IndirectArg;
2823         } else if (isa<ConstantSDNode>(IndOp1)) {
2824           // (add (SPUindirect <arg>, <const>), <const>) ->
2825           // (SPUindirect <arg>, <const + const>)
2826           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2827           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2828           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2829
2830 #if !defined(NDEBUG)
2831           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2832             errs() << "\n"
2833                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2834                  << "), " << CN0->getSExtValue() << ")\n"
2835                  << "With:    (SPUindirect <arg>, "
2836                  << combinedConst << ")\n";
2837           }
2838 #endif
2839
2840           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2841                              IndirectArg, combinedValue);
2842         }
2843       }
2844     }
2845     break;
2846   }
2847   case ISD::SIGN_EXTEND:
2848   case ISD::ZERO_EXTEND:
2849   case ISD::ANY_EXTEND: {
2850     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2851       // (any_extend (SPUextract_elt0 <arg>)) ->
2852       // (SPUextract_elt0 <arg>)
2853       // Types must match, however...
2854 #if !defined(NDEBUG)
2855       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2856         errs() << "\nReplace: ";
2857         N->dump(&DAG);
2858         errs() << "\nWith:    ";
2859         Op0.getNode()->dump(&DAG);
2860         errs() << "\n";
2861       }
2862 #endif
2863
2864       return Op0;
2865     }
2866     break;
2867   }
2868   case SPUISD::IndirectAddr: {
2869     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2870       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2871       if (CN != 0 && CN->isNullValue()) {
2872         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2873         // (SPUaform <addr>, 0)
2874
2875         DEBUG(errs() << "Replace: ");
2876         DEBUG(N->dump(&DAG));
2877         DEBUG(errs() << "\nWith:    ");
2878         DEBUG(Op0.getNode()->dump(&DAG));
2879         DEBUG(errs() << "\n");
2880
2881         return Op0;
2882       }
2883     } else if (Op0.getOpcode() == ISD::ADD) {
2884       SDValue Op1 = N->getOperand(1);
2885       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2886         // (SPUindirect (add <arg>, <arg>), 0) ->
2887         // (SPUindirect <arg>, <arg>)
2888         if (CN1->isNullValue()) {
2889
2890 #if !defined(NDEBUG)
2891           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2892             errs() << "\n"
2893                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2894                  << "With:    (SPUindirect <arg>, <arg>)\n";
2895           }
2896 #endif
2897
2898           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2899                              Op0.getOperand(0), Op0.getOperand(1));
2900         }
2901       }
2902     }
2903     break;
2904   }
2905   case SPUISD::SHLQUAD_L_BITS:
2906   case SPUISD::SHLQUAD_L_BYTES:
2907   case SPUISD::ROTBYTES_LEFT: {
2908     SDValue Op1 = N->getOperand(1);
2909
2910     // Kill degenerate vector shifts:
2911     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2912       if (CN->isNullValue()) {
2913         Result = Op0;
2914       }
2915     }
2916     break;
2917   }
2918   case SPUISD::PREFSLOT2VEC: {
2919     switch (Op0.getOpcode()) {
2920     default:
2921       break;
2922     case ISD::ANY_EXTEND:
2923     case ISD::ZERO_EXTEND:
2924     case ISD::SIGN_EXTEND: {
2925       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2926       // <arg>
2927       // but only if the SPUprefslot2vec and <arg> types match.
2928       SDValue Op00 = Op0.getOperand(0);
2929       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2930         SDValue Op000 = Op00.getOperand(0);
2931         if (Op000.getValueType() == NodeVT) {
2932           Result = Op000;
2933         }
2934       }
2935       break;
2936     }
2937     case SPUISD::VEC2PREFSLOT: {
2938       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2939       // <arg>
2940       Result = Op0.getOperand(0);
2941       break;
2942     }
2943     }
2944     break;
2945   }
2946   }
2947
2948   // Otherwise, return unchanged.
2949 #ifndef NDEBUG
2950   if (Result.getNode()) {
2951     DEBUG(errs() << "\nReplace.SPU: ");
2952     DEBUG(N->dump(&DAG));
2953     DEBUG(errs() << "\nWith:        ");
2954     DEBUG(Result.getNode()->dump(&DAG));
2955     DEBUG(errs() << "\n");
2956   }
2957 #endif
2958
2959   return Result;
2960 }
2961
2962 //===----------------------------------------------------------------------===//
2963 // Inline Assembly Support
2964 //===----------------------------------------------------------------------===//
2965
2966 /// getConstraintType - Given a constraint letter, return the type of
2967 /// constraint it is for this target.
2968 SPUTargetLowering::ConstraintType
2969 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2970   if (ConstraintLetter.size() == 1) {
2971     switch (ConstraintLetter[0]) {
2972     default: break;
2973     case 'b':
2974     case 'r':
2975     case 'f':
2976     case 'v':
2977     case 'y':
2978       return C_RegisterClass;
2979     }
2980   }
2981   return TargetLowering::getConstraintType(ConstraintLetter);
2982 }
2983
2984 std::pair<unsigned, const TargetRegisterClass*>
2985 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2986                                                 EVT VT) const
2987 {
2988   if (Constraint.size() == 1) {
2989     // GCC RS6000 Constraint Letters
2990     switch (Constraint[0]) {
2991     case 'b':   // R1-R31
2992     case 'r':   // R0-R31
2993       if (VT == MVT::i64)
2994         return std::make_pair(0U, SPU::R64CRegisterClass);
2995       return std::make_pair(0U, SPU::R32CRegisterClass);
2996     case 'f':
2997       if (VT == MVT::f32)
2998         return std::make_pair(0U, SPU::R32FPRegisterClass);
2999       else if (VT == MVT::f64)
3000         return std::make_pair(0U, SPU::R64FPRegisterClass);
3001       break;
3002     case 'v':
3003       return std::make_pair(0U, SPU::GPRCRegisterClass);
3004     }
3005   }
3006
3007   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3008 }
3009
3010 //! Compute used/known bits for a SPU operand
3011 void
3012 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3013                                                   const APInt &Mask,
3014                                                   APInt &KnownZero,
3015                                                   APInt &KnownOne,
3016                                                   const SelectionDAG &DAG,
3017                                                   unsigned Depth ) const {
3018 #if 0
3019   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3020
3021   switch (Op.getOpcode()) {
3022   default:
3023     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3024     break;
3025   case CALL:
3026   case SHUFB:
3027   case SHUFFLE_MASK:
3028   case CNTB:
3029   case SPUISD::PREFSLOT2VEC:
3030   case SPUISD::LDRESULT:
3031   case SPUISD::VEC2PREFSLOT:
3032   case SPUISD::SHLQUAD_L_BITS:
3033   case SPUISD::SHLQUAD_L_BYTES:
3034   case SPUISD::VEC_ROTL:
3035   case SPUISD::VEC_ROTR:
3036   case SPUISD::ROTBYTES_LEFT:
3037   case SPUISD::SELECT_MASK:
3038   case SPUISD::SELB:
3039   }
3040 #endif
3041 }
3042
3043 unsigned
3044 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3045                                                    unsigned Depth) const {
3046   switch (Op.getOpcode()) {
3047   default:
3048     return 1;
3049
3050   case ISD::SETCC: {
3051     EVT VT = Op.getValueType();
3052
3053     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3054       VT = MVT::i32;
3055     }
3056     return VT.getSizeInBits();
3057   }
3058   }
3059 }
3060
3061 // LowerAsmOperandForConstraint
3062 void
3063 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3064                                                 char ConstraintLetter,
3065                                                 std::vector<SDValue> &Ops,
3066                                                 SelectionDAG &DAG) const {
3067   // Default, for the time being, to the base class handler
3068   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3069 }
3070
3071 /// isLegalAddressImmediate - Return true if the integer value can be used
3072 /// as the offset of the target addressing mode.
3073 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3074                                                 const Type *Ty) const {
3075   // SPU's addresses are 256K:
3076   return (V > -(1 << 18) && V < (1 << 18) - 1);
3077 }
3078
3079 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3080   return false;
3081 }
3082
3083 bool
3084 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3085   // The SPU target isn't yet aware of offsets.
3086   return false;
3087 }