lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/APInt.h"
  19 #include "llvm/ADT/VectorExtras.h"
  20 #include "llvm/CallingConv.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/MachineFrameInfo.h"
  23 #include "llvm/CodeGen/MachineFunction.h"
  24 #include "llvm/CodeGen/MachineInstrBuilder.h"
  25 #include "llvm/CodeGen/MachineRegisterInfo.h"
  26 #include "llvm/CodeGen/SelectionDAG.h"
  27 #include "llvm/Constants.h"
  28 #include "llvm/Function.h"
  29 #include "llvm/Intrinsics.h"
  30 #include "llvm/Support/Debug.h"
  31 #include "llvm/Support/ErrorHandling.h"
  32 #include "llvm/Support/MathExtras.h"
  33 #include "llvm/Support/raw_ostream.h"
  34 #include "llvm/Target/TargetOptions.h"
  35
  36 #include <map>
  37
  38 using namespace llvm;
  39
  40 // Used in getTargetNodeName() below
  41 namespace {
  42   std::map<unsigned, const char *> node_names;
  43
  44   //! MVT mapping to useful data for Cell SPU
  45   struct valtype_map_s {
  46     const MVT   valtype;
  47     const int   prefslot_byte;
  48   };
  49
  50   const valtype_map_s valtype_map[] = {
  51     { MVT::i1,   3 },
  52     { MVT::i8,   3 },
  53     { MVT::i16,  2 },
  54     { MVT::i32,  0 },
  55     { MVT::f32,  0 },
  56     { MVT::i64,  0 },
  57     { MVT::f64,  0 },
  58     { MVT::i128, 0 }
  59   };
  60
  61   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  62
  63   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  64     const valtype_map_s *retval = 0;
  65
  66     for (size_t i = 0; i < n_valtype_map; ++i) {
  67       if (valtype_map[i].valtype == VT) {
  68         retval = valtype_map + i;
  69         break;
  70       }
  71     }
  72
  73 #ifndef NDEBUG
  74     if (retval == 0) {
  75       std::string msg;
  76       raw_string_ostream Msg(msg);
  77       Msg << "getValueTypeMapEntry returns NULL for "
  78            << VT.getMVTString();
  79       llvm_report_error(Msg.str());
  80     }
  81 #endif
  82
  83     return retval;
  84   }
  85
  86   //! Expand a library call into an actual call DAG node
  87   /*!
  88    \note
  89    This code is taken from SelectionDAGLegalize, since it is not exposed as
  90    part of the LLVM SelectionDAG API.
  91    */
  92
  93   SDValue
  94   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  95                 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  96     // The input chain to this libcall is the entry node of the function.
  97     // Legalizing the call will automatically add the previous call to the
  98     // dependence.
  99     SDValue InChain = DAG.getEntryNode();
 100
 101     TargetLowering::ArgListTy Args;
 102     TargetLowering::ArgListEntry Entry;
 103     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 104       MVT ArgVT = Op.getOperand(i).getValueType();
 105       const Type *ArgTy = ArgVT.getTypeForMVT(*DAG.getContext());
 106       Entry.Node = Op.getOperand(i);
 107       Entry.Ty = ArgTy;
 108       Entry.isSExt = isSigned;
 109       Entry.isZExt = !isSigned;
 110       Args.push_back(Entry);
 111     }
 112     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 113                                            TLI.getPointerTy());
 114
 115     // Splice the libcall in wherever FindInputOutputChains tells us to.
 116     const Type *RetTy =
 117                  Op.getNode()->getValueType(0).getTypeForMVT(*DAG.getContext());
 118     std::pair<SDValue, SDValue> CallInfo =
 119             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 120                             0, CallingConv::C, false, Callee, Args, DAG,
 121                             Op.getDebugLoc());
 122
 123     return CallInfo.first;
 124   }
 125 }
 126
 127 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 128   : TargetLowering(TM),
 129     SPUTM(TM)
 130 {
 131   // Fold away setcc operations if possible.
 132   setPow2DivIsCheap();
 133
 134   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 135   setUseUnderscoreSetJmp(true);
 136   setUseUnderscoreLongJmp(true);
 137
 138   // Set RTLIB libcall names as used by SPU:
 139   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 140
 141   // Set up the SPU's register classes:
 142   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 143   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 144   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 145   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 146   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 147   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 148   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 149
 150   // SPU has no sign or zero extended loads for i1, i8, i16:
 151   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 152   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 153   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 154
 155   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 156   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 157
 158   // SPU constant load actions are custom lowered:
 159   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 160   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 161
 162   // SPU's loads and stores have to be custom lowered:
 163   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 164        ++sctype) {
 165     MVT VT = (MVT::SimpleValueType)sctype;
 166
 167     setOperationAction(ISD::LOAD,   VT, Custom);
 168     setOperationAction(ISD::STORE,  VT, Custom);
 169     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 170     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 171     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 172
 173     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 174       MVT StoreVT = (MVT::SimpleValueType) stype;
 175       setTruncStoreAction(VT, StoreVT, Expand);
 176     }
 177   }
 178
 179   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 180        ++sctype) {
 181     MVT VT = (MVT::SimpleValueType) sctype;
 182
 183     setOperationAction(ISD::LOAD,   VT, Custom);
 184     setOperationAction(ISD::STORE,  VT, Custom);
 185
 186     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 187       MVT StoreVT = (MVT::SimpleValueType) stype;
 188       setTruncStoreAction(VT, StoreVT, Expand);
 189     }
 190   }
 191
 192   // Expand the jumptable branches
 193   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 194   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 195
 196   // Custom lower SELECT_CC for most cases, but expand by default
 197   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 198   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 199   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 200   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 201   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 202
 203   // SPU has no intrinsics for these particular operations:
 204   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 205
 206   // SPU has no SREM/UREM instructions
 207   setOperationAction(ISD::SREM, MVT::i32, Expand);
 208   setOperationAction(ISD::UREM, MVT::i32, Expand);
 209   setOperationAction(ISD::SREM, MVT::i64, Expand);
 210   setOperationAction(ISD::UREM, MVT::i64, Expand);
 211
 212   // We don't support sin/cos/sqrt/fmod
 213   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 214   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 215   setOperationAction(ISD::FREM , MVT::f64, Expand);
 216   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 217   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 218   setOperationAction(ISD::FREM , MVT::f32, Expand);
 219
 220   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 221   // for f32!)
 222   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 223   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 224
 225   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 226   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 227
 228   // SPU can do rotate right and left, so legalize it... but customize for i8
 229   // because instructions don't exist.
 230
 231   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 232   //        .td files.
 233   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 234   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 235   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 236
 237   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 238   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 239   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 240
 241   // SPU has no native version of shift left/right for i8
 242   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 243   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 244   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 245
 246   // Make these operations legal and handle them during instruction selection:
 247   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 248   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 249   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 250
 251   // Custom lower i8, i32 and i64 multiplications
 252   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 253   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 254   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 255
 256   // Expand double-width multiplication
 257   // FIXME: It would probably be reasonable to support some of these operations
 258   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 259   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 260   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 261   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 262   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 263   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 264   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 265   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 266   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 267   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 268   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 269   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 270   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 271   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 272   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 273   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 274
 275   // Need to custom handle (some) common i8, i64 math ops
 276   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 277   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 278   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 279   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 280
 281   // SPU does not have BSWAP. It does have i32 support CTLZ.
 282   // CTPOP has to be custom lowered.
 283   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 284   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 285
 286   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 287   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 288   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 289   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 290
 291   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 292   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 293
 294   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 295
 296   // SPU has a version of select that implements (a&~c)|(b&c), just like
 297   // select ought to work:
 298   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 299   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 300   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 301   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 302
 303   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 304   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 305   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 306   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 307   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 308
 309   // Custom lower i128 -> i64 truncates
 310   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 311
 312   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 313   // to expand to a libcall, hence the custom lowering:
 314   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 315   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 316
 317   // FDIV on SPU requires custom lowering
 318   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 319
 320   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 321   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 322   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 323   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 324   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 325   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 326   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 327   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 328   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 329
 330   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 331   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 332   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 333   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 334
 335   // We cannot sextinreg(i1).  Expand to shifts.
 336   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 337
 338   // Support label based line numbers.
 339   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 340   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 341
 342   // We want to legalize GlobalAddress and ConstantPool nodes into the
 343   // appropriate instructions to materialize the address.
 344   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 345        ++sctype) {
 346     MVT VT = (MVT::SimpleValueType)sctype;
 347
 348     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 349     setOperationAction(ISD::ConstantPool,   VT, Custom);
 350     setOperationAction(ISD::JumpTable,      VT, Custom);
 351   }
 352
 353   // RET must be custom lowered, to meet ABI requirements
 354   setOperationAction(ISD::RET,           MVT::Other, Custom);
 355
 356   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 357   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 358
 359   // Use the default implementation.
 360   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 361   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 362   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 363   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 364   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 365   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 366   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 367
 368   // Cell SPU has instructions for converting between i64 and fp.
 369   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 370   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 371
 372   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 373   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 374
 375   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 376   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 377
 378   // First set operation action for all vector types to expand. Then we
 379   // will selectively turn on ones that can be effectively codegen'd.
 380   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 381   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 382   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 383   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 384   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 385   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 386
 387   // "Odd size" vector classes that we're willing to support:
 388   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 389
 390   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 391        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 392     MVT VT = (MVT::SimpleValueType)i;
 393
 394     // add/sub are legal for all supported vector VT's.
 395     setOperationAction(ISD::ADD,     VT, Legal);
 396     setOperationAction(ISD::SUB,     VT, Legal);
 397     // mul has to be custom lowered.
 398     setOperationAction(ISD::MUL,     VT, Legal);
 399
 400     setOperationAction(ISD::AND,     VT, Legal);
 401     setOperationAction(ISD::OR,      VT, Legal);
 402     setOperationAction(ISD::XOR,     VT, Legal);
 403     setOperationAction(ISD::LOAD,    VT, Legal);
 404     setOperationAction(ISD::SELECT,  VT, Legal);
 405     setOperationAction(ISD::STORE,   VT, Legal);
 406
 407     // These operations need to be expanded:
 408     setOperationAction(ISD::SDIV,    VT, Expand);
 409     setOperationAction(ISD::SREM,    VT, Expand);
 410     setOperationAction(ISD::UDIV,    VT, Expand);
 411     setOperationAction(ISD::UREM,    VT, Expand);
 412
 413     // Custom lower build_vector, constant pool spills, insert and
 414     // extract vector elements:
 415     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 416     setOperationAction(ISD::ConstantPool, VT, Custom);
 417     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 418     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 419     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 420     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 421   }
 422
 423   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 424   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 425   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 426   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 427
 428   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 429
 430   setShiftAmountType(MVT::i32);
 431   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 432
 433   setStackPointerRegisterToSaveRestore(SPU::R1);
 434
 435   // We have target-specific dag combine patterns for the following nodes:
 436   setTargetDAGCombine(ISD::ADD);
 437   setTargetDAGCombine(ISD::ZERO_EXTEND);
 438   setTargetDAGCombine(ISD::SIGN_EXTEND);
 439   setTargetDAGCombine(ISD::ANY_EXTEND);
 440
 441   computeRegisterProperties();
 442
 443   // Set pre-RA register scheduler default to BURR, which produces slightly
 444   // better code than the default (could also be TDRR, but TargetLowering.h
 445   // needs a mod to support that model):
 446   setSchedulingPreference(SchedulingForRegPressure);
 447 }
 448
 449 const char *
 450 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 451 {
 452   if (node_names.empty()) {
 453     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 454     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 455     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 456     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 457     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 458     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 459     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 460     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 461     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 462     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 463     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 464     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 465     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 466     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 467     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 468     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 469     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 470     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 471     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 472     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 473     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 474     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 475             "SPUISD::ROTBYTES_LEFT_BITS";
 476     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 477     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 478     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 479     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 480     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 481   }
 482
 483   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 484
 485   return ((i != node_names.end()) ? i->second : 0);
 486 }
 487
 488 /// getFunctionAlignment - Return the Log2 alignment of this function.
 489 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 490   return 3;
 491 }
 492
 493 //===----------------------------------------------------------------------===//
 494 // Return the Cell SPU's SETCC result type
 495 //===----------------------------------------------------------------------===//
 496
 497 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
 498   // i16 and i32 are valid SETCC result types
 499   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
 500 }
 501
 502 //===----------------------------------------------------------------------===//
 503 // Calling convention code:
 504 //===----------------------------------------------------------------------===//
 505
 506 #include "SPUGenCallingConv.inc"
 507
 508 //===----------------------------------------------------------------------===//
 509 //  LowerOperation implementation
 510 //===----------------------------------------------------------------------===//
 511
 512 /// Custom lower loads for CellSPU
 513 /*!
 514  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 515  within a 16-byte block, we have to rotate to extract the requested element.
 516
 517  For extending loads, we also want to ensure that the following sequence is
 518  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 519
 520 \verbatim
 521 %1  v16i8,ch = load
 522 %2  v16i8,ch = rotate %1
 523 %3  v4f8, ch = bitconvert %2
 524 %4  f32      = vec2perfslot %3
 525 %5  f64      = fp_extend %4
 526 \endverbatim
 527 */
 528 static SDValue
 529 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 530   LoadSDNode *LN = cast<LoadSDNode>(Op);
 531   SDValue the_chain = LN->getChain();
 532   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 533   MVT InVT = LN->getMemoryVT();
 534   MVT OutVT = Op.getValueType();
 535   ISD::LoadExtType ExtType = LN->getExtensionType();
 536   unsigned alignment = LN->getAlignment();
 537   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 538   DebugLoc dl = Op.getDebugLoc();
 539
 540   switch (LN->getAddressingMode()) {
 541   case ISD::UNINDEXED: {
 542     SDValue result;
 543     SDValue basePtr = LN->getBasePtr();
 544     SDValue rotate;
 545
 546     if (alignment == 16) {
 547       ConstantSDNode *CN;
 548
 549       // Special cases for a known aligned load to simplify the base pointer
 550       // and the rotation amount:
 551       if (basePtr.getOpcode() == ISD::ADD
 552           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 553         // Known offset into basePtr
 554         int64_t offset = CN->getSExtValue();
 555         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 556
 557         if (rotamt < 0)
 558           rotamt += 16;
 559
 560         rotate = DAG.getConstant(rotamt, MVT::i16);
 561
 562         // Simplify the base pointer for this case:
 563         basePtr = basePtr.getOperand(0);
 564         if ((offset & ~0xf) > 0) {
 565           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 566                                 basePtr,
 567                                 DAG.getConstant((offset & ~0xf), PtrVT));
 568         }
 569       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 570                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 571                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 572                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 573         // Plain aligned a-form address: rotate into preferred slot
 574         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 575         int64_t rotamt = -vtm->prefslot_byte;
 576         if (rotamt < 0)
 577           rotamt += 16;
 578         rotate = DAG.getConstant(rotamt, MVT::i16);
 579       } else {
 580         // Offset the rotate amount by the basePtr and the preferred slot
 581         // byte offset
 582         int64_t rotamt = -vtm->prefslot_byte;
 583         if (rotamt < 0)
 584           rotamt += 16;
 585         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 586                              basePtr,
 587                              DAG.getConstant(rotamt, PtrVT));
 588       }
 589     } else {
 590       // Unaligned load: must be more pessimistic about addressing modes:
 591       if (basePtr.getOpcode() == ISD::ADD) {
 592         MachineFunction &MF = DAG.getMachineFunction();
 593         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 594         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 595         SDValue Flag;
 596
 597         SDValue Op0 = basePtr.getOperand(0);
 598         SDValue Op1 = basePtr.getOperand(1);
 599
 600         if (isa<ConstantSDNode>(Op1)) {
 601           // Convert the (add <ptr>, <const>) to an indirect address contained
 602           // in a register. Note that this is done because we need to avoid
 603           // creating a 0(reg) d-form address due to the SPU's block loads.
 604           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 605           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 606           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 607         } else {
 608           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 609           // will likely be lowered as a reg(reg) x-form address.
 610           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 611         }
 612       } else {
 613         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 614                               basePtr,
 615                               DAG.getConstant(0, PtrVT));
 616       }
 617
 618       // Offset the rotate amount by the basePtr and the preferred slot
 619       // byte offset
 620       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 621                            basePtr,
 622                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 623     }
 624
 625     // Re-emit as a v16i8 vector load
 626     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 627                          LN->getSrcValue(), LN->getSrcValueOffset(),
 628                          LN->isVolatile(), 16);
 629
 630     // Update the chain
 631     the_chain = result.getValue(1);
 632
 633     // Rotate into the preferred slot:
 634     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 635                          result.getValue(0), rotate);
 636
 637     // Convert the loaded v16i8 vector to the appropriate vector type
 638     // specified by the operand:
 639     MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
 640     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 641                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 642
 643     // Handle extending loads by extending the scalar result:
 644     if (ExtType == ISD::SEXTLOAD) {
 645       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 646     } else if (ExtType == ISD::ZEXTLOAD) {
 647       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 648     } else if (ExtType == ISD::EXTLOAD) {
 649       unsigned NewOpc = ISD::ANY_EXTEND;
 650
 651       if (OutVT.isFloatingPoint())
 652         NewOpc = ISD::FP_EXTEND;
 653
 654       result = DAG.getNode(NewOpc, dl, OutVT, result);
 655     }
 656
 657     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 658     SDValue retops[2] = {
 659       result,
 660       the_chain
 661     };
 662
 663     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 664                          retops, sizeof(retops) / sizeof(retops[0]));
 665     return result;
 666   }
 667   case ISD::PRE_INC:
 668   case ISD::PRE_DEC:
 669   case ISD::POST_INC:
 670   case ISD::POST_DEC:
 671   case ISD::LAST_INDEXED_MODE:
 672     {
 673       std::string msg;
 674       raw_string_ostream Msg(msg);
 675       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 676             "UNINDEXED\n";
 677       Msg << (unsigned) LN->getAddressingMode();
 678       llvm_report_error(Msg.str());
 679       /*NOTREACHED*/
 680     }
 681   }
 682
 683   return SDValue();
 684 }
 685
 686 /// Custom lower stores for CellSPU
 687 /*!
 688  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 689  within a 16-byte block, we have to generate a shuffle to insert the
 690  requested element into its place, then store the resulting block.
 691  */
 692 static SDValue
 693 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 694   StoreSDNode *SN = cast<StoreSDNode>(Op);
 695   SDValue Value = SN->getValue();
 696   MVT VT = Value.getValueType();
 697   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 698   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 699   DebugLoc dl = Op.getDebugLoc();
 700   unsigned alignment = SN->getAlignment();
 701
 702   switch (SN->getAddressingMode()) {
 703   case ISD::UNINDEXED: {
 704     // The vector type we really want to load from the 16-byte chunk.
 705     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
 706         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 707
 708     SDValue alignLoadVec;
 709     SDValue basePtr = SN->getBasePtr();
 710     SDValue the_chain = SN->getChain();
 711     SDValue insertEltOffs;
 712
 713     if (alignment == 16) {
 714       ConstantSDNode *CN;
 715
 716       // Special cases for a known aligned load to simplify the base pointer
 717       // and insertion byte:
 718       if (basePtr.getOpcode() == ISD::ADD
 719           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 720         // Known offset into basePtr
 721         int64_t offset = CN->getSExtValue();
 722
 723         // Simplify the base pointer for this case:
 724         basePtr = basePtr.getOperand(0);
 725         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 726                                     basePtr,
 727                                     DAG.getConstant((offset & 0xf), PtrVT));
 728
 729         if ((offset & ~0xf) > 0) {
 730           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 731                                 basePtr,
 732                                 DAG.getConstant((offset & ~0xf), PtrVT));
 733         }
 734       } else {
 735         // Otherwise, assume it's at byte 0 of basePtr
 736         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 737                                     basePtr,
 738                                     DAG.getConstant(0, PtrVT));
 739       }
 740     } else {
 741       // Unaligned load: must be more pessimistic about addressing modes:
 742       if (basePtr.getOpcode() == ISD::ADD) {
 743         MachineFunction &MF = DAG.getMachineFunction();
 744         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 745         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 746         SDValue Flag;
 747
 748         SDValue Op0 = basePtr.getOperand(0);
 749         SDValue Op1 = basePtr.getOperand(1);
 750
 751         if (isa<ConstantSDNode>(Op1)) {
 752           // Convert the (add <ptr>, <const>) to an indirect address contained
 753           // in a register. Note that this is done because we need to avoid
 754           // creating a 0(reg) d-form address due to the SPU's block loads.
 755           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 756           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 757           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 758         } else {
 759           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 760           // will likely be lowered as a reg(reg) x-form address.
 761           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 762         }
 763       } else {
 764         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 765                               basePtr,
 766                               DAG.getConstant(0, PtrVT));
 767       }
 768
 769       // Insertion point is solely determined by basePtr's contents
 770       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 771                                   basePtr,
 772                                   DAG.getConstant(0, PtrVT));
 773     }
 774
 775     // Re-emit as a v16i8 vector load
 776     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 777                                SN->getSrcValue(), SN->getSrcValueOffset(),
 778                                SN->isVolatile(), 16);
 779
 780     // Update the chain
 781     the_chain = alignLoadVec.getValue(1);
 782
 783     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 784     SDValue theValue = SN->getValue();
 785     SDValue result;
 786
 787     if (StVT != VT
 788         && (theValue.getOpcode() == ISD::AssertZext
 789             || theValue.getOpcode() == ISD::AssertSext)) {
 790       // Drill down and get the value for zero- and sign-extended
 791       // quantities
 792       theValue = theValue.getOperand(0);
 793     }
 794
 795     // If the base pointer is already a D-form address, then just create
 796     // a new D-form address with a slot offset and the orignal base pointer.
 797     // Otherwise generate a D-form address with the slot offset relative
 798     // to the stack pointer, which is always aligned.
 799 #if !defined(NDEBUG)
 800       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 801         cerr << "CellSPU LowerSTORE: basePtr = ";
 802         basePtr.getNode()->dump(&DAG);
 803         cerr << "\n";
 804       }
 805 #endif
 806
 807     SDValue insertEltOp =
 808             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 809     SDValue vectorizeOp =
 810             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 811
 812     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 813                          vectorizeOp, alignLoadVec,
 814                          DAG.getNode(ISD::BIT_CONVERT, dl,
 815                                      MVT::v4i32, insertEltOp));
 816
 817     result = DAG.getStore(the_chain, dl, result, basePtr,
 818                           LN->getSrcValue(), LN->getSrcValueOffset(),
 819                           LN->isVolatile(), LN->getAlignment());
 820
 821 #if 0 && !defined(NDEBUG)
 822     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 823       const SDValue &currentRoot = DAG.getRoot();
 824
 825       DAG.setRoot(result);
 826       cerr << "------- CellSPU:LowerStore result:\n";
 827       DAG.dump();
 828       cerr << "-------\n";
 829       DAG.setRoot(currentRoot);
 830     }
 831 #endif
 832
 833     return result;
 834     /*UNREACHED*/
 835   }
 836   case ISD::PRE_INC:
 837   case ISD::PRE_DEC:
 838   case ISD::POST_INC:
 839   case ISD::POST_DEC:
 840   case ISD::LAST_INDEXED_MODE:
 841     {
 842       std::string msg;
 843       raw_string_ostream Msg(msg);
 844       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 845             "UNINDEXED\n";
 846       Msg << (unsigned) SN->getAddressingMode();
 847       llvm_report_error(Msg.str());
 848       /*NOTREACHED*/
 849     }
 850   }
 851
 852   return SDValue();
 853 }
 854
 855 //! Generate the address of a constant pool entry.
 856 SDValue
 857 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 858   MVT PtrVT = Op.getValueType();
 859   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 860   Constant *C = CP->getConstVal();
 861   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 862   SDValue Zero = DAG.getConstant(0, PtrVT);
 863   const TargetMachine &TM = DAG.getTarget();
 864   // FIXME there is no actual debug info here
 865   DebugLoc dl = Op.getDebugLoc();
 866
 867   if (TM.getRelocationModel() == Reloc::Static) {
 868     if (!ST->usingLargeMem()) {
 869       // Just return the SDValue with the constant pool address in it.
 870       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 871     } else {
 872       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 873       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 874       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 875     }
 876   }
 877
 878   assert(0 &&
 879          "LowerConstantPool: Relocation model other than static"
 880          " not supported.");
 881   return SDValue();
 882 }
 883
 884 //! Alternate entry point for generating the address of a constant pool entry
 885 SDValue
 886 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 887   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 888 }
 889
 890 static SDValue
 891 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 892   MVT PtrVT = Op.getValueType();
 893   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 894   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 895   SDValue Zero = DAG.getConstant(0, PtrVT);
 896   const TargetMachine &TM = DAG.getTarget();
 897   // FIXME there is no actual debug info here
 898   DebugLoc dl = Op.getDebugLoc();
 899
 900   if (TM.getRelocationModel() == Reloc::Static) {
 901     if (!ST->usingLargeMem()) {
 902       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 903     } else {
 904       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 905       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 906       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 907     }
 908   }
 909
 910   assert(0 &&
 911          "LowerJumpTable: Relocation model other than static not supported.");
 912   return SDValue();
 913 }
 914
 915 static SDValue
 916 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 917   MVT PtrVT = Op.getValueType();
 918   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 919   GlobalValue *GV = GSDN->getGlobal();
 920   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 921   const TargetMachine &TM = DAG.getTarget();
 922   SDValue Zero = DAG.getConstant(0, PtrVT);
 923   // FIXME there is no actual debug info here
 924   DebugLoc dl = Op.getDebugLoc();
 925
 926   if (TM.getRelocationModel() == Reloc::Static) {
 927     if (!ST->usingLargeMem()) {
 928       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 929     } else {
 930       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 931       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 932       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 933     }
 934   } else {
 935     llvm_report_error("LowerGlobalAddress: Relocation model other than static"
 936                       "not supported.");
 937     /*NOTREACHED*/
 938   }
 939
 940   return SDValue();
 941 }
 942
 943 //! Custom lower double precision floating point constants
 944 static SDValue
 945 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 946   MVT VT = Op.getValueType();
 947   // FIXME there is no actual debug info here
 948   DebugLoc dl = Op.getDebugLoc();
 949
 950   if (VT == MVT::f64) {
 951     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 952
 953     assert((FP != 0) &&
 954            "LowerConstantFP: Node is not ConstantFPSDNode");
 955
 956     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 957     SDValue T = DAG.getConstant(dbits, MVT::i64);
 958     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
 959     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
 960                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
 961   }
 962
 963   return SDValue();
 964 }
 965
 966 static SDValue
 967 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 968 {
 969   MachineFunction &MF = DAG.getMachineFunction();
 970   MachineFrameInfo *MFI = MF.getFrameInfo();
 971   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 972   SmallVector<SDValue, 48> ArgValues;
 973   SDValue Root = Op.getOperand(0);
 974   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 975   DebugLoc dl = Op.getDebugLoc();
 976
 977   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 978   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 979
 980   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 981   unsigned ArgRegIdx = 0;
 982   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 983
 984   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 985
 986   // Add DAG nodes to load the arguments or copy them out of registers.
 987   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 988        ArgNo != e; ++ArgNo) {
 989     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 990     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 991     SDValue ArgVal;
 992
 993     if (ArgRegIdx < NumArgRegs) {
 994       const TargetRegisterClass *ArgRegClass;
 995
 996       switch (ObjectVT.getSimpleVT()) {
 997       default: {
 998         std::string msg;
 999         raw_string_ostream Msg(msg);
1000         Msg << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
1001              << ObjectVT.getMVTString();
1002         llvm_report_error(Msg.str());
1003       }
1004       case MVT::i8:
1005         ArgRegClass = &SPU::R8CRegClass;
1006         break;
1007       case MVT::i16:
1008         ArgRegClass = &SPU::R16CRegClass;
1009         break;
1010       case MVT::i32:
1011         ArgRegClass = &SPU::R32CRegClass;
1012         break;
1013       case MVT::i64:
1014         ArgRegClass = &SPU::R64CRegClass;
1015         break;
1016       case MVT::i128:
1017         ArgRegClass = &SPU::GPRCRegClass;
1018         break;
1019       case MVT::f32:
1020         ArgRegClass = &SPU::R32FPRegClass;
1021         break;
1022       case MVT::f64:
1023         ArgRegClass = &SPU::R64FPRegClass;
1024         break;
1025       case MVT::v2f64:
1026       case MVT::v4f32:
1027       case MVT::v2i64:
1028       case MVT::v4i32:
1029       case MVT::v8i16:
1030       case MVT::v16i8:
1031         ArgRegClass = &SPU::VECREGRegClass;
1032         break;
1033       }
1034
1035       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1036       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1037       ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
1038       ++ArgRegIdx;
1039     } else {
1040       // We need to load the argument to a virtual register if we determined
1041       // above that we ran out of physical registers of the appropriate type
1042       // or we're forced to do vararg
1043       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1044       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1045       ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
1046       ArgOffset += StackSlotSize;
1047     }
1048
1049     ArgValues.push_back(ArgVal);
1050     // Update the chain
1051     Root = ArgVal.getOperand(0);
1052   }
1053
1054   // vararg handling:
1055   if (isVarArg) {
1056     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1057     // We will spill (79-3)+1 registers to the stack
1058     SmallVector<SDValue, 79-3+1> MemOps;
1059
1060     // Create the frame slot
1061
1062     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1063       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1064       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1065       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1066       SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
1067       Root = Store.getOperand(0);
1068       MemOps.push_back(Store);
1069
1070       // Increment address by stack slot size for the next stored argument
1071       ArgOffset += StackSlotSize;
1072     }
1073     if (!MemOps.empty())
1074       Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1075                          &MemOps[0], MemOps.size());
1076   }
1077
1078   ArgValues.push_back(Root);
1079
1080   // Return the new list of results.
1081   return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
1082                      &ArgValues[0], ArgValues.size());
1083 }
1084
1085 /// isLSAAddress - Return the immediate to use if the specified
1086 /// value is representable as a LSA address.
1087 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1088   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1089   if (!C) return 0;
1090
1091   int Addr = C->getZExtValue();
1092   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1093       (Addr << 14 >> 14) != Addr)
1094     return 0;  // Top 14 bits have to be sext of immediate.
1095
1096   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1097 }
1098
1099 static SDValue
1100 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1101   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1102   SDValue Chain = TheCall->getChain();
1103   SDValue Callee    = TheCall->getCallee();
1104   unsigned NumOps     = TheCall->getNumArgs();
1105   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1106   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1107   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1108   DebugLoc dl = TheCall->getDebugLoc();
1109
1110   // Handy pointer type
1111   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1112
1113   // Accumulate how many bytes are to be pushed on the stack, including the
1114   // linkage area, and parameter passing area.  According to the SPU ABI,
1115   // we minimally need space for [LR] and [SP]
1116   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1117
1118   // Set up a copy of the stack pointer for use loading and storing any
1119   // arguments that may not fit in the registers available for argument
1120   // passing.
1121   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1122
1123   // Figure out which arguments are going to go in registers, and which in
1124   // memory.
1125   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1126   unsigned ArgRegIdx = 0;
1127
1128   // Keep track of registers passing arguments
1129   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1130   // And the arguments passed on the stack
1131   SmallVector<SDValue, 8> MemOpChains;
1132
1133   for (unsigned i = 0; i != NumOps; ++i) {
1134     SDValue Arg = TheCall->getArg(i);
1135
1136     // PtrOff will be used to store the current argument to the stack if a
1137     // register cannot be found for it.
1138     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1139     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1140
1141     switch (Arg.getValueType().getSimpleVT()) {
1142     default: assert(0 && "Unexpected ValueType for argument!");
1143     case MVT::i8:
1144     case MVT::i16:
1145     case MVT::i32:
1146     case MVT::i64:
1147     case MVT::i128:
1148       if (ArgRegIdx != NumArgRegs) {
1149         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1150       } else {
1151         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1152         ArgOffset += StackSlotSize;
1153       }
1154       break;
1155     case MVT::f32:
1156     case MVT::f64:
1157       if (ArgRegIdx != NumArgRegs) {
1158         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1159       } else {
1160         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1161         ArgOffset += StackSlotSize;
1162       }
1163       break;
1164     case MVT::v2i64:
1165     case MVT::v2f64:
1166     case MVT::v4f32:
1167     case MVT::v4i32:
1168     case MVT::v8i16:
1169     case MVT::v16i8:
1170       if (ArgRegIdx != NumArgRegs) {
1171         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1172       } else {
1173         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1174         ArgOffset += StackSlotSize;
1175       }
1176       break;
1177     }
1178   }
1179
1180   // Update number of stack bytes actually used, insert a call sequence start
1181   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1182   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1183                                                             true));
1184
1185   if (!MemOpChains.empty()) {
1186     // Adjust the stack pointer for the stack arguments.
1187     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1188                         &MemOpChains[0], MemOpChains.size());
1189   }
1190
1191   // Build a sequence of copy-to-reg nodes chained together with token chain
1192   // and flag operands which copy the outgoing args into the appropriate regs.
1193   SDValue InFlag;
1194   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1195     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1196                              RegsToPass[i].second, InFlag);
1197     InFlag = Chain.getValue(1);
1198   }
1199
1200   SmallVector<SDValue, 8> Ops;
1201   unsigned CallOpc = SPUISD::CALL;
1202
1203   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1204   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1205   // node so that legalize doesn't hack it.
1206   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1207     GlobalValue *GV = G->getGlobal();
1208     MVT CalleeVT = Callee.getValueType();
1209     SDValue Zero = DAG.getConstant(0, PtrVT);
1210     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1211
1212     if (!ST->usingLargeMem()) {
1213       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1214       // style calls, otherwise, external symbols are BRASL calls. This assumes
1215       // that declared/defined symbols are in the same compilation unit and can
1216       // be reached through PC-relative jumps.
1217       //
1218       // NOTE:
1219       // This may be an unsafe assumption for JIT and really large compilation
1220       // units.
1221       if (GV->isDeclaration()) {
1222         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1223       } else {
1224         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1225       }
1226     } else {
1227       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1228       // address pairs:
1229       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1230     }
1231   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1232     MVT CalleeVT = Callee.getValueType();
1233     SDValue Zero = DAG.getConstant(0, PtrVT);
1234     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1235         Callee.getValueType());
1236
1237     if (!ST->usingLargeMem()) {
1238       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1239     } else {
1240       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1241     }
1242   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1243     // If this is an absolute destination address that appears to be a legal
1244     // local store address, use the munged value.
1245     Callee = SDValue(Dest, 0);
1246   }
1247
1248   Ops.push_back(Chain);
1249   Ops.push_back(Callee);
1250
1251   // Add argument registers to the end of the list so that they are known live
1252   // into the call.
1253   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1254     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1255                                   RegsToPass[i].second.getValueType()));
1256
1257   if (InFlag.getNode())
1258     Ops.push_back(InFlag);
1259   // Returns a chain and a flag for retval copy to use.
1260   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1261                       &Ops[0], Ops.size());
1262   InFlag = Chain.getValue(1);
1263
1264   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1265                              DAG.getIntPtrConstant(0, true), InFlag);
1266   if (TheCall->getValueType(0) != MVT::Other)
1267     InFlag = Chain.getValue(1);
1268
1269   SDValue ResultVals[3];
1270   unsigned NumResults = 0;
1271
1272   // If the call has results, copy the values out of the ret val registers.
1273   switch (TheCall->getValueType(0).getSimpleVT()) {
1274   default: assert(0 && "Unexpected ret value!");
1275   case MVT::Other: break;
1276   case MVT::i32:
1277     if (TheCall->getValueType(1) == MVT::i32) {
1278       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1279                                  MVT::i32, InFlag).getValue(1);
1280       ResultVals[0] = Chain.getValue(0);
1281       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1282                                  Chain.getValue(2)).getValue(1);
1283       ResultVals[1] = Chain.getValue(0);
1284       NumResults = 2;
1285     } else {
1286       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1287                                  InFlag).getValue(1);
1288       ResultVals[0] = Chain.getValue(0);
1289       NumResults = 1;
1290     }
1291     break;
1292   case MVT::i64:
1293     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1294                                InFlag).getValue(1);
1295     ResultVals[0] = Chain.getValue(0);
1296     NumResults = 1;
1297     break;
1298   case MVT::i128:
1299     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1300                                InFlag).getValue(1);
1301     ResultVals[0] = Chain.getValue(0);
1302     NumResults = 1;
1303     break;
1304   case MVT::f32:
1305   case MVT::f64:
1306     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1307                                InFlag).getValue(1);
1308     ResultVals[0] = Chain.getValue(0);
1309     NumResults = 1;
1310     break;
1311   case MVT::v2f64:
1312   case MVT::v2i64:
1313   case MVT::v4f32:
1314   case MVT::v4i32:
1315   case MVT::v8i16:
1316   case MVT::v16i8:
1317     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1318                                    InFlag).getValue(1);
1319     ResultVals[0] = Chain.getValue(0);
1320     NumResults = 1;
1321     break;
1322   }
1323
1324   // If the function returns void, just return the chain.
1325   if (NumResults == 0)
1326     return Chain;
1327
1328   // Otherwise, merge everything together with a MERGE_VALUES node.
1329   ResultVals[NumResults++] = Chain;
1330   SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
1331   return Res.getValue(Op.getResNo());
1332 }
1333
1334 static SDValue
1335 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1336   SmallVector<CCValAssign, 16> RVLocs;
1337   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1338   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1339   DebugLoc dl = Op.getDebugLoc();
1340   CCState CCInfo(CC, isVarArg, TM, RVLocs, DAG.getContext());
1341   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1342
1343   // If this is the first return lowered for this function, add the regs to the
1344   // liveout set for the function.
1345   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1346     for (unsigned i = 0; i != RVLocs.size(); ++i)
1347       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1348   }
1349
1350   SDValue Chain = Op.getOperand(0);
1351   SDValue Flag;
1352
1353   // Copy the result values into the output registers.
1354   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1355     CCValAssign &VA = RVLocs[i];
1356     assert(VA.isRegLoc() && "Can only return in registers!");
1357     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1358                              Op.getOperand(i*2+1), Flag);
1359     Flag = Chain.getValue(1);
1360   }
1361
1362   if (Flag.getNode())
1363     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1364   else
1365     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1366 }
1367
1368
1369 //===----------------------------------------------------------------------===//
1370 // Vector related lowering:
1371 //===----------------------------------------------------------------------===//
1372
1373 static ConstantSDNode *
1374 getVecImm(SDNode *N) {
1375   SDValue OpVal(0, 0);
1376
1377   // Check to see if this buildvec has a single non-undef value in its elements.
1378   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1379     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1380     if (OpVal.getNode() == 0)
1381       OpVal = N->getOperand(i);
1382     else if (OpVal != N->getOperand(i))
1383       return 0;
1384   }
1385
1386   if (OpVal.getNode() != 0) {
1387     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1388       return CN;
1389     }
1390   }
1391
1392   return 0;
1393 }
1394
1395 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1396 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1397 /// constant
1398 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1399                               MVT ValueType) {
1400   if (ConstantSDNode *CN = getVecImm(N)) {
1401     uint64_t Value = CN->getZExtValue();
1402     if (ValueType == MVT::i64) {
1403       uint64_t UValue = CN->getZExtValue();
1404       uint32_t upper = uint32_t(UValue >> 32);
1405       uint32_t lower = uint32_t(UValue);
1406       if (upper != lower)
1407         return SDValue();
1408       Value = Value >> 32;
1409     }
1410     if (Value <= 0x3ffff)
1411       return DAG.getTargetConstant(Value, ValueType);
1412   }
1413
1414   return SDValue();
1415 }
1416
1417 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1418 /// and the value fits into a signed 16-bit constant, and if so, return the
1419 /// constant
1420 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1421                               MVT ValueType) {
1422   if (ConstantSDNode *CN = getVecImm(N)) {
1423     int64_t Value = CN->getSExtValue();
1424     if (ValueType == MVT::i64) {
1425       uint64_t UValue = CN->getZExtValue();
1426       uint32_t upper = uint32_t(UValue >> 32);
1427       uint32_t lower = uint32_t(UValue);
1428       if (upper != lower)
1429         return SDValue();
1430       Value = Value >> 32;
1431     }
1432     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1433       return DAG.getTargetConstant(Value, ValueType);
1434     }
1435   }
1436
1437   return SDValue();
1438 }
1439
1440 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1441 /// and the value fits into a signed 10-bit constant, and if so, return the
1442 /// constant
1443 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1444                               MVT ValueType) {
1445   if (ConstantSDNode *CN = getVecImm(N)) {
1446     int64_t Value = CN->getSExtValue();
1447     if (ValueType == MVT::i64) {
1448       uint64_t UValue = CN->getZExtValue();
1449       uint32_t upper = uint32_t(UValue >> 32);
1450       uint32_t lower = uint32_t(UValue);
1451       if (upper != lower)
1452         return SDValue();
1453       Value = Value >> 32;
1454     }
1455     if (isS10Constant(Value))
1456       return DAG.getTargetConstant(Value, ValueType);
1457   }
1458
1459   return SDValue();
1460 }
1461
1462 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1463 /// and the value fits into a signed 8-bit constant, and if so, return the
1464 /// constant.
1465 ///
1466 /// @note: The incoming vector is v16i8 because that's the only way we can load
1467 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1468 /// same value.
1469 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1470                              MVT ValueType) {
1471   if (ConstantSDNode *CN = getVecImm(N)) {
1472     int Value = (int) CN->getZExtValue();
1473     if (ValueType == MVT::i16
1474         && Value <= 0xffff                 /* truncated from uint64_t */
1475         && ((short) Value >> 8) == ((short) Value & 0xff))
1476       return DAG.getTargetConstant(Value & 0xff, ValueType);
1477     else if (ValueType == MVT::i8
1478              && (Value & 0xff) == Value)
1479       return DAG.getTargetConstant(Value, ValueType);
1480   }
1481
1482   return SDValue();
1483 }
1484
1485 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1486 /// and the value fits into a signed 16-bit constant, and if so, return the
1487 /// constant
1488 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1489                                MVT ValueType) {
1490   if (ConstantSDNode *CN = getVecImm(N)) {
1491     uint64_t Value = CN->getZExtValue();
1492     if ((ValueType == MVT::i32
1493           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1494         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1495       return DAG.getTargetConstant(Value >> 16, ValueType);
1496   }
1497
1498   return SDValue();
1499 }
1500
1501 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1502 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1503   if (ConstantSDNode *CN = getVecImm(N)) {
1504     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1505   }
1506
1507   return SDValue();
1508 }
1509
1510 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1511 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1512   if (ConstantSDNode *CN = getVecImm(N)) {
1513     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1514   }
1515
1516   return SDValue();
1517 }
1518
1519 //! Lower a BUILD_VECTOR instruction creatively:
1520 SDValue
1521 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1522   MVT VT = Op.getValueType();
1523   MVT EltVT = VT.getVectorElementType();
1524   DebugLoc dl = Op.getDebugLoc();
1525   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1526   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1527   unsigned minSplatBits = EltVT.getSizeInBits();
1528
1529   if (minSplatBits < 16)
1530     minSplatBits = 16;
1531
1532   APInt APSplatBits, APSplatUndef;
1533   unsigned SplatBitSize;
1534   bool HasAnyUndefs;
1535
1536   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1537                             HasAnyUndefs, minSplatBits)
1538       || minSplatBits < SplatBitSize)
1539     return SDValue();   // Wasn't a constant vector or splat exceeded min
1540
1541   uint64_t SplatBits = APSplatBits.getZExtValue();
1542
1543   switch (VT.getSimpleVT()) {
1544   default: {
1545     std::string msg;
1546     raw_string_ostream Msg(msg);
1547     Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1548          << VT.getMVTString();
1549     llvm_report_error(Msg.str());
1550     /*NOTREACHED*/
1551   }
1552   case MVT::v4f32: {
1553     uint32_t Value32 = uint32_t(SplatBits);
1554     assert(SplatBitSize == 32
1555            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1556     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1557     SDValue T = DAG.getConstant(Value32, MVT::i32);
1558     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1559                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1560     break;
1561   }
1562   case MVT::v2f64: {
1563     uint64_t f64val = uint64_t(SplatBits);
1564     assert(SplatBitSize == 64
1565            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1566     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1567     SDValue T = DAG.getConstant(f64val, MVT::i64);
1568     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1569                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1570     break;
1571   }
1572   case MVT::v16i8: {
1573    // 8-bit constants have to be expanded to 16-bits
1574    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1575    SmallVector<SDValue, 8> Ops;
1576
1577    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1578    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1579                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1580   }
1581   case MVT::v8i16: {
1582     unsigned short Value16 = SplatBits;
1583     SDValue T = DAG.getConstant(Value16, EltVT);
1584     SmallVector<SDValue, 8> Ops;
1585
1586     Ops.assign(8, T);
1587     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1588   }
1589   case MVT::v4i32: {
1590     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1591     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1592   }
1593   case MVT::v2i32: {
1594     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1595     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1596   }
1597   case MVT::v2i64: {
1598     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1599   }
1600   }
1601
1602   return SDValue();
1603 }
1604
1605 /*!
1606  */
1607 SDValue
1608 SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1609                      DebugLoc dl) {
1610   uint32_t upper = uint32_t(SplatVal >> 32);
1611   uint32_t lower = uint32_t(SplatVal);
1612
1613   if (upper == lower) {
1614     // Magic constant that can be matched by IL, ILA, et. al.
1615     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1616     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1617                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1618                                    Val, Val, Val, Val));
1619   } else {
1620     bool upper_special, lower_special;
1621
1622     // NOTE: This code creates common-case shuffle masks that can be easily
1623     // detected as common expressions. It is not attempting to create highly
1624     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1625
1626     // Detect if the upper or lower half is a special shuffle mask pattern:
1627     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1628     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1629
1630     // Both upper and lower are special, lower to a constant pool load:
1631     if (lower_special && upper_special) {
1632       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1633       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1634                          SplatValCN, SplatValCN);
1635     }
1636
1637     SDValue LO32;
1638     SDValue HI32;
1639     SmallVector<SDValue, 16> ShufBytes;
1640     SDValue Result;
1641
1642     // Create lower vector if not a special pattern
1643     if (!lower_special) {
1644       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1645       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1646                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1647                                      LO32C, LO32C, LO32C, LO32C));
1648     }
1649
1650     // Create upper vector if not a special pattern
1651     if (!upper_special) {
1652       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1653       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1654                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1655                                      HI32C, HI32C, HI32C, HI32C));
1656     }
1657
1658     // If either upper or lower are special, then the two input operands are
1659     // the same (basically, one of them is a "don't care")
1660     if (lower_special)
1661       LO32 = HI32;
1662     if (upper_special)
1663       HI32 = LO32;
1664
1665     for (int i = 0; i < 4; ++i) {
1666       uint64_t val = 0;
1667       for (int j = 0; j < 4; ++j) {
1668         SDValue V;
1669         bool process_upper, process_lower;
1670         val <<= 8;
1671         process_upper = (upper_special && (i & 1) == 0);
1672         process_lower = (lower_special && (i & 1) == 1);
1673
1674         if (process_upper || process_lower) {
1675           if ((process_upper && upper == 0)
1676                   || (process_lower && lower == 0))
1677             val |= 0x80;
1678           else if ((process_upper && upper == 0xffffffff)
1679                   || (process_lower && lower == 0xffffffff))
1680             val |= 0xc0;
1681           else if ((process_upper && upper == 0x80000000)
1682                   || (process_lower && lower == 0x80000000))
1683             val |= (j == 0 ? 0xe0 : 0x80);
1684         } else
1685           val |= i * 4 + j + ((i & 1) * 16);
1686       }
1687
1688       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1689     }
1690
1691     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1692                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1693                                    &ShufBytes[0], ShufBytes.size()));
1694   }
1695 }
1696
1697 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1698 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1699 /// permutation vector, V3, is monotonically increasing with one "exception"
1700 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1701 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1702 /// In either case, the net result is going to eventually invoke SHUFB to
1703 /// permute/shuffle the bytes from V1 and V2.
1704 /// \note
1705 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1706 /// control word for byte/halfword/word insertion. This takes care of a single
1707 /// element move from V2 into V1.
1708 /// \note
1709 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1710 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1711   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1712   SDValue V1 = Op.getOperand(0);
1713   SDValue V2 = Op.getOperand(1);
1714   DebugLoc dl = Op.getDebugLoc();
1715
1716   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1717
1718   // If we have a single element being moved from V1 to V2, this can be handled
1719   // using the C*[DX] compute mask instructions, but the vector elements have
1720   // to be monotonically increasing with one exception element.
1721   MVT VecVT = V1.getValueType();
1722   MVT EltVT = VecVT.getVectorElementType();
1723   unsigned EltsFromV2 = 0;
1724   unsigned V2Elt = 0;
1725   unsigned V2EltIdx0 = 0;
1726   unsigned CurrElt = 0;
1727   unsigned MaxElts = VecVT.getVectorNumElements();
1728   unsigned PrevElt = 0;
1729   unsigned V0Elt = 0;
1730   bool monotonic = true;
1731   bool rotate = true;
1732
1733   if (EltVT == MVT::i8) {
1734     V2EltIdx0 = 16;
1735   } else if (EltVT == MVT::i16) {
1736     V2EltIdx0 = 8;
1737   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1738     V2EltIdx0 = 4;
1739   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1740     V2EltIdx0 = 2;
1741   } else
1742     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1743
1744   for (unsigned i = 0; i != MaxElts; ++i) {
1745     if (SVN->getMaskElt(i) < 0)
1746       continue;
1747
1748     unsigned SrcElt = SVN->getMaskElt(i);
1749
1750     if (monotonic) {
1751       if (SrcElt >= V2EltIdx0) {
1752         if (1 >= (++EltsFromV2)) {
1753           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1754         }
1755       } else if (CurrElt != SrcElt) {
1756         monotonic = false;
1757       }
1758
1759       ++CurrElt;
1760     }
1761
1762     if (rotate) {
1763       if (PrevElt > 0 && SrcElt < MaxElts) {
1764         if ((PrevElt == SrcElt - 1)
1765             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1766           PrevElt = SrcElt;
1767           if (SrcElt == 0)
1768             V0Elt = i;
1769         } else {
1770           rotate = false;
1771         }
1772       } else if (PrevElt == 0) {
1773         // First time through, need to keep track of previous element
1774         PrevElt = SrcElt;
1775       } else {
1776         // This isn't a rotation, takes elements from vector 2
1777         rotate = false;
1778       }
1779     }
1780   }
1781
1782   if (EltsFromV2 == 1 && monotonic) {
1783     // Compute mask and shuffle
1784     MachineFunction &MF = DAG.getMachineFunction();
1785     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1786     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1787     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1788     // Initialize temporary register to 0
1789     SDValue InitTempReg =
1790       DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1791     // Copy register's contents as index in SHUFFLE_MASK:
1792     SDValue ShufMaskOp =
1793       DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1794                   DAG.getTargetConstant(V2Elt, MVT::i32),
1795                   DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1796     // Use shuffle mask in SHUFB synthetic instruction:
1797     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1798                        ShufMaskOp);
1799   } else if (rotate) {
1800     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1801
1802     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1803                        V1, DAG.getConstant(rotamt, MVT::i16));
1804   } else {
1805    // Convert the SHUFFLE_VECTOR mask's input element units to the
1806    // actual bytes.
1807     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1808
1809     SmallVector<SDValue, 16> ResultMask;
1810     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1811       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1812
1813       for (unsigned j = 0; j < BytesPerElement; ++j)
1814         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1815     }
1816
1817     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1818                                     &ResultMask[0], ResultMask.size());
1819     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1820   }
1821 }
1822
1823 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1824   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1825   DebugLoc dl = Op.getDebugLoc();
1826
1827   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1828     // For a constant, build the appropriate constant vector, which will
1829     // eventually simplify to a vector register load.
1830
1831     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1832     SmallVector<SDValue, 16> ConstVecValues;
1833     MVT VT;
1834     size_t n_copies;
1835
1836     // Create a constant vector:
1837     switch (Op.getValueType().getSimpleVT()) {
1838     default: assert(0 && "Unexpected constant value type in "
1839                          "LowerSCALAR_TO_VECTOR");
1840     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1841     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1842     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1843     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1844     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1845     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1846     }
1847
1848     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1849     for (size_t j = 0; j < n_copies; ++j)
1850       ConstVecValues.push_back(CValue);
1851
1852     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1853                        &ConstVecValues[0], ConstVecValues.size());
1854   } else {
1855     // Otherwise, copy the value from one register to another:
1856     switch (Op0.getValueType().getSimpleVT()) {
1857     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1858     case MVT::i8:
1859     case MVT::i16:
1860     case MVT::i32:
1861     case MVT::i64:
1862     case MVT::f32:
1863     case MVT::f64:
1864       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1865     }
1866   }
1867
1868   return SDValue();
1869 }
1870
1871 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1872   MVT VT = Op.getValueType();
1873   SDValue N = Op.getOperand(0);
1874   SDValue Elt = Op.getOperand(1);
1875   DebugLoc dl = Op.getDebugLoc();
1876   SDValue retval;
1877
1878   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1879     // Constant argument:
1880     int EltNo = (int) C->getZExtValue();
1881
1882     // sanity checks:
1883     if (VT == MVT::i8 && EltNo >= 16)
1884       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1885     else if (VT == MVT::i16 && EltNo >= 8)
1886       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1887     else if (VT == MVT::i32 && EltNo >= 4)
1888       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1889     else if (VT == MVT::i64 && EltNo >= 2)
1890       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1891
1892     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1893       // i32 and i64: Element 0 is the preferred slot
1894       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1895     }
1896
1897     // Need to generate shuffle mask and extract:
1898     int prefslot_begin = -1, prefslot_end = -1;
1899     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1900
1901     switch (VT.getSimpleVT()) {
1902     default:
1903       assert(false && "Invalid value type!");
1904     case MVT::i8: {
1905       prefslot_begin = prefslot_end = 3;
1906       break;
1907     }
1908     case MVT::i16: {
1909       prefslot_begin = 2; prefslot_end = 3;
1910       break;
1911     }
1912     case MVT::i32:
1913     case MVT::f32: {
1914       prefslot_begin = 0; prefslot_end = 3;
1915       break;
1916     }
1917     case MVT::i64:
1918     case MVT::f64: {
1919       prefslot_begin = 0; prefslot_end = 7;
1920       break;
1921     }
1922     }
1923
1924     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1925            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1926
1927     unsigned int ShufBytes[16];
1928     for (int i = 0; i < 16; ++i) {
1929       // zero fill uppper part of preferred slot, don't care about the
1930       // other slots:
1931       unsigned int mask_val;
1932       if (i <= prefslot_end) {
1933         mask_val =
1934           ((i < prefslot_begin)
1935            ? 0x80
1936            : elt_byte + (i - prefslot_begin));
1937
1938         ShufBytes[i] = mask_val;
1939       } else
1940         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1941     }
1942
1943     SDValue ShufMask[4];
1944     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1945       unsigned bidx = i * 4;
1946       unsigned int bits = ((ShufBytes[bidx] << 24) |
1947                            (ShufBytes[bidx+1] << 16) |
1948                            (ShufBytes[bidx+2] << 8) |
1949                            ShufBytes[bidx+3]);
1950       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1951     }
1952
1953     SDValue ShufMaskVec =
1954       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1955                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1956
1957     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1958                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1959                                      N, N, ShufMaskVec));
1960   } else {
1961     // Variable index: Rotate the requested element into slot 0, then replicate
1962     // slot 0 across the vector
1963     MVT VecVT = N.getValueType();
1964     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1965       llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
1966                         "vector type!");
1967     }
1968
1969     // Make life easier by making sure the index is zero-extended to i32
1970     if (Elt.getValueType() != MVT::i32)
1971       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
1972
1973     // Scale the index to a bit/byte shift quantity
1974     APInt scaleFactor =
1975             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1976     unsigned scaleShift = scaleFactor.logBase2();
1977     SDValue vecShift;
1978
1979     if (scaleShift > 0) {
1980       // Scale the shift factor:
1981       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
1982                         DAG.getConstant(scaleShift, MVT::i32));
1983     }
1984
1985     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
1986
1987     // Replicate the bytes starting at byte 0 across the entire vector (for
1988     // consistency with the notion of a unified register set)
1989     SDValue replicate;
1990
1991     switch (VT.getSimpleVT()) {
1992     default:
1993       llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
1994                         "type");
1995       /*NOTREACHED*/
1996     case MVT::i8: {
1997       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
1998       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1999                               factor, factor, factor, factor);
2000       break;
2001     }
2002     case MVT::i16: {
2003       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2004       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2005                               factor, factor, factor, factor);
2006       break;
2007     }
2008     case MVT::i32:
2009     case MVT::f32: {
2010       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2011       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2012                               factor, factor, factor, factor);
2013       break;
2014     }
2015     case MVT::i64:
2016     case MVT::f64: {
2017       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2018       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2019       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2020                               loFactor, hiFactor, loFactor, hiFactor);
2021       break;
2022     }
2023     }
2024
2025     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2026                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2027                                      vecShift, vecShift, replicate));
2028   }
2029
2030   return retval;
2031 }
2032
2033 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2034   SDValue VecOp = Op.getOperand(0);
2035   SDValue ValOp = Op.getOperand(1);
2036   SDValue IdxOp = Op.getOperand(2);
2037   DebugLoc dl = Op.getDebugLoc();
2038   MVT VT = Op.getValueType();
2039
2040   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2041   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2042
2043   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2044   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2045   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2046                                 DAG.getRegister(SPU::R1, PtrVT),
2047                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2048   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2049
2050   SDValue result =
2051     DAG.getNode(SPUISD::SHUFB, dl, VT,
2052                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2053                 VecOp,
2054                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2055
2056   return result;
2057 }
2058
2059 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2060                            const TargetLowering &TLI)
2061 {
2062   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2063   DebugLoc dl = Op.getDebugLoc();
2064   MVT ShiftVT = TLI.getShiftAmountTy();
2065
2066   assert(Op.getValueType() == MVT::i8);
2067   switch (Opc) {
2068   default:
2069     assert(0 && "Unhandled i8 math operator");
2070     /*NOTREACHED*/
2071     break;
2072   case ISD::ADD: {
2073     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2074     // the result:
2075     SDValue N1 = Op.getOperand(1);
2076     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2077     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2078     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2079                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2080
2081   }
2082
2083   case ISD::SUB: {
2084     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2085     // the result:
2086     SDValue N1 = Op.getOperand(1);
2087     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2088     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2089     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2090                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2091   }
2092   case ISD::ROTR:
2093   case ISD::ROTL: {
2094     SDValue N1 = Op.getOperand(1);
2095     MVT N1VT = N1.getValueType();
2096
2097     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2098     if (!N1VT.bitsEq(ShiftVT)) {
2099       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2100                        ? ISD::ZERO_EXTEND
2101                        : ISD::TRUNCATE;
2102       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2103     }
2104
2105     // Replicate lower 8-bits into upper 8:
2106     SDValue ExpandArg =
2107       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2108                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2109                               N0, DAG.getConstant(8, MVT::i32)));
2110
2111     // Truncate back down to i8
2112     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2113                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2114   }
2115   case ISD::SRL:
2116   case ISD::SHL: {
2117     SDValue N1 = Op.getOperand(1);
2118     MVT N1VT = N1.getValueType();
2119
2120     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2121     if (!N1VT.bitsEq(ShiftVT)) {
2122       unsigned N1Opc = ISD::ZERO_EXTEND;
2123
2124       if (N1.getValueType().bitsGT(ShiftVT))
2125         N1Opc = ISD::TRUNCATE;
2126
2127       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2128     }
2129
2130     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2131                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2132   }
2133   case ISD::SRA: {
2134     SDValue N1 = Op.getOperand(1);
2135     MVT N1VT = N1.getValueType();
2136
2137     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2138     if (!N1VT.bitsEq(ShiftVT)) {
2139       unsigned N1Opc = ISD::SIGN_EXTEND;
2140
2141       if (N1VT.bitsGT(ShiftVT))
2142         N1Opc = ISD::TRUNCATE;
2143       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2144     }
2145
2146     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2147                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2148   }
2149   case ISD::MUL: {
2150     SDValue N1 = Op.getOperand(1);
2151
2152     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2153     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2154     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2155                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2156     break;
2157   }
2158   }
2159
2160   return SDValue();
2161 }
2162
2163 //! Lower byte immediate operations for v16i8 vectors:
2164 static SDValue
2165 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2166   SDValue ConstVec;
2167   SDValue Arg;
2168   MVT VT = Op.getValueType();
2169   DebugLoc dl = Op.getDebugLoc();
2170
2171   ConstVec = Op.getOperand(0);
2172   Arg = Op.getOperand(1);
2173   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2174     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2175       ConstVec = ConstVec.getOperand(0);
2176     } else {
2177       ConstVec = Op.getOperand(1);
2178       Arg = Op.getOperand(0);
2179       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2180         ConstVec = ConstVec.getOperand(0);
2181       }
2182     }
2183   }
2184
2185   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2186     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2187     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2188
2189     APInt APSplatBits, APSplatUndef;
2190     unsigned SplatBitSize;
2191     bool HasAnyUndefs;
2192     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2193
2194     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2195                               HasAnyUndefs, minSplatBits)
2196         && minSplatBits <= SplatBitSize) {
2197       uint64_t SplatBits = APSplatBits.getZExtValue();
2198       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2199
2200       SmallVector<SDValue, 16> tcVec;
2201       tcVec.assign(16, tc);
2202       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2203                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2204     }
2205   }
2206
2207   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2208   // lowered.  Return the operation, rather than a null SDValue.
2209   return Op;
2210 }
2211
2212 //! Custom lowering for CTPOP (count population)
2213 /*!
2214   Custom lowering code that counts the number ones in the input
2215   operand. SPU has such an instruction, but it counts the number of
2216   ones per byte, which then have to be accumulated.
2217 */
2218 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2219   MVT VT = Op.getValueType();
2220   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2221   DebugLoc dl = Op.getDebugLoc();
2222
2223   switch (VT.getSimpleVT()) {
2224   default:
2225     assert(false && "Invalid value type!");
2226   case MVT::i8: {
2227     SDValue N = Op.getOperand(0);
2228     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2229
2230     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2231     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2232
2233     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2234   }
2235
2236   case MVT::i16: {
2237     MachineFunction &MF = DAG.getMachineFunction();
2238     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2239
2240     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2241
2242     SDValue N = Op.getOperand(0);
2243     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2244     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2245     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2246
2247     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2248     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2249
2250     // CNTB_result becomes the chain to which all of the virtual registers
2251     // CNTB_reg, SUM1_reg become associated:
2252     SDValue CNTB_result =
2253       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2254
2255     SDValue CNTB_rescopy =
2256       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2257
2258     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2259
2260     return DAG.getNode(ISD::AND, dl, MVT::i16,
2261                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2262                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2263                                                Tmp1, Shift1),
2264                                    Tmp1),
2265                        Mask0);
2266   }
2267
2268   case MVT::i32: {
2269     MachineFunction &MF = DAG.getMachineFunction();
2270     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2271
2272     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2273     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2274
2275     SDValue N = Op.getOperand(0);
2276     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2277     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2278     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2279     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2280
2281     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2282     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2283
2284     // CNTB_result becomes the chain to which all of the virtual registers
2285     // CNTB_reg, SUM1_reg become associated:
2286     SDValue CNTB_result =
2287       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2288
2289     SDValue CNTB_rescopy =
2290       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2291
2292     SDValue Comp1 =
2293       DAG.getNode(ISD::SRL, dl, MVT::i32,
2294                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2295                   Shift1);
2296
2297     SDValue Sum1 =
2298       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2299                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2300
2301     SDValue Sum1_rescopy =
2302       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2303
2304     SDValue Comp2 =
2305       DAG.getNode(ISD::SRL, dl, MVT::i32,
2306                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2307                   Shift2);
2308     SDValue Sum2 =
2309       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2310                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2311
2312     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2313   }
2314
2315   case MVT::i64:
2316     break;
2317   }
2318
2319   return SDValue();
2320 }
2321
2322 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2323 /*!
2324  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2325  All conversions to i64 are expanded to a libcall.
2326  */
2327 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2328                               SPUTargetLowering &TLI) {
2329   MVT OpVT = Op.getValueType();
2330   SDValue Op0 = Op.getOperand(0);
2331   MVT Op0VT = Op0.getValueType();
2332
2333   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2334       || OpVT == MVT::i64) {
2335     // Convert f32 / f64 to i32 / i64 via libcall.
2336     RTLIB::Libcall LC =
2337             (Op.getOpcode() == ISD::FP_TO_SINT)
2338              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2339              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2340     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2341     SDValue Dummy;
2342     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2343   }
2344
2345   return Op;
2346 }
2347
2348 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2349 /*!
2350  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2351  All conversions from i64 are expanded to a libcall.
2352  */
2353 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2354                               SPUTargetLowering &TLI) {
2355   MVT OpVT = Op.getValueType();
2356   SDValue Op0 = Op.getOperand(0);
2357   MVT Op0VT = Op0.getValueType();
2358
2359   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2360       || Op0VT == MVT::i64) {
2361     // Convert i32, i64 to f64 via libcall:
2362     RTLIB::Libcall LC =
2363             (Op.getOpcode() == ISD::SINT_TO_FP)
2364              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2365              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2366     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2367     SDValue Dummy;
2368     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2369   }
2370
2371   return Op;
2372 }
2373
2374 //! Lower ISD::SETCC
2375 /*!
2376  This handles MVT::f64 (double floating point) condition lowering
2377  */
2378 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2379                           const TargetLowering &TLI) {
2380   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2381   DebugLoc dl = Op.getDebugLoc();
2382   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2383
2384   SDValue lhs = Op.getOperand(0);
2385   SDValue rhs = Op.getOperand(1);
2386   MVT lhsVT = lhs.getValueType();
2387   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2388
2389   MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2390   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2391   MVT IntVT(MVT::i64);
2392
2393   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2394   // selected to a NOP:
2395   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2396   SDValue lhsHi32 =
2397           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2398                       DAG.getNode(ISD::SRL, dl, IntVT,
2399                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2400   SDValue lhsHi32abs =
2401           DAG.getNode(ISD::AND, dl, MVT::i32,
2402                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2403   SDValue lhsLo32 =
2404           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2405
2406   // SETO and SETUO only use the lhs operand:
2407   if (CC->get() == ISD::SETO) {
2408     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2409     // SETUO
2410     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2411     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2412                        DAG.getSetCC(dl, ccResultVT,
2413                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2414                                     ISD::SETUO),
2415                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2416   } else if (CC->get() == ISD::SETUO) {
2417     // Evaluates to true if Op0 is [SQ]NaN
2418     return DAG.getNode(ISD::AND, dl, ccResultVT,
2419                        DAG.getSetCC(dl, ccResultVT,
2420                                     lhsHi32abs,
2421                                     DAG.getConstant(0x7ff00000, MVT::i32),
2422                                     ISD::SETGE),
2423                        DAG.getSetCC(dl, ccResultVT,
2424                                     lhsLo32,
2425                                     DAG.getConstant(0, MVT::i32),
2426                                     ISD::SETGT));
2427   }
2428
2429   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2430   SDValue rhsHi32 =
2431           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2432                       DAG.getNode(ISD::SRL, dl, IntVT,
2433                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2434
2435   // If a value is negative, subtract from the sign magnitude constant:
2436   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2437
2438   // Convert the sign-magnitude representation into 2's complement:
2439   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2440                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2441   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2442   SDValue lhsSelect =
2443           DAG.getNode(ISD::SELECT, dl, IntVT,
2444                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2445
2446   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2447                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2448   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2449   SDValue rhsSelect =
2450           DAG.getNode(ISD::SELECT, dl, IntVT,
2451                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2452
2453   unsigned compareOp;
2454
2455   switch (CC->get()) {
2456   case ISD::SETOEQ:
2457   case ISD::SETUEQ:
2458     compareOp = ISD::SETEQ; break;
2459   case ISD::SETOGT:
2460   case ISD::SETUGT:
2461     compareOp = ISD::SETGT; break;
2462   case ISD::SETOGE:
2463   case ISD::SETUGE:
2464     compareOp = ISD::SETGE; break;
2465   case ISD::SETOLT:
2466   case ISD::SETULT:
2467     compareOp = ISD::SETLT; break;
2468   case ISD::SETOLE:
2469   case ISD::SETULE:
2470     compareOp = ISD::SETLE; break;
2471   case ISD::SETUNE:
2472   case ISD::SETONE:
2473     compareOp = ISD::SETNE; break;
2474   default:
2475     llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2476   }
2477
2478   SDValue result =
2479           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2480                        (ISD::CondCode) compareOp);
2481
2482   if ((CC->get() & 0x8) == 0) {
2483     // Ordered comparison:
2484     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2485                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2486                                   ISD::SETO);
2487     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2488                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2489                                   ISD::SETO);
2490     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2491
2492     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2493   }
2494
2495   return result;
2496 }
2497
2498 //! Lower ISD::SELECT_CC
2499 /*!
2500   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2501   SELB instruction.
2502
2503   \note Need to revisit this in the future: if the code path through the true
2504   and false value computations is longer than the latency of a branch (6
2505   cycles), then it would be more advantageous to branch and insert a new basic
2506   block and branch on the condition. However, this code does not make that
2507   assumption, given the simplisitc uses so far.
2508  */
2509
2510 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2511                               const TargetLowering &TLI) {
2512   MVT VT = Op.getValueType();
2513   SDValue lhs = Op.getOperand(0);
2514   SDValue rhs = Op.getOperand(1);
2515   SDValue trueval = Op.getOperand(2);
2516   SDValue falseval = Op.getOperand(3);
2517   SDValue condition = Op.getOperand(4);
2518   DebugLoc dl = Op.getDebugLoc();
2519
2520   // NOTE: SELB's arguments: $rA, $rB, $mask
2521   //
2522   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2523   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2524   // condition was true and 0s where the condition was false. Hence, the
2525   // arguments to SELB get reversed.
2526
2527   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2528   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2529   // with another "cannot select select_cc" assert:
2530
2531   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2532                                 TLI.getSetCCResultType(Op.getValueType()),
2533                                 lhs, rhs, condition);
2534   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2535 }
2536
2537 //! Custom lower ISD::TRUNCATE
2538 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2539 {
2540   // Type to truncate to
2541   MVT VT = Op.getValueType();
2542   MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2543   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2544   DebugLoc dl = Op.getDebugLoc();
2545
2546   // Type to truncate from
2547   SDValue Op0 = Op.getOperand(0);
2548   MVT Op0VT = Op0.getValueType();
2549
2550   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2551     // Create shuffle mask, least significant doubleword of quadword
2552     unsigned maskHigh = 0x08090a0b;
2553     unsigned maskLow = 0x0c0d0e0f;
2554     // Use a shuffle to perform the truncation
2555     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2556                                    DAG.getConstant(maskHigh, MVT::i32),
2557                                    DAG.getConstant(maskLow, MVT::i32),
2558                                    DAG.getConstant(maskHigh, MVT::i32),
2559                                    DAG.getConstant(maskLow, MVT::i32));
2560
2561     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2562                                        Op0, Op0, shufMask);
2563
2564     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2565   }
2566
2567   return SDValue();             // Leave the truncate unmolested
2568 }
2569
2570 //! Custom (target-specific) lowering entry point
2571 /*!
2572   This is where LLVM's DAG selection process calls to do target-specific
2573   lowering of nodes.
2574  */
2575 SDValue
2576 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2577 {
2578   unsigned Opc = (unsigned) Op.getOpcode();
2579   MVT VT = Op.getValueType();
2580
2581   switch (Opc) {
2582   default: {
2583 #ifndef NDEBUG
2584     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2585     cerr << "Op.getOpcode() = " << Opc << "\n";
2586     cerr << "*Op.getNode():\n";
2587     Op.getNode()->dump();
2588 #endif
2589     llvm_unreachable();
2590   }
2591   case ISD::LOAD:
2592   case ISD::EXTLOAD:
2593   case ISD::SEXTLOAD:
2594   case ISD::ZEXTLOAD:
2595     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2596   case ISD::STORE:
2597     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2598   case ISD::ConstantPool:
2599     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2600   case ISD::GlobalAddress:
2601     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2602   case ISD::JumpTable:
2603     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2604   case ISD::ConstantFP:
2605     return LowerConstantFP(Op, DAG);
2606   case ISD::FORMAL_ARGUMENTS:
2607     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2608   case ISD::CALL:
2609     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2610   case ISD::RET:
2611     return LowerRET(Op, DAG, getTargetMachine());
2612
2613   // i8, i64 math ops:
2614   case ISD::ADD:
2615   case ISD::SUB:
2616   case ISD::ROTR:
2617   case ISD::ROTL:
2618   case ISD::SRL:
2619   case ISD::SHL:
2620   case ISD::SRA: {
2621     if (VT == MVT::i8)
2622       return LowerI8Math(Op, DAG, Opc, *this);
2623     break;
2624   }
2625
2626   case ISD::FP_TO_SINT:
2627   case ISD::FP_TO_UINT:
2628     return LowerFP_TO_INT(Op, DAG, *this);
2629
2630   case ISD::SINT_TO_FP:
2631   case ISD::UINT_TO_FP:
2632     return LowerINT_TO_FP(Op, DAG, *this);
2633
2634   // Vector-related lowering.
2635   case ISD::BUILD_VECTOR:
2636     return LowerBUILD_VECTOR(Op, DAG);
2637   case ISD::SCALAR_TO_VECTOR:
2638     return LowerSCALAR_TO_VECTOR(Op, DAG);
2639   case ISD::VECTOR_SHUFFLE:
2640     return LowerVECTOR_SHUFFLE(Op, DAG);
2641   case ISD::EXTRACT_VECTOR_ELT:
2642     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2643   case ISD::INSERT_VECTOR_ELT:
2644     return LowerINSERT_VECTOR_ELT(Op, DAG);
2645
2646   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2647   case ISD::AND:
2648   case ISD::OR:
2649   case ISD::XOR:
2650     return LowerByteImmed(Op, DAG);
2651
2652   // Vector and i8 multiply:
2653   case ISD::MUL:
2654     if (VT == MVT::i8)
2655       return LowerI8Math(Op, DAG, Opc, *this);
2656
2657   case ISD::CTPOP:
2658     return LowerCTPOP(Op, DAG);
2659
2660   case ISD::SELECT_CC:
2661     return LowerSELECT_CC(Op, DAG, *this);
2662
2663   case ISD::SETCC:
2664     return LowerSETCC(Op, DAG, *this);
2665
2666   case ISD::TRUNCATE:
2667     return LowerTRUNCATE(Op, DAG);
2668   }
2669
2670   return SDValue();
2671 }
2672
2673 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2674                                            SmallVectorImpl<SDValue>&Results,
2675                                            SelectionDAG &DAG)
2676 {
2677 #if 0
2678   unsigned Opc = (unsigned) N->getOpcode();
2679   MVT OpVT = N->getValueType(0);
2680
2681   switch (Opc) {
2682   default: {
2683     cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2684     cerr << "Op.getOpcode() = " << Opc << "\n";
2685     cerr << "*Op.getNode():\n";
2686     N->dump();
2687     abort();
2688     /*NOTREACHED*/
2689   }
2690   }
2691 #endif
2692
2693   /* Otherwise, return unchanged */
2694 }
2695
2696 //===----------------------------------------------------------------------===//
2697 // Target Optimization Hooks
2698 //===----------------------------------------------------------------------===//
2699
2700 SDValue
2701 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2702 {
2703 #if 0
2704   TargetMachine &TM = getTargetMachine();
2705 #endif
2706   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2707   SelectionDAG &DAG = DCI.DAG;
2708   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2709   MVT NodeVT = N->getValueType(0);      // The node's value type
2710   MVT Op0VT = Op0.getValueType();       // The first operand's result
2711   SDValue Result;                       // Initially, empty result
2712   DebugLoc dl = N->getDebugLoc();
2713
2714   switch (N->getOpcode()) {
2715   default: break;
2716   case ISD::ADD: {
2717     SDValue Op1 = N->getOperand(1);
2718
2719     if (Op0.getOpcode() == SPUISD::IndirectAddr
2720         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2721       // Normalize the operands to reduce repeated code
2722       SDValue IndirectArg = Op0, AddArg = Op1;
2723
2724       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2725         IndirectArg = Op1;
2726         AddArg = Op0;
2727       }
2728
2729       if (isa<ConstantSDNode>(AddArg)) {
2730         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2731         SDValue IndOp1 = IndirectArg.getOperand(1);
2732
2733         if (CN0->isNullValue()) {
2734           // (add (SPUindirect <arg>, <arg>), 0) ->
2735           // (SPUindirect <arg>, <arg>)
2736
2737 #if !defined(NDEBUG)
2738           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2739             cerr << "\n"
2740                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2741                  << "With:    (SPUindirect <arg>, <arg>)\n";
2742           }
2743 #endif
2744
2745           return IndirectArg;
2746         } else if (isa<ConstantSDNode>(IndOp1)) {
2747           // (add (SPUindirect <arg>, <const>), <const>) ->
2748           // (SPUindirect <arg>, <const + const>)
2749           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2750           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2751           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2752
2753 #if !defined(NDEBUG)
2754           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2755             cerr << "\n"
2756                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2757                  << "), " << CN0->getSExtValue() << ")\n"
2758                  << "With:    (SPUindirect <arg>, "
2759                  << combinedConst << ")\n";
2760           }
2761 #endif
2762
2763           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2764                              IndirectArg, combinedValue);
2765         }
2766       }
2767     }
2768     break;
2769   }
2770   case ISD::SIGN_EXTEND:
2771   case ISD::ZERO_EXTEND:
2772   case ISD::ANY_EXTEND: {
2773     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2774       // (any_extend (SPUextract_elt0 <arg>)) ->
2775       // (SPUextract_elt0 <arg>)
2776       // Types must match, however...
2777 #if !defined(NDEBUG)
2778       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2779         cerr << "\nReplace: ";
2780         N->dump(&DAG);
2781         cerr << "\nWith:    ";
2782         Op0.getNode()->dump(&DAG);
2783         cerr << "\n";
2784       }
2785 #endif
2786
2787       return Op0;
2788     }
2789     break;
2790   }
2791   case SPUISD::IndirectAddr: {
2792     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2793       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2794       if (CN != 0 && CN->getZExtValue() == 0) {
2795         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2796         // (SPUaform <addr>, 0)
2797
2798         DEBUG(cerr << "Replace: ");
2799         DEBUG(N->dump(&DAG));
2800         DEBUG(cerr << "\nWith:    ");
2801         DEBUG(Op0.getNode()->dump(&DAG));
2802         DEBUG(cerr << "\n");
2803
2804         return Op0;
2805       }
2806     } else if (Op0.getOpcode() == ISD::ADD) {
2807       SDValue Op1 = N->getOperand(1);
2808       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2809         // (SPUindirect (add <arg>, <arg>), 0) ->
2810         // (SPUindirect <arg>, <arg>)
2811         if (CN1->isNullValue()) {
2812
2813 #if !defined(NDEBUG)
2814           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2815             cerr << "\n"
2816                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2817                  << "With:    (SPUindirect <arg>, <arg>)\n";
2818           }
2819 #endif
2820
2821           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2822                              Op0.getOperand(0), Op0.getOperand(1));
2823         }
2824       }
2825     }
2826     break;
2827   }
2828   case SPUISD::SHLQUAD_L_BITS:
2829   case SPUISD::SHLQUAD_L_BYTES:
2830   case SPUISD::VEC_SHL:
2831   case SPUISD::VEC_SRL:
2832   case SPUISD::VEC_SRA:
2833   case SPUISD::ROTBYTES_LEFT: {
2834     SDValue Op1 = N->getOperand(1);
2835
2836     // Kill degenerate vector shifts:
2837     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2838       if (CN->isNullValue()) {
2839         Result = Op0;
2840       }
2841     }
2842     break;
2843   }
2844   case SPUISD::PREFSLOT2VEC: {
2845     switch (Op0.getOpcode()) {
2846     default:
2847       break;
2848     case ISD::ANY_EXTEND:
2849     case ISD::ZERO_EXTEND:
2850     case ISD::SIGN_EXTEND: {
2851       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2852       // <arg>
2853       // but only if the SPUprefslot2vec and <arg> types match.
2854       SDValue Op00 = Op0.getOperand(0);
2855       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2856         SDValue Op000 = Op00.getOperand(0);
2857         if (Op000.getValueType() == NodeVT) {
2858           Result = Op000;
2859         }
2860       }
2861       break;
2862     }
2863     case SPUISD::VEC2PREFSLOT: {
2864       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2865       // <arg>
2866       Result = Op0.getOperand(0);
2867       break;
2868     }
2869     }
2870     break;
2871   }
2872   }
2873
2874   // Otherwise, return unchanged.
2875 #ifndef NDEBUG
2876   if (Result.getNode()) {
2877     DEBUG(cerr << "\nReplace.SPU: ");
2878     DEBUG(N->dump(&DAG));
2879     DEBUG(cerr << "\nWith:        ");
2880     DEBUG(Result.getNode()->dump(&DAG));
2881     DEBUG(cerr << "\n");
2882   }
2883 #endif
2884
2885   return Result;
2886 }
2887
2888 //===----------------------------------------------------------------------===//
2889 // Inline Assembly Support
2890 //===----------------------------------------------------------------------===//
2891
2892 /// getConstraintType - Given a constraint letter, return the type of
2893 /// constraint it is for this target.
2894 SPUTargetLowering::ConstraintType
2895 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2896   if (ConstraintLetter.size() == 1) {
2897     switch (ConstraintLetter[0]) {
2898     default: break;
2899     case 'b':
2900     case 'r':
2901     case 'f':
2902     case 'v':
2903     case 'y':
2904       return C_RegisterClass;
2905     }
2906   }
2907   return TargetLowering::getConstraintType(ConstraintLetter);
2908 }
2909
2910 std::pair<unsigned, const TargetRegisterClass*>
2911 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2912                                                 MVT VT) const
2913 {
2914   if (Constraint.size() == 1) {
2915     // GCC RS6000 Constraint Letters
2916     switch (Constraint[0]) {
2917     case 'b':   // R1-R31
2918     case 'r':   // R0-R31
2919       if (VT == MVT::i64)
2920         return std::make_pair(0U, SPU::R64CRegisterClass);
2921       return std::make_pair(0U, SPU::R32CRegisterClass);
2922     case 'f':
2923       if (VT == MVT::f32)
2924         return std::make_pair(0U, SPU::R32FPRegisterClass);
2925       else if (VT == MVT::f64)
2926         return std::make_pair(0U, SPU::R64FPRegisterClass);
2927       break;
2928     case 'v':
2929       return std::make_pair(0U, SPU::GPRCRegisterClass);
2930     }
2931   }
2932
2933   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2934 }
2935
2936 //! Compute used/known bits for a SPU operand
2937 void
2938 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2939                                                   const APInt &Mask,
2940                                                   APInt &KnownZero,
2941                                                   APInt &KnownOne,
2942                                                   const SelectionDAG &DAG,
2943                                                   unsigned Depth ) const {
2944 #if 0
2945   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
2946
2947   switch (Op.getOpcode()) {
2948   default:
2949     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2950     break;
2951   case CALL:
2952   case SHUFB:
2953   case SHUFFLE_MASK:
2954   case CNTB:
2955   case SPUISD::PREFSLOT2VEC:
2956   case SPUISD::LDRESULT:
2957   case SPUISD::VEC2PREFSLOT:
2958   case SPUISD::SHLQUAD_L_BITS:
2959   case SPUISD::SHLQUAD_L_BYTES:
2960   case SPUISD::VEC_SHL:
2961   case SPUISD::VEC_SRL:
2962   case SPUISD::VEC_SRA:
2963   case SPUISD::VEC_ROTL:
2964   case SPUISD::VEC_ROTR:
2965   case SPUISD::ROTBYTES_LEFT:
2966   case SPUISD::SELECT_MASK:
2967   case SPUISD::SELB:
2968   }
2969 #endif
2970 }
2971
2972 unsigned
2973 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2974                                                    unsigned Depth) const {
2975   switch (Op.getOpcode()) {
2976   default:
2977     return 1;
2978
2979   case ISD::SETCC: {
2980     MVT VT = Op.getValueType();
2981
2982     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2983       VT = MVT::i32;
2984     }
2985     return VT.getSizeInBits();
2986   }
2987   }
2988 }
2989
2990 // LowerAsmOperandForConstraint
2991 void
2992 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2993                                                 char ConstraintLetter,
2994                                                 bool hasMemory,
2995                                                 std::vector<SDValue> &Ops,
2996                                                 SelectionDAG &DAG) const {
2997   // Default, for the time being, to the base class handler
2998   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2999                                                Ops, DAG);
3000 }
3001
3002 /// isLegalAddressImmediate - Return true if the integer value can be used
3003 /// as the offset of the target addressing mode.
3004 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3005                                                 const Type *Ty) const {
3006   // SPU's addresses are 256K:
3007   return (V > -(1 << 18) && V < (1 << 18) - 1);
3008 }
3009
3010 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3011   return false;
3012 }
3013
3014 bool
3015 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3016   // The SPU target isn't yet aware of offsets.
3017   return false;
3018 }