1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
37 // Used in getTargetNodeName() below
// Lazily populated opcode -> printable-name map, filled on the first call to
// getTargetNodeName().  NOTE(review): file-scope mutable state; safe only
// because DAG lowering runs single-threaded.
39 std::map<unsigned, const char *> node_names;
41 //! MVT mapping to useful data for Cell SPU
// Per-value-type table entry.  Only the preferred-slot byte offset is visible
// at this point; other members (e.g. the 'valtype' key read below) are not
// shown in this excerpt.
42 struct valtype_map_s {
44 const int prefslot_byte;
// Table of per-MVT entries (initializers elided in this excerpt).
47 const valtype_map_s valtype_map[] = {
// Element count of valtype_map, derived from the array's static size.
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear search of valtype_map for the entry whose valtype matches VT.
// Per the diagnostic below, returns 0 (null) when VT has no entry, so
// callers must be prepared for a null result.
60 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
// Debug aid when no entry was found (remainder of diagnostic elided).
72 cerr << "getValueTypeMapEntry returns NULL for "
// Constructor: configures the Cell SPU lowering policy -- register classes,
// per-type operation legality (Legal/Promote/Custom/Expand), load-extension
// handling, and DAG-combine hooks.  Several loop headers and closing braces
// are not visible in this excerpt of the file.
84 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
88 // Fold away setcc operations if possible.
91 // Use _setjmp/_longjmp instead of setjmp/longjmp.
92 setUseUnderscoreSetJmp(true);
93 setUseUnderscoreLongJmp(true);
95 // Set up the SPU's register classes:
96 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
97 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
98 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
99 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
100 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
101 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
102 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
104 // SPU has no sign or zero extended loads for i1, i8, i16:
105 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
106 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
107 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
109 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
110 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
112 // SPU constant load actions are custom lowered:
113 setOperationAction(ISD::Constant, MVT::i64, Custom);
114 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
115 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
117 // SPU's loads and stores have to be custom lowered:
118 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
120 MVT VT = (MVT::SimpleValueType)sctype;
122 setOperationAction(ISD::LOAD, VT, Custom);
123 setOperationAction(ISD::STORE, VT, Custom);
124 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
125 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
126 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
128 // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
129 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
130 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// Expand truncating stores from VT down to every narrower integer type.
132 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
133 MVT StoreVT = (MVT::SimpleValueType) stype;
134 setTruncStoreAction(VT, StoreVT, Expand);
// Same custom load/store treatment for the scalar FP types.
138 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
140 MVT VT = (MVT::SimpleValueType) sctype;
142 setOperationAction(ISD::LOAD, VT, Custom);
143 setOperationAction(ISD::STORE, VT, Custom);
145 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
146 MVT StoreVT = (MVT::SimpleValueType) stype;
147 setTruncStoreAction(VT, StoreVT, Expand);
151 // Expand the jumptable branches
152 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
153 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
155 // Custom lower SELECT_CC for most cases, but expand by default
156 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
157 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
158 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
159 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
160 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
162 // SPU has no intrinsics for these particular operations:
163 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
165 // SPU has no SREM/UREM instructions
166 setOperationAction(ISD::SREM, MVT::i32, Expand);
167 setOperationAction(ISD::UREM, MVT::i32, Expand);
168 setOperationAction(ISD::SREM, MVT::i64, Expand);
169 setOperationAction(ISD::UREM, MVT::i64, Expand);
171 // We don't support sin/cos/sqrt/fmod
172 setOperationAction(ISD::FSIN , MVT::f64, Expand);
173 setOperationAction(ISD::FCOS , MVT::f64, Expand);
174 setOperationAction(ISD::FREM , MVT::f64, Expand);
175 setOperationAction(ISD::FSIN , MVT::f32, Expand);
176 setOperationAction(ISD::FCOS , MVT::f32, Expand);
177 setOperationAction(ISD::FREM , MVT::f32, Expand);
179 // If we're enabling GP optimizations, use hardware square root
180 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
181 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
183 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
184 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
186 // SPU can do rotate right and left, so legalize it... but customize for i8
187 // because instructions don't exist.
189 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
191 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
192 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
193 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
195 setOperationAction(ISD::ROTL, MVT::i32, Legal);
196 setOperationAction(ISD::ROTL, MVT::i16, Legal);
197 setOperationAction(ISD::ROTL, MVT::i8, Custom);
199 // SPU has no native version of shift left/right for i8
200 setOperationAction(ISD::SHL, MVT::i8, Custom);
201 setOperationAction(ISD::SRL, MVT::i8, Custom);
202 setOperationAction(ISD::SRA, MVT::i8, Custom);
204 // Make these operations legal and handle them during instruction selection:
205 setOperationAction(ISD::SHL, MVT::i64, Legal);
206 setOperationAction(ISD::SRL, MVT::i64, Legal);
207 setOperationAction(ISD::SRA, MVT::i64, Legal);
209 // Custom lower i8, i32 and i64 multiplications
// NOTE(review): comment says i32/i64 MUL are custom lowered, but the
// actions below mark them Legal -- comment and code disagree; confirm intent.
210 setOperationAction(ISD::MUL, MVT::i8, Custom);
211 setOperationAction(ISD::MUL, MVT::i32, Legal);
212 setOperationAction(ISD::MUL, MVT::i64, Legal);
214 // Need to custom handle (some) common i8, i64 math ops
215 setOperationAction(ISD::ADD, MVT::i8, Custom);
216 setOperationAction(ISD::ADD, MVT::i64, Legal);
217 setOperationAction(ISD::SUB, MVT::i8, Custom);
218 setOperationAction(ISD::SUB, MVT::i64, Legal);
220 // SPU does not have BSWAP. It does have i32 support CTLZ.
221 // CTPOP has to be custom lowered.
222 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
223 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
225 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
226 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
227 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
228 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
230 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
231 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
233 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
235 // SPU has a version of select that implements (a&~c)|(b&c), just like
236 // select ought to work:
237 setOperationAction(ISD::SELECT, MVT::i8, Legal);
238 setOperationAction(ISD::SELECT, MVT::i16, Legal);
239 setOperationAction(ISD::SELECT, MVT::i32, Legal);
240 setOperationAction(ISD::SELECT, MVT::i64, Legal);
242 setOperationAction(ISD::SETCC, MVT::i8, Legal);
243 setOperationAction(ISD::SETCC, MVT::i16, Legal);
244 setOperationAction(ISD::SETCC, MVT::i32, Legal);
245 setOperationAction(ISD::SETCC, MVT::i64, Legal);
247 // Custom lower i128 -> i64 truncates
248 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
250 // SPU has a legal FP -> signed INT instruction
251 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
252 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
253 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
254 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
256 // FDIV on SPU requires custom lowering
257 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
259 // SPU has [U|S]INT_TO_FP
260 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
261 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
262 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
263 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
264 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
265 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
266 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
267 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
269 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
270 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
271 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
272 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
274 // We cannot sextinreg(i1). Expand to shifts.
275 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
277 // Support label based line numbers.
278 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
279 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
281 // We want to legalize GlobalAddress and ConstantPool nodes into the
282 // appropriate instructions to materialize the address.
283 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
285 MVT VT = (MVT::SimpleValueType)sctype;
287 setOperationAction(ISD::GlobalAddress, VT, Custom);
288 setOperationAction(ISD::ConstantPool, VT, Custom);
289 setOperationAction(ISD::JumpTable, VT, Custom);
292 // RET must be custom lowered, to meet ABI requirements
293 setOperationAction(ISD::RET, MVT::Other, Custom);
295 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
296 setOperationAction(ISD::VASTART , MVT::Other, Custom);
298 // Use the default implementation.
299 setOperationAction(ISD::VAARG , MVT::Other, Expand);
300 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
301 setOperationAction(ISD::VAEND , MVT::Other, Expand);
302 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
303 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
304 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
305 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
307 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): FP_TO_SINT/SINT_TO_FP for i64 were already set to Custom
// above -- these two calls are redundant duplicates (harmless).
308 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
309 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
311 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this overrides the earlier 'Legal' setting for
// FP_TO_UINT/i32 above; last call wins.
312 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
314 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
315 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
317 // First set operation action for all vector types to expand. Then we
318 // will selectively turn on ones that can be effectively codegen'd.
319 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
321 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
322 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
323 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
324 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
326 // "Odd size" vector classes that we're willing to support:
327 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
// Per-vector-type action setup over every vector MVT.
329 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
330 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
331 MVT VT = (MVT::SimpleValueType)i;
333 // add/sub are legal for all supported vector VT's.
334 setOperationAction(ISD::ADD , VT, Legal);
335 setOperationAction(ISD::SUB , VT, Legal);
336 // mul has to be custom lowered.
337 // TODO: v2i64 vector multiply
// NOTE(review): comment above says custom, but MUL is marked Legal here.
338 setOperationAction(ISD::MUL , VT, Legal);
340 setOperationAction(ISD::AND , VT, Legal);
341 setOperationAction(ISD::OR , VT, Legal);
342 setOperationAction(ISD::XOR , VT, Legal);
343 setOperationAction(ISD::LOAD , VT, Legal);
344 setOperationAction(ISD::SELECT, VT, Legal);
345 setOperationAction(ISD::STORE, VT, Legal);
347 // These operations need to be expanded:
348 setOperationAction(ISD::SDIV, VT, Expand);
349 setOperationAction(ISD::SREM, VT, Expand);
350 setOperationAction(ISD::UDIV, VT, Expand);
351 setOperationAction(ISD::UREM, VT, Expand);
353 // Custom lower build_vector, constant pool spills, insert and
354 // extract vector elements:
355 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
356 setOperationAction(ISD::ConstantPool, VT, Custom);
357 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
358 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
359 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
360 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// v16i8 logical ops and v4f32 scalar_to_vector get special handling,
// overriding the generic per-VT settings from the loop above.
363 setOperationAction(ISD::AND, MVT::v16i8, Custom);
364 setOperationAction(ISD::OR, MVT::v16i8, Custom);
365 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
366 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
368 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
370 setShiftAmountType(MVT::i32);
// SETCC on SPU yields 0 / -1, matching the SELB select-mask convention.
371 setBooleanContents(ZeroOrNegativeOneBooleanContent);
373 setStackPointerRegisterToSaveRestore(SPU::R1);
375 // We have target-specific dag combine patterns for the following nodes:
376 setTargetDAGCombine(ISD::ADD);
377 setTargetDAGCombine(ISD::ZERO_EXTEND);
378 setTargetDAGCombine(ISD::SIGN_EXTEND);
379 setTargetDAGCombine(ISD::ANY_EXTEND);
381 computeRegisterProperties();
383 // Set pre-RA register scheduler default to BURR, which produces slightly
384 // better code than the default (could also be TDRR, but TargetLowering.h
385 // needs a mod to support that model):
386 setSchedulingPreference(SchedulingForRegPressure);
// Returns the printable name for a target-specific (SPUISD) opcode, or 0
// for unknown opcodes.  The node_names map is populated lazily on first
// call.  NOTE(review): unsynchronized lazy init -- fine single-threaded.
// (Return-type line not visible in this excerpt.)
390 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
392 if (node_names.empty()) {
393 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
394 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
395 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
396 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
397 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
398 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
399 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
400 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
401 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
402 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
403 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
404 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
405 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
406 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
407 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
408 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
409 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
410 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
411 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
412 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
413 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
414 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
415 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
416 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
417 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
// Lookup; null pointer signals "no name" for this opcode.
420 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
422 return ((i != node_names.end()) ? i->second : 0);
425 //===----------------------------------------------------------------------===//
426 // Return the Cell SPU's SETCC result type
427 //===----------------------------------------------------------------------===//
// SETCC on i8/i16/i32 produces a result of the operand's own type;
// all other operand types compare into an i32 result.
429 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
430 // i16 and i32 are valid SETCC result types
431 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
434 //===----------------------------------------------------------------------===//
435 // Calling convention code:
436 //===----------------------------------------------------------------------===//
438 #include "SPUGenCallingConv.inc"
440 //===----------------------------------------------------------------------===//
441 // LowerOperation implementation
442 //===----------------------------------------------------------------------===//
444 /// Custom lower loads for CellSPU
446 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
447 within a 16-byte block, we have to rotate to extract the requested element.
449 For extending loads, we also want to ensure that the following sequence is
450 emitted, e.g. for MVT::f32 extending load to MVT::f64:
454 %2 v16i8,ch = rotate %1
455 %3 v4f32, ch = bitconvert %2
456 %4 f32 = vec2prefslot %3
457 %5 f64 = fp_extend %4
// Custom lowering for CellSPU loads (UNINDEXED addressing only): re-emit the
// access as an aligned v16i8 quadword load, rotate the requested element
// into the type's preferred slot, extract it, and apply any sext/zext/ext
// extension.  NOTE(review): several local declarations (e.g. 'result',
// 'rotate', 'CN', 'Flag') and some closing braces are not visible in this
// excerpt of the file.
461 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
462 LoadSDNode *LN = cast<LoadSDNode>(Op);
463 SDValue the_chain = LN->getChain();
464 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
465 MVT InVT = LN->getMemoryVT();
466 MVT OutVT = Op.getValueType();
467 ISD::LoadExtType ExtType = LN->getExtensionType();
468 unsigned alignment = LN->getAlignment();
// Preferred-slot data for the in-memory type.  getValueTypeMapEntry can
// return 0 for unmapped types; vtm is dereferenced below without a null
// check -- assumes InVT is always mapped (TODO confirm).
469 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
471 switch (LN->getAddressingMode()) {
472 case ISD::UNINDEXED: {
474 SDValue basePtr = LN->getBasePtr();
// 16-byte-aligned loads: the rotate amount can often be folded to a
// compile-time constant.
477 if (alignment == 16) {
480 // Special cases for a known aligned load to simplify the base pointer
481 // and the rotation amount:
482 if (basePtr.getOpcode() == ISD::ADD
483 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
484 // Known offset into basePtr
485 int64_t offset = CN->getSExtValue();
// Constant rotate: byte position within the quadword minus the
// preferred-slot byte of the value type.
486 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
491 rotate = DAG.getConstant(rotamt, MVT::i16);
493 // Simplify the base pointer for this case:
494 basePtr = basePtr.getOperand(0);
// Fold the quadword-aligned part of the offset back into the address.
495 if ((offset & ~0xf) > 0) {
496 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
498 DAG.getConstant((offset & ~0xf), PtrVT));
500 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
501 || (basePtr.getOpcode() == SPUISD::IndirectAddr
502 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
503 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
504 // Plain aligned a-form address: rotate into preferred slot
505 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
506 int64_t rotamt = -vtm->prefslot_byte;
509 rotate = DAG.getConstant(rotamt, MVT::i16);
511 // Offset the rotate amount by the basePtr and the preferred slot
// Runtime rotate: basePtr's low bits plus the preferred-slot bias.
513 int64_t rotamt = -vtm->prefslot_byte;
516 rotate = DAG.getNode(ISD::ADD, PtrVT,
518 DAG.getConstant(rotamt, PtrVT));
521 // Unaligned load: must be more pessimistic about addressing modes:
522 if (basePtr.getOpcode() == ISD::ADD) {
523 MachineFunction &MF = DAG.getMachineFunction();
524 MachineRegisterInfo &RegInfo = MF.getRegInfo();
525 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
528 SDValue Op0 = basePtr.getOperand(0);
529 SDValue Op1 = basePtr.getOperand(1);
531 if (isa<ConstantSDNode>(Op1)) {
532 // Convert the (add <ptr>, <const>) to an indirect address contained
533 // in a register. Note that this is done because we need to avoid
534 // creating a 0(reg) d-form address due to the SPU's block loads.
535 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
536 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
537 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
539 // Convert the (add <arg1>, <arg2>) to an indirect address, which
540 // will likely be lowered as a reg(reg) x-form address.
541 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
// Non-ADD base pointer: wrap it as indirect with a zero offset.
544 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
546 DAG.getConstant(0, PtrVT));
549 // Offset the rotate amount by the basePtr and the preferred slot
551 rotate = DAG.getNode(ISD::ADD, PtrVT,
553 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
556 // Re-emit as a v16i8 vector load
// Always loads the full 16-byte quadword at 16-byte alignment.
557 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
558 LN->getSrcValue(), LN->getSrcValueOffset(),
559 LN->isVolatile(), 16);
// Value 1 of the load is the output chain.
562 the_chain = result.getValue(1);
564 // Rotate into the preferred slot:
565 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
566 result.getValue(0), rotate);
568 // Convert the loaded v16i8 vector to the appropriate vector type
569 // specified by the operand:
570 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
571 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
572 DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
574 // Handle extending loads by extending the scalar result:
575 if (ExtType == ISD::SEXTLOAD) {
576 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
577 } else if (ExtType == ISD::ZEXTLOAD) {
578 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
579 } else if (ExtType == ISD::EXTLOAD) {
580 unsigned NewOpc = ISD::ANY_EXTEND;
// FP extending loads use FP_EXTEND rather than integer any-extend.
582 if (OutVT.isFloatingPoint())
583 NewOpc = ISD::FP_EXTEND;
585 result = DAG.getNode(NewOpc, OutVT, result);
// Package value + chain as an LDRESULT node so callers see both results.
588 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
589 SDValue retops[2] = {
594 result = DAG.getNode(SPUISD::LDRESULT, retvts,
595 retops, sizeof(retops) / sizeof(retops[0]));
// Indexed addressing modes are not supported -- diagnostic + error path
// (partially elided in this excerpt).
602 case ISD::LAST_INDEXED_MODE:
603 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
605 cerr << (unsigned) LN->getAddressingMode() << "\n";
613 /// Custom lower stores for CellSPU
615 All CellSPU stores are aligned to 16-byte boundaries, so for elements
616 within a 16-byte block, we have to generate a shuffle to insert the
617 requested element into its place, then store the resulting block.
// Custom lowering for CellSPU stores (UNINDEXED addressing only): load the
// containing 16-byte quadword, SHUFB the scalar into its slot using a
// shuffle mask built from the insertion offset, then store the whole
// quadword back.  NOTE(review): several local declarations (e.g. 'result',
// 'CN', 'Flag') and some closing braces are not visible in this excerpt.
620 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
621 StoreSDNode *SN = cast<StoreSDNode>(Op);
622 SDValue Value = SN->getValue();
623 MVT VT = Value.getValueType();
// For truncating stores, operate on the narrower in-memory type.
624 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
625 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
626 unsigned alignment = SN->getAlignment();
628 switch (SN->getAddressingMode()) {
629 case ISD::UNINDEXED: {
630 // The vector type we really want to load from the 16-byte chunk.
631 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
632 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
634 SDValue alignLoadVec;
635 SDValue basePtr = SN->getBasePtr();
636 SDValue the_chain = SN->getChain();
// Address of the byte slot where the value is inserted within the quadword.
637 SDValue insertEltOffs;
639 if (alignment == 16) {
642 // Special cases for a known aligned load to simplify the base pointer
643 // and insertion byte:
644 if (basePtr.getOpcode() == ISD::ADD
645 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
646 // Known offset into basePtr
647 int64_t offset = CN->getSExtValue();
649 // Simplify the base pointer for this case:
650 basePtr = basePtr.getOperand(0);
// Insertion byte = offset within the quadword (low 4 bits).
651 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
653 DAG.getConstant((offset & 0xf), PtrVT));
// Fold the quadword-aligned part of the offset into the base address.
655 if ((offset & ~0xf) > 0) {
656 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
658 DAG.getConstant((offset & ~0xf), PtrVT));
661 // Otherwise, assume it's at byte 0 of basePtr
662 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
664 DAG.getConstant(0, PtrVT));
667 // Unaligned load: must be more pessimistic about addressing modes:
668 if (basePtr.getOpcode() == ISD::ADD) {
669 MachineFunction &MF = DAG.getMachineFunction();
670 MachineRegisterInfo &RegInfo = MF.getRegInfo();
671 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
674 SDValue Op0 = basePtr.getOperand(0);
675 SDValue Op1 = basePtr.getOperand(1);
677 if (isa<ConstantSDNode>(Op1)) {
678 // Convert the (add <ptr>, <const>) to an indirect address contained
679 // in a register. Note that this is done because we need to avoid
680 // creating a 0(reg) d-form address due to the SPU's block loads.
681 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
682 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
683 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
685 // Convert the (add <arg1>, <arg2>) to an indirect address, which
686 // will likely be lowered as a reg(reg) x-form address.
687 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
// Non-ADD base pointer: wrap it as indirect with a zero offset.
690 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
692 DAG.getConstant(0, PtrVT));
695 // Insertion point is solely determined by basePtr's contents
696 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
698 DAG.getConstant(0, PtrVT));
701 // Re-emit as a v16i8 vector load
// Fetch the full quadword that contains the store destination.
702 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
703 SN->getSrcValue(), SN->getSrcValueOffset(),
704 SN->isVolatile(), 16);
707 the_chain = alignLoadVec.getValue(1);
709 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
710 SDValue theValue = SN->getValue();
// Peel off AssertZext/AssertSext wrappers to reach the raw value
// (condition line partially elided in this excerpt).
714 && (theValue.getOpcode() == ISD::AssertZext
715 || theValue.getOpcode() == ISD::AssertSext)) {
716 // Drill down and get the value for zero- and sign-extended
718 theValue = theValue.getOperand(0);
721 // If the base pointer is already a D-form address, then just create
722 // a new D-form address with a slot offset and the orignal base pointer.
723 // Otherwise generate a D-form address with the slot offset relative
724 // to the stack pointer, which is always aligned.
726 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
727 cerr << "CellSPU LowerSTORE: basePtr = ";
728 basePtr.getNode()->dump(&DAG);
// Build the SHUFB operands: a shuffle mask selecting the insertion slot,
// and the scalar value broadcast into a vector.
733 SDValue insertEltOp =
734 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
735 SDValue vectorizeOp =
736 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
// Merge the new element into the previously loaded quadword.
738 result = DAG.getNode(SPUISD::SHUFB, vecVT,
739 vectorizeOp, alignLoadVec,
740 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
// Store the merged quadword back using the alignment-load's memory info.
742 result = DAG.getStore(the_chain, result, basePtr,
743 LN->getSrcValue(), LN->getSrcValueOffset(),
744 LN->isVolatile(), LN->getAlignment());
746 #if 0 && !defined(NDEBUG)
747 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
// NOTE(review): "¤tRoot" below looks like encoding damage of
// "&currentRoot"; harmless while this block is #if 0'd out, but it must
// be repaired before re-enabling this debug code.
748 const SDValue ¤tRoot = DAG.getRoot();
751 cerr << "------- CellSPU:LowerStore result:\n";
754 DAG.setRoot(currentRoot);
// NOTE(review): the diagnostic below says "LowerLOAD"/"LoadSDNode" -- a
// copy/paste from LowerLOAD; it should read LowerSTORE/StoreSDNode.
765 case ISD::LAST_INDEXED_MODE:
766 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
768 cerr << (unsigned) SN->getAddressingMode() << "\n";
776 //! Generate the address of a constant pool entry.
// Static relocation model only: small memory model emits an absolute
// A-form address; large memory model combines Hi/Lo halves through an
// IndirectAddr node.  Non-static models hit the (elided) assertion below.
778 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
779 MVT PtrVT = Op.getValueType();
780 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
781 Constant *C = CP->getConstVal();
782 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
783 SDValue Zero = DAG.getConstant(0, PtrVT);
784 const TargetMachine &TM = DAG.getTarget();
786 if (TM.getRelocationModel() == Reloc::Static) {
787 if (!ST->usingLargeMem()) {
788 // Just return the SDValue with the constant pool address in it.
789 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
791 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
792 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
793 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
798 "LowerConstantPool: Relocation model other than static"
803 //! Alternate entry point for generating the address of a constant pool entry
// Thin public wrapper: forwards to the file-local LowerConstantPool with the
// subtarget extracted from the target machine.  (Return-type line not
// visible in this excerpt.)
805 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
806 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
// Generate the address of a jump table entry.  Mirrors LowerConstantPool:
// static relocation + small memory model -> A-form absolute address;
// large memory model -> Hi/Lo pair via IndirectAddr; other relocation
// models are rejected by the (elided) assertion below.
810 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
811 MVT PtrVT = Op.getValueType();
812 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
813 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
814 SDValue Zero = DAG.getConstant(0, PtrVT);
815 const TargetMachine &TM = DAG.getTarget();
817 if (TM.getRelocationModel() == Reloc::Static) {
818 if (!ST->usingLargeMem()) {
819 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
821 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
822 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
823 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
828 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global.  Same pattern as LowerConstantPool /
// LowerJumpTable: static + small memory model -> A-form address; large
// memory model -> Hi/Lo pair via IndirectAddr; other relocation models
// print the error below (abort path elided in this excerpt).
833 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
834 MVT PtrVT = Op.getValueType();
835 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
836 GlobalValue *GV = GSDN->getGlobal();
837 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
838 const TargetMachine &TM = DAG.getTarget();
839 SDValue Zero = DAG.getConstant(0, PtrVT);
841 if (TM.getRelocationModel() == Reloc::Static) {
842 if (!ST->usingLargeMem()) {
843 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
845 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
846 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
847 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
850 cerr << "LowerGlobalAddress: Relocation model other than static not "
859 //! Custom lower i64 integer constants
861 This code inserts all of the necessary juggling that needs to occur to load
862 a 64-bit constant into a register.
// Splats the constant into both lanes of a v2i64 BUILD_VECTOR, then
// extracts the preferred slot back out as an i64.  Non-i64 types fall
// through to the error path below (abort elided in this excerpt).
865 LowerConstant(SDValue Op, SelectionDAG &DAG) {
866 MVT VT = Op.getValueType();
868 if (VT == MVT::i64) {
869 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
870 SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
871 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
872 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
874 cerr << "LowerConstant: unhandled constant type "
884 //! Custom lower double precision floating point constants
// Materializes an f64 constant from its raw bit pattern: splat the 64-bit
// pattern into a v2i64 BUILD_VECTOR, bit-convert to v2f64, and extract the
// preferred slot.  Only f64 is handled here (per the legality settings in
// the constructor, f32 ConstantFP stays Legal).
886 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
887 MVT VT = Op.getValueType();
889 if (VT == MVT::f64) {
890 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
893 "LowerConstantFP: Node is not ConstantFPSDNode");
// Raw IEEE-754 bit pattern of the double, as a 64-bit integer.
895 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
896 SDValue T = DAG.getConstant(dbits, MVT::i64);
897 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
898 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
899 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
// Lower incoming formal arguments: the first NumArgRegs arguments arrive in
// registers (copied into fresh virtual registers); the rest are loaded from
// fixed stack slots.  For varargs, every remaining argument register is
// spilled to the stack so va_arg can walk them, and VarArgsFrameIndex is
// set to the first spill slot.  NOTE(review): some loop/brace lines and the
// ArgVal declaration are not visible in this excerpt.
906 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
908 MachineFunction &MF = DAG.getMachineFunction();
909 MachineFrameInfo *MFI = MF.getFrameInfo();
910 MachineRegisterInfo &RegInfo = MF.getRegInfo();
911 SmallVector<SDValue, 48> ArgValues;
912 SDValue Root = Op.getOperand(0);
// Operand 2 carries the isVarArg flag as a constant.
913 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
915 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
916 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack arguments start past the fixed minimum frame area.
918 unsigned ArgOffset = SPUFrameInfo::minStackSize();
919 unsigned ArgRegIdx = 0;
920 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
922 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
924 // Add DAG nodes to load the arguments or copy them out of registers.
// The node's last value is the chain, hence NumValues - 1 arguments.
925 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
926 ArgNo != e; ++ArgNo) {
927 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
928 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
// Argument still fits in a register: pick the register class for its type.
931 if (ArgRegIdx < NumArgRegs) {
932 const TargetRegisterClass *ArgRegClass;
934 switch (ObjectVT.getSimpleVT()) {
936 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
937 << ObjectVT.getMVTString()
942 ArgRegClass = &SPU::R8CRegClass;
945 ArgRegClass = &SPU::R16CRegClass;
948 ArgRegClass = &SPU::R32CRegClass;
951 ArgRegClass = &SPU::R64CRegClass;
954 ArgRegClass = &SPU::GPRCRegClass;
957 ArgRegClass = &SPU::R32FPRegClass;
960 ArgRegClass = &SPU::R64FPRegClass;
968 ArgRegClass = &SPU::VECREGRegClass;
// Mark the physical register live-in and copy it to a virtual register.
972 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
973 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
974 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
977 // We need to load the argument to a virtual register if we determined
978 // above that we ran out of physical registers of the appropriate type
979 // or we're forced to do vararg
980 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
981 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
982 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
983 ArgOffset += StackSlotSize;
986 ArgValues.push_back(ArgVal);
// Thread the chain through the copy/load just created.
988 Root = ArgVal.getOperand(0);
// Vararg handling: spill all still-unused argument registers.
993 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
994 // We will spill (79-3)+1 registers to the stack
995 SmallVector<SDValue, 79-3+1> MemOps;
997 // Create the frame slot
999 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
// NOTE(review): VarArgsFrameIndex is overwritten each iteration, so it
// ends up referring to the LAST spill slot created -- confirm that this
// matches what LowerVASTART expects.
1000 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1001 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1002 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1003 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1004 Root = Store.getOperand(0);
1005 MemOps.push_back(Store);
1007 // Increment address by stack slot size for the next stored argument
1008 ArgOffset += StackSlotSize;
// Tie all the spill stores into the chain with a TokenFactor.
1010 if (!MemOps.empty())
1011 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
// The chain is returned as the final value after all argument values.
1014 ArgValues.push_back(Root);
1016 // Return the new list of results.
1017 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1018 &ArgValues[0], ArgValues.size());
1021 /// isLSAAddress - Return the immediate to use if the specified
1022 /// value is representable as a LSA address.
///
/// Returns a constant node holding the word offset (address >> 2) when Op is
/// a constant that is 4-byte aligned and fits in an 18-bit sign-extended
/// immediate; returns 0 otherwise (non-constant guard elided in this view).
1023 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1024 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1027 int Addr = C->getZExtValue();
// Reject unaligned addresses and those whose top 14 bits are not a sign
// extension of bit 17 (shift up/down by 14 round-trips only in-range values).
1028 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1029 (Addr << 14 >> 14) != Addr)
1030 return 0; // Top 14 bits have to be sext of immediate.
// Encode as a word offset.
1032 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower ISD::CALL for the SPU: marshal arguments into registers / stack
// slots, select the call addressing form (PC-relative, absolute, or indirect
// depending on memory model and whether the callee is defined locally), emit
// the CALLSEQ_START/CALL/CALLSEQ_END sequence, and copy return values out of
// the return registers.
// NOTE(review): interior lines (case labels, else-branches, closing braces)
// are elided in this view; comments describe the visible statements only.
1036 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1037 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1038 SDValue Chain = TheCall->getChain();
1039 SDValue Callee = TheCall->getCallee();
1040 unsigned NumOps = TheCall->getNumArgs();
1041 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1042 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1043 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1045 // Handy pointer type
1046 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1048 // Accumulate how many bytes are to be pushed on the stack, including the
1049 // linkage area, and parameter passing area. According to the SPU ABI,
1050 // we minimally need space for [LR] and [SP]
1051 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1053 // Set up a copy of the stack pointer for use loading and storing any
1054 // arguments that may not fit in the registers available for argument
// R1 is the SPU stack pointer.
1056 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1058 // Figure out which arguments are going to go in registers, and which in
1060 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1061 unsigned ArgRegIdx = 0;
1063 // Keep track of registers passing arguments
1064 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1065 // And the arguments passed on the stack
1066 SmallVector<SDValue, 8> MemOpChains;
1068 for (unsigned i = 0; i != NumOps; ++i) {
1069 SDValue Arg = TheCall->getArg(i);
1071 // PtrOff will be used to store the current argument to the stack if a
1072 // register cannot be found for it.
1073 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1074 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// Dispatch on the argument's type (case labels elided); each visible arm
// either assigns the next argument register or spills to the stack slot.
1076 switch (Arg.getValueType().getSimpleVT()) {
1077 default: assert(0 && "Unexpected ValueType for argument!");
1083 if (ArgRegIdx != NumArgRegs) {
1084 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1086 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1087 ArgOffset += StackSlotSize;
1092 if (ArgRegIdx != NumArgRegs) {
1093 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1095 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1096 ArgOffset += StackSlotSize;
1105 if (ArgRegIdx != NumArgRegs) {
1106 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1108 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1109 ArgOffset += StackSlotSize;
1115 // Update number of stack bytes actually used, insert a call sequence start
// Only the bytes beyond the minimal linkage area count toward the call's
// stack adjustment.
1116 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1117 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1120 if (!MemOpChains.empty()) {
1121 // Adjust the stack pointer for the stack arguments.
1122 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1123 &MemOpChains[0], MemOpChains.size());
1126 // Build a sequence of copy-to-reg nodes chained together with token chain
1127 // and flag operands which copy the outgoing args into the appropriate regs.
1129 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1130 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1132 InFlag = Chain.getValue(1);
1135 SmallVector<SDValue, 8> Ops;
1136 unsigned CallOpc = SPUISD::CALL;
1138 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1139 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1140 // node so that legalize doesn't hack it.
1141 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1142 GlobalValue *GV = G->getGlobal();
1143 MVT CalleeVT = Callee.getValueType();
1144 SDValue Zero = DAG.getConstant(0, PtrVT);
1145 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1147 if (!ST->usingLargeMem()) {
1148 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1149 // style calls, otherwise, external symbols are BRASL calls. This assumes
1150 // that declared/defined symbols are in the same compilation unit and can
1151 // be reached through PC-relative jumps.
1154 // This may be an unsafe assumption for JIT and really large compilation
// Declarations get an absolute (A-form) address; definitions get a
// PC-relative address.
1156 if (GV->isDeclaration()) {
1157 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1159 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1162 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1164 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1166 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1167 MVT CalleeVT = Callee.getValueType();
1168 SDValue Zero = DAG.getConstant(0, PtrVT);
1169 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1170 Callee.getValueType());
// External symbols: absolute in small-memory mode, indirect otherwise.
1172 if (!ST->usingLargeMem()) {
1173 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1175 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1177 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1178 // If this is an absolute destination address that appears to be a legal
1179 // local store address, use the munged value.
1180 Callee = SDValue(Dest, 0);
1183 Ops.push_back(Chain);
1184 Ops.push_back(Callee);
1186 // Add argument registers to the end of the list so that they are known live
1188 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1189 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1190 RegsToPass[i].second.getValueType()));
1192 if (InFlag.getNode())
1193 Ops.push_back(InFlag);
1194 // Returns a chain and a flag for retval copy to use.
1195 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1196 &Ops[0], Ops.size());
1197 InFlag = Chain.getValue(1);
1199 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1200 DAG.getIntPtrConstant(0, true), InFlag);
1201 if (TheCall->getValueType(0) != MVT::Other)
1202 InFlag = Chain.getValue(1);
1204 SDValue ResultVals[3];
1205 unsigned NumResults = 0;
1207 // If the call has results, copy the values out of the ret val registers.
// Case labels are elided; R3 holds the primary return value, and the first
// visible arm handles a two-register (R3/R4) i32 pair.
1208 switch (TheCall->getValueType(0).getSimpleVT()) {
1209 default: assert(0 && "Unexpected ret value!");
1210 case MVT::Other: break;
1212 if (TheCall->getValueType(1) == MVT::i32) {
1213 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1214 ResultVals[0] = Chain.getValue(0);
1215 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1216 Chain.getValue(2)).getValue(1);
1217 ResultVals[1] = Chain.getValue(0);
1220 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1221 ResultVals[0] = Chain.getValue(0);
1226 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1227 ResultVals[0] = Chain.getValue(0);
1231 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i128, InFlag).getValue(1);
1232 ResultVals[0] = Chain.getValue(0);
1237 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1238 InFlag).getValue(1);
1239 ResultVals[0] = Chain.getValue(0);
1248 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1249 InFlag).getValue(1);
1250 ResultVals[0] = Chain.getValue(0);
1255 // If the function returns void, just return the chain.
1256 if (NumResults == 0)
1259 // Otherwise, merge everything together with a MERGE_VALUES node.
1260 ResultVals[NumResults++] = Chain;
1261 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1262 return Res.getValue(Op.getResNo());
// Lower ISD::RET: run the SPU return-value calling convention, mark the
// return registers live-out, copy each returned value into its assigned
// register (flag-glued), and emit the SPUISD::RET_FLAG terminator.
1266 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1267 SmallVector<CCValAssign, 16> RVLocs;
1268 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1269 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1270 CCState CCInfo(CC, isVarArg, TM, RVLocs);
// RetCC_SPU is the tablegen'd return-value convention; it fills RVLocs.
1271 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1273 // If this is the first return lowered for this function, add the regs to the
1274 // liveout set for the function.
1275 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1276 for (unsigned i = 0; i != RVLocs.size(); ++i)
1277 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1280 SDValue Chain = Op.getOperand(0);
1283 // Copy the result values into the output registers.
// RET operands come in (value, signess-flag) pairs, hence i*2+1.
1284 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1285 CCValAssign &VA = RVLocs[i];
1286 assert(VA.isRegLoc() && "Can only return in registers!");
1287 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1288 Flag = Chain.getValue(1);
// With a glue flag (values were copied) vs. without (void return); the
// guard between these two returns is elided in this view.
1292 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1294 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1298 //===----------------------------------------------------------------------===//
1299 // Vector related lowering:
1300 //===----------------------------------------------------------------------===//
// If the BUILD_VECTOR node N is a splat of a single constant (ignoring UNDEF
// elements), return that element's ConstantSDNode; otherwise return 0.
1302 static ConstantSDNode *
1303 getVecImm(SDNode *N) {
1304 SDValue OpVal(0, 0);
1306 // Check to see if this buildvec has a single non-undef value in its elements.
1307 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1308 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1309 if (OpVal.getNode() == 0)
1310 OpVal = N->getOperand(i);
// Two distinct non-undef elements: not a splat (early exit elided in view).
1311 else if (OpVal != N->getOperand(i))
1315 if (OpVal.getNode() != 0) {
1316 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1321 return 0; // All UNDEF: use implicit def.; not Constant node
1324 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1325 /// and the value fits into an unsigned 18-bit constant, and if so, return the
///
/// Returns the immediate as a target constant of ValueType, or an empty
/// SDValue (fall-through return elided in this view) otherwise.
1327 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1329 if (ConstantSDNode *CN = getVecImm(N)) {
1330 uint64_t Value = CN->getZExtValue();
// For i64 splats, both 32-bit halves must match (comparison elided in this
// view — presumably checks upper == lower; TODO confirm) before collapsing
// to the upper half.
1331 if (ValueType == MVT::i64) {
1332 uint64_t UValue = CN->getZExtValue();
1333 uint32_t upper = uint32_t(UValue >> 32);
1334 uint32_t lower = uint32_t(UValue);
1337 Value = Value >> 32;
// 0x3ffff = maximum unsigned 18-bit immediate.
1339 if (Value <= 0x3ffff)
1340 return DAG.getTargetConstant(Value, ValueType);
1346 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1347 /// and the value fits into a signed 16-bit constant, and if so, return the
///
/// Returns the immediate as a target constant of ValueType, or an empty
/// SDValue (fall-through return elided in this view) otherwise.
1349 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1351 if (ConstantSDNode *CN = getVecImm(N)) {
1352 int64_t Value = CN->getSExtValue();
// For i64 splats, both 32-bit halves must match (comparison elided in this
// view — TODO confirm) before collapsing to the upper half.
1353 if (ValueType == MVT::i64) {
1354 uint64_t UValue = CN->getZExtValue();
1355 uint32_t upper = uint32_t(UValue >> 32);
1356 uint32_t lower = uint32_t(UValue);
1359 Value = Value >> 32;
// Signed 16-bit range: [-32768, 32767].
1361 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1362 return DAG.getTargetConstant(Value, ValueType);
1369 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1370 /// and the value fits into a signed 10-bit constant, and if so, return the
///
/// Returns the immediate as a target constant of ValueType, or an empty
/// SDValue (fall-through return elided in this view) otherwise.
1372 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1374 if (ConstantSDNode *CN = getVecImm(N)) {
1375 int64_t Value = CN->getSExtValue();
// For i64 splats, both 32-bit halves must match (comparison elided in this
// view — TODO confirm) before collapsing to the upper half.
1376 if (ValueType == MVT::i64) {
1377 uint64_t UValue = CN->getZExtValue();
1378 uint32_t upper = uint32_t(UValue >> 32);
1379 uint32_t lower = uint32_t(UValue);
1382 Value = Value >> 32;
// isS10Constant: fits in a signed 10-bit immediate (SPU d-form/i10 field).
1384 if (isS10Constant(Value))
1385 return DAG.getTargetConstant(Value, ValueType);
1391 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1392 /// and the value fits into a signed 8-bit constant, and if so, return the
1395 /// @note: The incoming vector is v16i8 because that's the only way we can load
1396 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
///
/// Returns the byte immediate as a target constant, or an empty SDValue
/// (fall-through return elided in this view) otherwise.
1398 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1400 if (ConstantSDNode *CN = getVecImm(N)) {
1401 int Value = (int) CN->getZExtValue();
// For an i16 splat element, accept it only when the high byte equals the
// low byte, then return just the low byte.
1402 if (ValueType == MVT::i16
1403 && Value <= 0xffff /* truncated from uint64_t */
1404 && ((short) Value >> 8) == ((short) Value & 0xff))
1405 return DAG.getTargetConstant(Value & 0xff, ValueType);
// For i8, the value must already fit in one byte.
1406 else if (ValueType == MVT::i8
1407 && (Value & 0xff) == Value)
1408 return DAG.getTargetConstant(Value, ValueType);
1414 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1415 /// and the value fits into a signed 16-bit constant, and if so, return the
///
/// Matches the ILHU ("immediate load halfword upper") pattern: the splat
/// constant's low 16 bits must be zero; the returned immediate is the value
/// shifted down by 16. Fall-through empty return elided in this view.
1417 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1419 if (ConstantSDNode *CN = getVecImm(N)) {
1420 uint64_t Value = CN->getZExtValue();
// i32: only bits 31..16 may be set. i64: same mask check on the full value
// (which also forces the upper 32 bits to zero with this mask).
1421 if ((ValueType == MVT::i32
1422 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1423 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1424 return DAG.getTargetConstant(Value >> 16, ValueType);
1430 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
///
/// Returns the splat constant as an i32 target constant, or an empty SDValue
/// (fall-through return elided in this view) when N is not a constant splat.
1431 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1432 if (ConstantSDNode *CN = getVecImm(N)) {
1433 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1439 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
///
/// Returns the splat constant as an i64 target constant, or an empty SDValue
/// (fall-through return elided in this view) when N is not a constant splat.
/// NOTE(review): the `(unsigned)` cast below truncates the 64-bit splat value
/// to 32 bits before widening it back to an MVT::i64 constant — this looks
/// like it silently drops the upper 32 bits of a genuine 64-bit immediate.
/// Confirm against callers whether only 32-bit-representable splats reach
/// this path; otherwise the cast should likely be removed.
1440 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1441 if (ConstantSDNode *CN = getVecImm(N)) {
1442 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1448 // If this is a vector of constants or undefs, get the bits. A bit in
1449 // UndefBits is set if the corresponding element of the vector is an
1450 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1451 // zero. Return true if this is not an array of constants, false if it is.
// Packs the 128-bit BUILD_VECTOR BV into two uint64_t words (VectorBits)
// plus a matching undef mask (UndefBits).
1453 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1454 uint64_t UndefBits[2]) {
1455 // Start with zero'd results.
1456 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1458 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1459 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1460 SDValue OpVal = BV->getOperand(i);
// Map element i to (word index, bit-slot within that word). Elements in
// the first half of the vector land in word 0, the rest in word 1; SlotNo
// orders elements big-endian within each 64-bit word.
1462 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1463 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1465 uint64_t EltBits = 0;
1466 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record a full-element mask of undef bits; element value stays zero.
1467 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1468 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1470 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1471 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1472 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw IEEE bit patterns.
1473 const APFloat &apf = CN->getValueAPF();
1474 EltBits = (CN->getValueType(0) == MVT::f32
1475 ? FloatToBits(apf.convertToFloat())
1476 : DoubleToBits(apf.convertToDouble()));
1478 // Nonconstant element.
1482 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1485 //printf("%llx %llx %llx %llx\n",
1486 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1490 /// If this is a splat (repetition) of a value across the whole vector, return
1491 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1492 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1493 /// SplatSize = 1 byte.
1494 static bool isConstantSplat(const uint64_t Bits128[2],
1495 const uint64_t Undef128[2],
1497 uint64_t &SplatBits, uint64_t &SplatUndef,
1499 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1500 // the same as the lower 64-bits, ignoring undefs.
// Pre-fold each width level by OR-ing value halves and AND-ing undef halves,
// so an undef half never blocks a match at a smaller splat size.
1501 uint64_t Bits64 = Bits128[0] | Bits128[1];
1502 uint64_t Undef64 = Undef128[0] & Undef128[1];
1503 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1504 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1505 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1506 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// Each nested level checks "halves equal modulo undefs" and, if the caller's
// minimum splat width allows, descends to the next smaller width. The
// SplatBits/SplatSize assignments for the larger widths are elided in this
// view; only the SplatUndef assignments and the 8-bit leaf are visible.
1508 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1509 if (MinSplatBits < 64) {
1511 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1513 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1514 if (MinSplatBits < 32) {
1516 // If the top 16-bits are different than the lower 16-bits, ignoring
1517 // undefs, we have an i32 splat.
1518 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1519 if (MinSplatBits < 16) {
1520 // If the top 8-bits are different than the lower 8-bits, ignoring
1521 // undefs, we have an i16 splat.
1522 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1523 == ((Bits16 >> 8) & ~Undef16)) {
1524 // Otherwise, we have an 8-bit splat.
1525 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1526 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1532 SplatUndef = Undef16;
1539 SplatUndef = Undef32;
// 64-bit splat case: report the raw first word.
1545 SplatBits = Bits128[0];
1546 SplatUndef = Undef128[0];
1552 return false; // Can't be a splat if two pieces don't match.
1555 // If this is a case we can't handle, return null and let the default
1556 // expansion code take care of it. If we CAN select this case, and if it
1557 // selects to a single instruction, return Op. Otherwise, if we can codegen
1558 // this case more efficiently than a constant pool load, lower it to the
1559 // sequence of ops that should be used.
// NOTE(review): case labels and several closing braces are elided in this
// view; each visible arm is labeled by the type it handles based on the
// vector VT it constructs.
1560 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1561 MVT VT = Op.getValueType();
1562 // If this is a vector of constants or undefs, get the bits. A bit in
1563 // UndefBits is set if the corresponding element of the vector is an
1564 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1566 uint64_t VectorBits[2];
1567 uint64_t UndefBits[2];
1568 uint64_t SplatBits, SplatUndef;
// Bail out unless the node is an all-constant vector that is a splat at
// (at least) the element width.
1570 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1571 || !isConstantSplat(VectorBits, UndefBits,
1572 VT.getVectorElementType().getSizeInBits(),
1573 SplatBits, SplatUndef, SplatSize))
1574 return SDValue(); // Not a constant vector, not a splat.
1576 switch (VT.getSimpleVT()) {
// v4f32 arm: re-express the FP splat as an integer splat and bitcast.
1579 uint32_t Value32 = SplatBits;
1580 assert(SplatSize == 4
1581 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1582 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1583 SDValue T = DAG.getConstant(Value32, MVT::i32);
1584 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1585 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 arm: same trick with 64-bit integer lanes.
1589 uint64_t f64val = SplatBits;
1590 assert(SplatSize == 8
1591 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1592 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1593 SDValue T = DAG.getConstant(f64val, MVT::i64);
1594 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1595 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 arm: widen the byte splat to halfwords and bitcast back.
1599 // 8-bit constants have to be expanded to 16-bits
1600 unsigned short Value16 = SplatBits | (SplatBits << 8);
1602 for (int i = 0; i < 8; ++i)
1603 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1604 return DAG.getNode(ISD::BIT_CONVERT, VT,
1605 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 arm: the splat may have been detected at byte size, in which case
// the byte is duplicated into both halves of the halfword.
1608 unsigned short Value16;
1610 Value16 = (unsigned short) (SplatBits & 0xffff);
1612 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1613 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1615 for (int i = 0; i < 8; ++i) Ops[i] = T;
1616 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 arm.
1619 unsigned int Value = SplatBits;
1620 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1621 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// Two-element arm (built with two lanes).
1624 unsigned int Value = SplatBits;
1625 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1626 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
// v2i64 arm: split the 64-bit splat into 32-bit halves.
1629 uint64_t val = SplatBits;
1630 uint32_t upper = uint32_t(val >> 32);
1631 uint32_t lower = uint32_t(val);
1633 if (upper == lower) {
1634 // Magic constant that can be matched by IL, ILA, et. al.
1635 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1636 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1640 SmallVector<SDValue, 16> ShufBytes;
1642 bool upper_special, lower_special;
1644 // NOTE: This code creates common-case shuffle masks that can be easily
1645 // detected as common expressions. It is not attempting to create highly
1646 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1648 // Detect if the upper or lower half is a special shuffle mask pattern:
// "Special" halves (0, all-ones, sign bit) can be produced directly by
// shufb control-byte codes instead of materializing a vector.
1649 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1650 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1652 // Create lower vector if not a special pattern
1653 if (!lower_special) {
1654 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1655 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1656 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1657 LO32C, LO32C, LO32C, LO32C));
1660 // Create upper vector if not a special pattern
1661 if (!upper_special) {
1662 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1663 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1664 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1665 HI32C, HI32C, HI32C, HI32C));
1668 // If either upper or lower are special, then the two input operands are
1669 // the same (basically, one of them is a "don't care")
1674 if (lower_special && upper_special) {
1675 // Unhappy situation... both upper and lower are special, so punt with
1676 // a target constant:
1677 SDValue Zero = DAG.getConstant(0, MVT::i32);
1678 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build a 16-byte shufb control mask, one 32-bit word per iteration of i
// (4 control bytes each).
1682 for (int i = 0; i < 4; ++i) {
1684 for (int j = 0; j < 4; ++j) {
1686 bool process_upper, process_lower;
// Even words select the upper 32 bits of each i64 lane, odd words the
// lower 32 bits.
1688 process_upper = (upper_special && (i & 1) == 0);
1689 process_lower = (lower_special && (i & 1) == 1);
1691 if (process_upper || process_lower) {
// shufb control codes: 0x80 -> 0x00, 0xc0 -> 0xff, 0xe0 -> 0x80 (first
// byte only); values assigned in the elided branches — TODO confirm.
1692 if ((process_upper && upper == 0)
1693 || (process_lower && lower == 0))
1695 else if ((process_upper && upper == 0xffffffff)
1696 || (process_lower && lower == 0xffffffff))
1698 else if ((process_upper && upper == 0x80000000)
1699 || (process_lower && lower == 0x80000000))
1700 val |= (j == 0 ? 0xe0 : 0x80);
// Non-special byte: index into the concatenated HI32/LO32 operands.
1702 val |= i * 4 + j + ((i & 1) * 16);
1705 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1708 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1709 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1710 &ShufBytes[0], ShufBytes.size()));
1718 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1719 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1720 /// permutation vector, V3, is monotonically increasing with one "exception"
1721 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1722 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1723 /// In either case, the net result is going to eventually invoke SHUFB to
1724 /// permute/shuffle the bytes from V1 and V2.
1726 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1727 /// control word for byte/halfword/word insertion. This takes care of a single
1728 /// element move from V2 into V1.
1730 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// NOTE(review): several interior lines (V2EltIdx0 assignments per element
// type, parts of the mask-classification loop) are elided in this view.
1731 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1732 SDValue V1 = Op.getOperand(0);
1733 SDValue V2 = Op.getOperand(1);
1734 SDValue PermMask = Op.getOperand(2);
1736 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1738 // If we have a single element being moved from V1 to V2, this can be handled
1739 // using the C*[DX] compute mask instructions, but the vector elements have
1740 // to be monotonically increasing with one exception element.
1741 MVT VecVT = V1.getValueType();
1742 MVT EltVT = VecVT.getVectorElementType();
1743 unsigned EltsFromV2 = 0;
// V2EltIdx0 = index of V2's first element in the combined mask numbering;
// the per-type assignments are elided in this view.
1745 unsigned V2EltIdx0 = 0;
1746 unsigned CurrElt = 0;
1747 unsigned MaxElts = VecVT.getVectorNumElements();
1748 unsigned PrevElt = 0;
1750 bool monotonic = true;
1753 if (EltVT == MVT::i8) {
1755 } else if (EltVT == MVT::i16) {
1757 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1759 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1762 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Classify the permutation mask: count elements drawn from V2, track
// monotonicity of the V1 indices, and detect a pure rotation pattern.
1764 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1765 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1766 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1769 if (SrcElt >= V2EltIdx0) {
// First (and only permitted) element taken from V2; remember its byte
// offset for the SHUFFLE_MASK immediate.
1770 if (1 >= (++EltsFromV2)) {
1771 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1773 } else if (CurrElt != SrcElt) {
// Rotation detection: consecutive indices (with wrap-around) keep the
// candidate alive; the bookkeeping branches are partially elided.
1781 if (PrevElt > 0 && SrcElt < MaxElts) {
1782 if ((PrevElt == SrcElt - 1)
1783 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1790 } else if (PrevElt == 0) {
1791 // First time through, need to keep track of previous element
1794 // This isn't a rotation, takes elements from vector 2
// Exactly one element comes from V2 and the rest are in order: use the
// C*D-style insertion mask.
1801 if (EltsFromV2 == 1 && monotonic) {
1802 // Compute mask and shuffle
1803 MachineFunction &MF = DAG.getMachineFunction();
1804 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1805 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1806 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1807 // Initialize temporary register to 0
1808 SDValue InitTempReg =
1809 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1810 // Copy register's contents as index in SHUFFLE_MASK:
1811 SDValue ShufMaskOp =
1812 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1813 DAG.getTargetConstant(V2Elt, MVT::i32),
1814 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1815 // Use shuffle mask in SHUFB synthetic instruction:
1816 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1817 } else if (rotate) {
// Pure rotation: lower to a byte rotate of V1.
1818 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1820 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1821 V1, DAG.getConstant(rotamt, MVT::i16));
1823 // Convert the SHUFFLE_VECTOR mask's input element units to the
// General case: expand the element-indexed mask into a per-byte v16i8
// shufb control vector.
1825 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1827 SmallVector<SDValue, 16> ResultMask;
1828 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1830 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1833 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1835 for (unsigned j = 0; j < BytesPerElement; ++j) {
1836 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1841 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1842 &ResultMask[0], ResultMask.size());
1843 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR of
// n copies (which later simplifies to a vector load); a non-constant scalar
// is placed in the vector's preferred slot via SPUISD::PREFSLOT2VEC.
1847 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1848 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1850 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1851 // For a constant, build the appropriate constant vector, which will
1852 // eventually simplify to a vector register load.
1854 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1855 SmallVector<SDValue, 16> ConstVecValues;
1859 // Create a constant vector:
// Pick the element count / element type from the result vector type.
1860 switch (Op.getValueType().getSimpleVT()) {
1861 default: assert(0 && "Unexpected constant value type in "
1862 "LowerSCALAR_TO_VECTOR");
1863 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1864 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1865 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1866 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1867 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1868 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1871 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1872 for (size_t j = 0; j < n_copies; ++j)
1873 ConstVecValues.push_back(CValue);
1875 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1876 &ConstVecValues[0], ConstVecValues.size());
1878 // Otherwise, copy the value from one register to another:
// (case labels elided in this view)
1879 switch (Op0.getValueType().getSimpleVT()) {
1880 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1887 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
// Lower ISD::EXTRACT_VECTOR_ELT. Constant index: either read the preferred
// slot directly or build a shufb mask that moves the element into the
// preferred slot. Variable index: rotate the element to byte 0, replicate it
// across the vector, then read the preferred slot.
// NOTE(review): case labels and some closing braces are elided in this view.
1894 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1895 MVT VT = Op.getValueType();
1896 SDValue N = Op.getOperand(0);
1897 SDValue Elt = Op.getOperand(1);
1900 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1901 // Constant argument:
1902 int EltNo = (int) C->getZExtValue();
// Bounds checks per element type.
1905 if (VT == MVT::i8 && EltNo >= 16)
1906 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1907 else if (VT == MVT::i16 && EltNo >= 8)
1908 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1909 else if (VT == MVT::i32 && EltNo >= 4)
1910 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1911 else if (VT == MVT::i64 && EltNo >= 2)
1912 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1914 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1915 // i32 and i64: Element 0 is the preferred slot
1916 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1919 // Need to generate shuffle mask and extract:
// prefslot_begin/end: the byte range of the register's preferred slot for
// this value type (assigned in elided case arms).
1920 int prefslot_begin = -1, prefslot_end = -1;
1921 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1923 switch (VT.getSimpleVT()) {
1925 assert(false && "Invalid value type!");
1927 prefslot_begin = prefslot_end = 3;
1931 prefslot_begin = 2; prefslot_end = 3;
1936 prefslot_begin = 0; prefslot_end = 3;
1941 prefslot_begin = 0; prefslot_end = 7;
1946 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1947 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a 16-byte shufb mask that routes the requested element's bytes
// into the preferred slot; the remaining bytes repeat that pattern.
1949 unsigned int ShufBytes[16];
1950 for (int i = 0; i < 16; ++i) {
1951 // zero fill uppper part of preferred slot, don't care about the
1953 unsigned int mask_val;
1954 if (i <= prefslot_end) {
1956 ((i < prefslot_begin)
1958 : elt_byte + (i - prefslot_begin));
1960 ShufBytes[i] = mask_val;
1962 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 control bytes into four big-endian i32 words.
1965 SDValue ShufMask[4];
1966 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1967 unsigned bidx = i * 4;
1968 unsigned int bits = ((ShufBytes[bidx] << 24) |
1969 (ShufBytes[bidx+1] << 16) |
1970 (ShufBytes[bidx+2] << 8) |
1972 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1975 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1977 sizeof(ShufMask) / sizeof(ShufMask[0]));
1979 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
1980 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
1981 N, N, ShufMaskVec));
1983 // Variable index: Rotate the requested element into slot 0, then replicate
1984 // slot 0 across the vector
1985 MVT VecVT = N.getValueType();
1986 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1987 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
1991 // Make life easier by making sure the index is zero-extended to i32
1992 if (Elt.getValueType() != MVT::i32)
1993 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
1995 // Scale the index to a bit/byte shift quantity
// 16 bytes / num elements = bytes per element; logBase2 gives the shift.
1997 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1998 unsigned scaleShift = scaleFactor.logBase2();
2001 if (scaleShift > 0) {
2002 // Scale the shift factor:
2003 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2004 DAG.getConstant(scaleShift, MVT::i32));
// Shift the requested element down to byte 0 of the quadword.
2007 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2009 // Replicate the bytes starting at byte 0 across the entire vector (for
2010 // consistency with the notion of a unified register set)
// Each arm builds a splat shufb mask of the right element width (case
// labels elided): 0x00.. for i8, 0x0001.. for i16, etc.
2013 switch (VT.getSimpleVT()) {
2015 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2019 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2020 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2025 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2026 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2032 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2033 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2039 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2040 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2041 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2042 loFactor, hiFactor);
2047 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2048 DAG.getNode(SPUISD::SHUFB, VecVT,
2049 vecShift, vecShift, replicate));
// Lower ISD::INSERT_VECTOR_ELT: merge a scalar element into a vector lane.
// Only a constant lane index is supported; the insertion is performed with a
// SHUFB whose control mask comes from SPUISD::SHUFFLE_MASK.
2055 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2056 SDValue VecOp = Op.getOperand(0);
2057 SDValue ValOp = Op.getOperand(1);
2058 SDValue IdxOp = Op.getOperand(2);
2059 MVT VT = Op.getValueType();
// A variable index cannot be handled by this path.
2061 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2062 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2064 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2065 // Use $sp ($1) because it's always 16-byte aligned and it's available:
// NOTE(review): the ($sp + index) address only parameterizes the generated
// shuffle mask — no load from that address appears here.
2066 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2067 DAG.getRegister(SPU::R1, PtrVT),
2068 DAG.getConstant(CN->getSExtValue(), PtrVT));
2069 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
// Splat the scalar into a vector, then shuffle it together with the
// original vector under the generated mask.
2072 DAG.getNode(SPUISD::SHUFB, VT,
2073 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2075 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
// Lower i8 arithmetic (SPU has no native 8-bit ALU ops): each operation is
// widened to i16, performed there, and the result truncated back to i8.
// NOTE(review): the case labels of the enclosing switch are elided in this
// listing; the per-case intent below is inferred from the surviving comments
// and extension choices — confirm against the full file.
2080 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2081 const TargetLowering &TLI)
2083 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2084 MVT ShiftVT = TLI.getShiftAmountTy();
2086 assert(Op.getValueType() == MVT::i8);
// Unknown opcodes are a hard error — every caller must be i8 math.
2089 assert(0 && "Unhandled i8 math operator");
2093 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2095 SDValue N1 = Op.getOperand(1);
2096 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2097 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2098 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2099 DAG.getNode(Opc, MVT::i16, N0, N1));
2104 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2106 SDValue N1 = Op.getOperand(1);
2107 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2108 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2109 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2110 DAG.getNode(Opc, MVT::i16, N0, N1));
// Rotate-style case: the value is zero-extended and replicated into both
// halves of an i16 (N0 | (N0 << 8)) so a 16-bit rotate/shift of ExpandArg
// behaves like an 8-bit rotate of the original value.
2114 SDValue N1 = Op.getOperand(1);
2116 N0 = (N0.getOpcode() != ISD::Constant
2117 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2118 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
// Shift amount is normalized to the target's shift-amount type.
2120 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2123 N1 = (N1.getOpcode() != ISD::Constant
2124 ? DAG.getNode(N1Opc, ShiftVT, N1)
2125 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2126 TLI.getShiftAmountTy()));
2128 DAG.getNode(ISD::OR, MVT::i16, N0,
2129 DAG.getNode(ISD::SHL, MVT::i16,
2130 N0, DAG.getConstant(8, MVT::i32)));
2131 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2132 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Logical-shift case: zero-extend the value so vacated bits are zero.
2136 SDValue N1 = Op.getOperand(1);
2138 N0 = (N0.getOpcode() != ISD::Constant
2139 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2140 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2142 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2145 N1 = (N1.getOpcode() != ISD::Constant
2146 ? DAG.getNode(N1Opc, ShiftVT, N1)
2147 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2148 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2149 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arithmetic-shift case: sign-extend so the sign bit is replicated.
2152 SDValue N1 = Op.getOperand(1);
2154 N0 = (N0.getOpcode() != ISD::Constant
2155 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2156 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2158 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2161 N1 = (N1.getOpcode() != ISD::Constant
2162 ? DAG.getNode(N1Opc, ShiftVT, N1)
2163 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2165 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2166 DAG.getNode(Opc, MVT::i16, N0, N1));
// Two-operand i16-promoted case (presumably multiply); both operands are
// sign-extended to i16 before the operation.
2169 SDValue N1 = Op.getOperand(1);
2171 N0 = (N0.getOpcode() != ISD::Constant
2172 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2173 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2175 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2176 N1 = (N1.getOpcode() != ISD::Constant
2177 ? DAG.getNode(N1Opc, MVT::i16, N1)
2178 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2180 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2181 DAG.getNode(Opc, MVT::i16, N0, N1));
2189 //! Generate the carry-generate shuffle mask.
2190 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
2191 SmallVector<SDValue, 16> ShufBytes;
2193 // Create the shuffle mask for "rotating" the carry up one register slot
2194 // once the carry is generated.
// Selector bytes 0x04050607 / 0x0c0d0e0f pick words 1 and 3 of the source;
// the 0x80 bytes fill the remaining slots with zero (SHUFB's constant-0x00
// selector range — see the SPU ISA SHUFB description).
2195 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2196 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2197 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2198 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2200 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2201 &ShufBytes[0], ShufBytes.size());
2204 //! Generate the borrow-generate shuffle mask
2205 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
2206 SmallVector<SDValue, 16> ShufBytes;
2208 // Create the shuffle mask for "rotating" the borrow up one register slot
2209 // once the borrow is generated.
// Same word selection as the carry mask, but the filler bytes are 0xC0:
// SHUFB's constant-0xFF selector range, so unused slots become all-ones
// (the borrow convention) instead of zero.
2210 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2211 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2212 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2213 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2215 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2216 &ShufBytes[0], ShufBytes.size());
2219 //! Lower byte immediate operations for v16i8 vectors:
// If one operand of a v16i8 logical op is a constant splat, rebuild the
// splat out of i8 target constants so the instruction selector can match
// the byte-immediate forms (ANDBI/ORBI/XORBI).
2221 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2224 MVT VT = Op.getValueType();
// Assume the constant is operand 0; if not, look through a BIT_CONVERT
// and/or swap so ConstVec names the constant and Arg the other operand.
2226 ConstVec = Op.getOperand(0);
2227 Arg = Op.getOperand(1);
2228 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2229 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2230 ConstVec = ConstVec.getOperand(0);
2232 ConstVec = Op.getOperand(1);
2233 Arg = Op.getOperand(0);
2234 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2235 ConstVec = ConstVec.getOperand(0);
2240 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2241 uint64_t VectorBits[2];
2242 uint64_t UndefBits[2];
2243 uint64_t SplatBits, SplatUndef;
// Only rewrite when the build_vector is a genuine constant splat.
2246 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2247 && isConstantSplat(VectorBits, UndefBits,
2248 VT.getVectorElementType().getSizeInBits(),
2249 SplatBits, SplatUndef, SplatSize)) {
// Truncate the splat value to a byte; it is replicated across all lanes.
2251 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2252 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2254 // Turn the BUILD_VECTOR into a set of target constants:
2255 for (size_t i = 0; i < tcVecSize; ++i)
2258 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2259 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2262 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2263 // lowered. Return the operation, rather than a null SDValue.
2267 //! Custom lowering for CTPOP (count population)
2269 Custom lowering code that counts the number ones in the input
2270 operand. SPU has such an instruction, but it counts the number of
2271 ones per byte, which then have to be accumulated.
// Strategy: splat the scalar into a vector, apply SPUISD::CNTB (per-byte
// popcount), extract the preferred slot, then fold the per-byte counts
// together with shift/add steps sized to the scalar width.
2273 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2274 MVT VT = Op.getValueType();
2275 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2277 switch (VT.getSimpleVT()) {
2279 assert(false && "Invalid value type!");
// i8 case: a single byte — CNTB's per-byte result already is the answer.
2281 SDValue N = Op.getOperand(0);
2282 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2284 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2285 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2287 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 case: add the two byte counts (value + value>>8) and mask to 0x0f.
2291 MachineFunction &MF = DAG.getMachineFunction();
2292 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// A virtual register pins the intermediate so it can be reused twice.
2294 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2296 SDValue N = Op.getOperand(0);
2297 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2298 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2299 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2301 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2302 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2304 // CNTB_result becomes the chain to which all of the virtual registers
2305 // CNTB_reg, SUM1_reg become associated:
2306 SDValue CNTB_result =
2307 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2309 SDValue CNTB_rescopy =
2310 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2312 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2314 return DAG.getNode(ISD::AND, MVT::i16,
2315 DAG.getNode(ISD::ADD, MVT::i16,
2316 DAG.getNode(ISD::SRL, MVT::i16,
// i32 case: two shift/add reduction steps (>>16 then >>8), masked to 0xff.
2323 MachineFunction &MF = DAG.getMachineFunction();
2324 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2326 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2327 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2329 SDValue N = Op.getOperand(0);
2330 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2331 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2332 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2333 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2335 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2336 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2338 // CNTB_result becomes the chain to which all of the virtual registers
2339 // CNTB_reg, SUM1_reg become associated:
2340 SDValue CNTB_result =
2341 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2343 SDValue CNTB_rescopy =
2344 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// First reduction: fold the upper halfword's counts into the lower.
2347 DAG.getNode(ISD::SRL, MVT::i32,
2348 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2351 DAG.getNode(ISD::ADD, MVT::i32,
2352 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2354 SDValue Sum1_rescopy =
2355 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Second reduction: fold the remaining byte pair, then mask to one byte.
2358 DAG.getNode(ISD::SRL, MVT::i32,
2359 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2362 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2363 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2365 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2375 //! Lower ISD::SELECT_CC
2377 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2380 \note Need to revisit this in the future: if the code path through the true
2381 and false value computations is longer than the latency of a branch (6
2382 cycles), then it would be more advantageous to branch and insert a new basic
2383 block and branch on the condition. However, this code does not make that
2384 assumption, given the simplistic uses so far.
2387 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2388 const TargetLowering &TLI) {
2389 MVT VT = Op.getValueType();
2390 SDValue lhs = Op.getOperand(0);
2391 SDValue rhs = Op.getOperand(1);
2392 SDValue trueval = Op.getOperand(2);
2393 SDValue falseval = Op.getOperand(3);
2394 SDValue condition = Op.getOperand(4);
2396 // NOTE: SELB's arguments: $rA, $rB, $mask
2398 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2399 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2400 // condition was true and 0s where the condition was false. Hence, the
2401 // arguments to SELB get reversed.
2403 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2404 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2405 // with another "cannot select select_cc" assert:
// Materialize the comparison as a mask, then bit-select between the two
// values; falseval/trueval are deliberately swapped (see note above).
2407 SDValue compare = DAG.getNode(ISD::SETCC,
2408 TLI.getSetCCResultType(Op.getValueType()),
2409 lhs, rhs, condition);
2410 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2413 //! Custom lower ISD::TRUNCATE
// Only the i128 -> i64 truncation is custom-lowered: a SHUFB picks the
// least-significant doubleword of the quadword. All other truncates are
// returned unchanged for the default expansion.
2414 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2416 MVT VT = Op.getValueType();
2417 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2418 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2420 SDValue Op0 = Op.getOperand(0);
2421 MVT Op0VT = Op0.getValueType();
2422 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2424 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2425 // Create shuffle mask, least significant doubleword of quadword
2426 unsigned maskHigh = 0x08090a0b;
2427 unsigned maskLow = 0x0c0d0e0f;
2428 // Use a shuffle to perform the truncation
// The byte pattern (bytes 8..15) is repeated so both doublewords of the
// result hold the low half of the source.
2429 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2430 DAG.getConstant(maskHigh, MVT::i32),
2431 DAG.getConstant(maskLow, MVT::i32),
2432 DAG.getConstant(maskHigh, MVT::i32),
2433 DAG.getConstant(maskLow, MVT::i32));
2436 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2438 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2439 PromoteScalar, PromoteScalar, shufMask);
// Bitcast back to the narrow vector type and extract the preferred slot.
2441 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2442 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2445 return SDValue(); // Leave the truncate unmolested
2448 //! Custom (target-specific) lowering entry point
2450 This is where LLVM's DAG selection process calls to do target-specific
// Dispatches each custom-marked opcode to its dedicated Lower* helper.
// Unknown opcodes fall into the diagnostic dump below.
2454 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2456 unsigned Opc = (unsigned) Op.getOpcode();
2457 MVT VT = Op.getValueType();
// Diagnostic path for opcodes marked Custom but not handled here.
2461 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2462 cerr << "Op.getOpcode() = " << Opc << "\n";
2463 cerr << "*Op.getNode():\n";
2464 Op.getNode()->dump();
// Memory operations:
2471 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2473 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2474 case ISD::ConstantPool:
2475 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2476 case ISD::GlobalAddress:
2477 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2478 case ISD::JumpTable:
2479 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2481 return LowerConstant(Op, DAG);
2482 case ISD::ConstantFP:
2483 return LowerConstantFP(Op, DAG);
2484 case ISD::FORMAL_ARGUMENTS:
2485 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2487 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2489 return LowerRET(Op, DAG, getTargetMachine());
2491 // i8, i64 math ops:
2500 return LowerI8Math(Op, DAG, Opc, *this);
2504 // Vector-related lowering.
2505 case ISD::BUILD_VECTOR:
2506 return LowerBUILD_VECTOR(Op, DAG);
2507 case ISD::SCALAR_TO_VECTOR:
2508 return LowerSCALAR_TO_VECTOR(Op, DAG);
2509 case ISD::VECTOR_SHUFFLE:
2510 return LowerVECTOR_SHUFFLE(Op, DAG);
2511 case ISD::EXTRACT_VECTOR_ELT:
2512 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2513 case ISD::INSERT_VECTOR_ELT:
2514 return LowerINSERT_VECTOR_ELT(Op, DAG);
2516 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2520 return LowerByteImmed(Op, DAG);
2522 // Vector and i8 multiply:
2525 return LowerI8Math(Op, DAG, Opc, *this);
2528 return LowerCTPOP(Op, DAG);
2530 case ISD::SELECT_CC:
2531 return LowerSELECT_CC(Op, DAG, *this);
2534 return LowerTRUNCATE(Op, DAG);
// ReplaceNodeResults - replace results of an illegally-typed node with
// legal replacements. Unhandled opcodes print a diagnostic; otherwise the
// node is returned unchanged.
2540 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2541 SmallVectorImpl<SDValue>&Results,
2545 unsigned Opc = (unsigned) N->getOpcode();
2546 MVT OpVT = N->getValueType(0);
// Diagnostic path for node kinds this hook does not yet cover.
2550 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2551 cerr << "Op.getOpcode() = " << Opc << "\n";
2552 cerr << "*Op.getNode():\n";
2560 /* Otherwise, return unchanged */
2563 //===----------------------------------------------------------------------===//
2564 // Target Optimization Hooks
2565 //===----------------------------------------------------------------------===//
// PerformDAGCombine - target-specific DAG combines. Folds redundant
// SPUISD::IndirectAddr arithmetic, strips no-op extends of VEC2PREFSLOT,
// kills degenerate (zero-amount) vector shifts, and collapses
// PREFSLOT2VEC/VEC2PREFSLOT round trips. Returns an empty SDValue when
// nothing was combined.
2568 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2571 TargetMachine &TM = getTargetMachine();
2573 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2574 SelectionDAG &DAG = DCI.DAG;
2575 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2576 MVT NodeVT = N->getValueType(0); // The node's value type
2577 MVT Op0VT = Op0.getValueType(); // The first operand's result
2578 SDValue Result; // Initially, empty result
2580 switch (N->getOpcode()) {
// ISD::ADD combines involving SPUISD::IndirectAddr:
2583 SDValue Op1 = N->getOperand(1);
2585 if (Op0.getOpcode() == SPUISD::IndirectAddr
2586 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2587 // Normalize the operands to reduce repeated code
2588 SDValue IndirectArg = Op0, AddArg = Op1;
2590 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2595 if (isa<ConstantSDNode>(AddArg)) {
2596 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2597 SDValue IndOp1 = IndirectArg.getOperand(1);
2599 if (CN0->isNullValue()) {
2600 // (add (SPUindirect <arg>, <arg>), 0) ->
2601 // (SPUindirect <arg>, <arg>)
2603 #if !defined(NDEBUG)
2604 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2606 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2607 << "With: (SPUindirect <arg>, <arg>)\n";
2612 } else if (isa<ConstantSDNode>(IndOp1)) {
2613 // (add (SPUindirect <arg>, <const>), <const>) ->
2614 // (SPUindirect <arg>, <const + const>)
2615 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2616 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2617 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2619 #if !defined(NDEBUG)
2620 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2622 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2623 << "), " << CN0->getSExtValue() << ")\n"
2624 << "With: (SPUindirect <arg>, "
2625 << combinedConst << ")\n";
2629 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2630 IndirectArg, combinedValue);
2636 case ISD::SIGN_EXTEND:
2637 case ISD::ZERO_EXTEND:
2638 case ISD::ANY_EXTEND: {
2639 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2640 // (any_extend (SPUextract_elt0 <arg>)) ->
2641 // (SPUextract_elt0 <arg>)
2642 // Types must match, however...
2643 #if !defined(NDEBUG)
2644 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2645 cerr << "\nReplace: ";
2648 Op0.getNode()->dump(&DAG);
2657 case SPUISD::IndirectAddr: {
// (SPUindirect (SPUaform <addr>, 0), 0) is redundant unless large-memory
// addressing is in effect.
2658 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2659 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2660 if (CN->getZExtValue() == 0) {
2661 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2662 // (SPUaform <addr>, 0)
2664 DEBUG(cerr << "Replace: ");
2665 DEBUG(N->dump(&DAG));
2666 DEBUG(cerr << "\nWith: ");
2667 DEBUG(Op0.getNode()->dump(&DAG));
2668 DEBUG(cerr << "\n");
2672 } else if (Op0.getOpcode() == ISD::ADD) {
2673 SDValue Op1 = N->getOperand(1);
2674 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2675 // (SPUindirect (add <arg>, <arg>), 0) ->
2676 // (SPUindirect <arg>, <arg>)
2677 if (CN1->isNullValue()) {
2679 #if !defined(NDEBUG)
2680 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2682 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2683 << "With: (SPUindirect <arg>, <arg>)\n";
2687 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2688 Op0.getOperand(0), Op0.getOperand(1));
2694 case SPUISD::SHLQUAD_L_BITS:
2695 case SPUISD::SHLQUAD_L_BYTES:
2696 case SPUISD::VEC_SHL:
2697 case SPUISD::VEC_SRL:
2698 case SPUISD::VEC_SRA:
2699 case SPUISD::ROTBYTES_LEFT: {
2700 SDValue Op1 = N->getOperand(1);
2702 // Kill degenerate vector shifts:
2703 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2704 if (CN->isNullValue()) {
2710 case SPUISD::PREFSLOT2VEC: {
2711 switch (Op0.getOpcode()) {
2714 case ISD::ANY_EXTEND:
2715 case ISD::ZERO_EXTEND:
2716 case ISD::SIGN_EXTEND: {
2717 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2719 // but only if the SPUprefslot2vec and <arg> types match.
2720 SDValue Op00 = Op0.getOperand(0);
2721 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2722 SDValue Op000 = Op00.getOperand(0);
2723 if (Op000.getValueType() == NodeVT) {
2729 case SPUISD::VEC2PREFSLOT: {
2730 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2732 Result = Op0.getOperand(0);
2740 // Otherwise, return unchanged.
// Trace any successful combine under -debug.
2742 if (Result.getNode()) {
2743 DEBUG(cerr << "\nReplace.SPU: ");
2744 DEBUG(N->dump(&DAG));
2745 DEBUG(cerr << "\nWith: ");
2746 DEBUG(Result.getNode()->dump(&DAG));
2747 DEBUG(cerr << "\n");
2754 //===----------------------------------------------------------------------===//
2755 // Inline Assembly Support
2756 //===----------------------------------------------------------------------===//
2758 /// getConstraintType - Given a constraint letter, return the type of
2759 /// constraint it is for this target.
// Single-letter constraints recognized here map to register classes; all
// others defer to the base TargetLowering implementation.
2760 SPUTargetLowering::ConstraintType
2761 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2762 if (ConstraintLetter.size() == 1) {
2763 switch (ConstraintLetter[0]) {
2770 return C_RegisterClass;
2773 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (GCC RS6000-style letters) to
// the SPU register class that satisfies it for the requested value type;
// unrecognized constraints fall through to the base class.
2776 std::pair<unsigned, const TargetRegisterClass*>
2777 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2780 if (Constraint.size() == 1) {
2781 // GCC RS6000 Constraint Letters
2782 switch (Constraint[0]) {
// Integer constraints: pick the 64- or 32-bit class by VT.
2786 return std::make_pair(0U, SPU::R64CRegisterClass);
2787 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point constraints: single vs. double precision classes.
2790 return std::make_pair(0U, SPU::R32FPRegisterClass);
2791 else if (VT == MVT::f64)
2792 return std::make_pair(0U, SPU::R64FPRegisterClass);
// General-purpose fallback class.
2795 return std::make_pair(0U, SPU::GPRCRegisterClass);
2799 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2802 //! Compute used/known bits for a SPU operand
2804 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2808 const SelectionDAG &DAG,
2809 unsigned Depth ) const {
2811 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2814 switch (Op.getOpcode()) {
// Default: nothing is known about SPU-specific nodes.
2816 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
// PREFSLOT2VEC: bits outside the source operand's integer width are known.
// NOTE(review): OR-ing InMask into KnownOne claims all in-range bits are
// one, which looks overly strong — verify against the original file.
2826 case SPUISD::PREFSLOT2VEC: {
2827 SDValue Op0 = Op.getOperand(0);
2828 MVT Op0VT = Op0.getValueType();
2829 unsigned Op0VTBits = Op0VT.getSizeInBits();
2830 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2831 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2832 KnownOne |= APInt(Op0VTBits, InMask, false);
// LDRESULT / VEC2PREFSLOT: same masking, keyed on the result type.
2836 case SPUISD::LDRESULT:
2837 case SPUISD::VEC2PREFSLOT: {
2838 MVT OpVT = Op.getValueType();
2839 unsigned OpVTBits = OpVT.getSizeInBits();
2840 uint64_t InMask = OpVT.getIntegerVTBitMask();
2841 KnownZero |= APInt(OpVTBits, ~InMask, false);
2842 KnownOne |= APInt(OpVTBits, InMask, false);
// Shift/rotate/select nodes: no bit information is derived here.
2847 case SPUISD::SHLQUAD_L_BITS:
2848 case SPUISD::SHLQUAD_L_BYTES:
2849 case SPUISD::VEC_SHL:
2850 case SPUISD::VEC_SRL:
2851 case SPUISD::VEC_SRA:
2852 case SPUISD::VEC_ROTL:
2853 case SPUISD::VEC_ROTR:
2854 case SPUISD::ROTBYTES_LEFT:
2855 case SPUISD::SELECT_MASK:
// ComputeNumSignBitsForTargetNode - for the handled opcode(s) (labels
// elided in this listing), an i8/i16/i32 result is reported as having all
// bits equal to the sign bit (i.e. the full bit width).
2862 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2863 unsigned Depth) const {
2864 switch (Op.getOpcode()) {
2869 MVT VT = Op.getValueType();
// Types other than i8/i16/i32 take the (elided) alternate path.
2871 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2874 return VT.getSizeInBits();
2879 // LowerAsmOperandForConstraint
// No SPU-specific constraint-letter handling yet: delegate everything to
// the generic TargetLowering implementation.
2881 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2882 char ConstraintLetter,
2884 std::vector<SDValue> &Ops,
2885 SelectionDAG &DAG) const {
2886 // Default, for the time being, to the base class handler
2887 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2891 /// isLegalAddressImmediate - Return true if the integer value can be used
2892 /// as the offset of the target addressing mode.
2893 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
2894 const Type *Ty) const {
2895 // SPU's addresses are 256K:
// Accepts the open-ish range (-2^18, 2^18 - 1); the type is not consulted.
2896 return (V > -(1 << 18) && V < (1 << 18) - 1);
// GlobalValue overload — body elided in this listing; presumably rejects
// global-value immediates (TODO confirm against the full file).
2899 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2904 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
2905 // The SPU target isn't yet aware of offsets.