1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
// File-scope helpers: an opcode -> printable-name map used by
// getTargetNodeName(), and a small table mapping scalar value types to
// their preferred-slot byte offset within a 16-byte SPU register.
// NOTE(review): this is a gapped listing -- the struct members, table
// initializers and closing braces of these definitions are not shown here.
37 // Used in getTargetNodeName() below
39 std::map<unsigned, const char *> node_names;
41 //! MVT mapping to useful data for Cell SPU
42 struct valtype_map_s {
44 const int prefslot_byte;
47 const valtype_map_s valtype_map[] = {
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear scan of valtype_map for VT; retval stays 0 when VT has no entry,
// and the cerr message below reports that case.
60 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
72 cerr << "getValueTypeMapEntry returns NULL for "
// Constructor: registers the Cell SPU register classes, declares how each
// ISD operation is handled per value type (Legal / Custom / Expand /
// Promote), selects the DAG-combine opcodes of interest, and sets the
// scheduling preference.  All the Custom entries correspond to the
// Lower* routines later in this file.
// NOTE(review): gapped listing -- loop-increment lines, closing braces and
// a few statements between the shown original line numbers are absent.
83 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
87 // Fold away setcc operations if possible.
90 // Use _setjmp/_longjmp instead of setjmp/longjmp.
91 setUseUnderscoreSetJmp(true);
92 setUseUnderscoreLongJmp(true);
94 // Set up the SPU's register classes:
95 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
96 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
97 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
98 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
99 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
100 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
101 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
103 // SPU has no sign or zero extended loads for i1, i8, i16:
104 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
105 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
106 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
108 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
109 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
111 // SPU constant load actions are custom lowered:
112 setOperationAction(ISD::Constant, MVT::i64, Custom);
113 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
114 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
116 // SPU's loads and stores have to be custom lowered:
// Every integer type from i8 up to (but not including) f128 gets
// custom LOAD/STORE handling (see LowerLOAD/LowerSTORE below).
117 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
119 MVT VT = (MVT::SimpleValueType)sctype;
121 setOperationAction(ISD::LOAD, VT, Custom);
122 setOperationAction(ISD::STORE, VT, Custom);
123 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
124 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
125 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
127 // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
128 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
129 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// Truncating stores from VT to every smaller integer type are expanded.
131 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
132 MVT StoreVT = (MVT::SimpleValueType) stype;
133 setTruncStoreAction(VT, StoreVT, Expand);
// Same treatment for the floating-point types f32..f64.
137 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
139 MVT VT = (MVT::SimpleValueType) sctype;
141 setOperationAction(ISD::LOAD, VT, Custom);
142 setOperationAction(ISD::STORE, VT, Custom);
144 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
145 MVT StoreVT = (MVT::SimpleValueType) stype;
146 setTruncStoreAction(VT, StoreVT, Expand);
150 // Expand the jumptable branches
151 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
152 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
154 // Custom lower SELECT_CC for most cases, but expand by default
155 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
156 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
157 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
158 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
159 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
161 // SPU has no intrinsics for these particular operations:
162 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
164 // SPU has no SREM/UREM instructions
165 setOperationAction(ISD::SREM, MVT::i32, Expand);
166 setOperationAction(ISD::UREM, MVT::i32, Expand);
167 setOperationAction(ISD::SREM, MVT::i64, Expand);
168 setOperationAction(ISD::UREM, MVT::i64, Expand);
170 // We don't support sin/cos/sqrt/fmod
171 setOperationAction(ISD::FSIN , MVT::f64, Expand);
172 setOperationAction(ISD::FCOS , MVT::f64, Expand);
173 setOperationAction(ISD::FREM , MVT::f64, Expand);
174 setOperationAction(ISD::FSIN , MVT::f32, Expand);
175 setOperationAction(ISD::FCOS , MVT::f32, Expand);
176 setOperationAction(ISD::FREM , MVT::f32, Expand);
178 // If we're enabling GP optimizations, use hardware square root
179 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
180 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
182 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
183 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
185 // SPU can do rotate right and left, so legalize it... but customize for i8
186 // because instructions don't exist.
188 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
190 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
191 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
192 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
194 setOperationAction(ISD::ROTL, MVT::i32, Legal);
195 setOperationAction(ISD::ROTL, MVT::i16, Legal);
196 setOperationAction(ISD::ROTL, MVT::i8, Custom);
198 // SPU has no native version of shift left/right for i8
199 setOperationAction(ISD::SHL, MVT::i8, Custom);
200 setOperationAction(ISD::SRL, MVT::i8, Custom);
201 setOperationAction(ISD::SRA, MVT::i8, Custom);
203 // Make these operations legal and handle them during instruction selection:
204 setOperationAction(ISD::SHL, MVT::i64, Legal);
205 setOperationAction(ISD::SRL, MVT::i64, Legal);
206 setOperationAction(ISD::SRA, MVT::i64, Legal);
208 // Custom lower i8, i32 and i64 multiplications
209 setOperationAction(ISD::MUL, MVT::i8, Custom);
210 setOperationAction(ISD::MUL, MVT::i32, Legal);
211 setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
213 // Need to custom handle (some) common i8, i64 math ops
214 setOperationAction(ISD::ADD, MVT::i8, Custom);
215 setOperationAction(ISD::ADD, MVT::i64, Custom);
216 setOperationAction(ISD::SUB, MVT::i8, Custom);
217 setOperationAction(ISD::SUB, MVT::i64, Custom);
219 // SPU does not have BSWAP. It does have i32 support CTLZ.
220 // CTPOP has to be custom lowered.
221 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
222 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
224 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
225 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
226 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
227 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
229 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
230 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
232 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
234 // SPU has a version of select that implements (a&~c)|(b&c), just like
235 // select ought to work:
236 setOperationAction(ISD::SELECT, MVT::i8, Legal);
237 setOperationAction(ISD::SELECT, MVT::i16, Legal);
238 setOperationAction(ISD::SELECT, MVT::i32, Legal);
239 setOperationAction(ISD::SELECT, MVT::i64, Legal);
241 setOperationAction(ISD::SETCC, MVT::i8, Legal);
242 setOperationAction(ISD::SETCC, MVT::i16, Legal);
243 setOperationAction(ISD::SETCC, MVT::i32, Legal);
244 setOperationAction(ISD::SETCC, MVT::i64, Legal);
246 // Zero extension and sign extension for i64 have to be
248 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
249 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
251 // Custom lower i128 -> i64 truncates
252 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
254 // SPU has a legal FP -> signed INT instruction
255 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
256 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
257 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
258 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
260 // FDIV on SPU requires custom lowering
261 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
263 // SPU has [U|S]INT_TO_FP
264 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
265 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
266 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
267 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
268 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
269 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
270 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
273 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
274 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
275 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
276 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
278 // We cannot sextinreg(i1). Expand to shifts.
279 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
281 // Support label based line numbers.
282 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
283 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
285 // We want to legalize GlobalAddress and ConstantPool nodes into the
286 // appropriate instructions to materialize the address.
287 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
289 MVT VT = (MVT::SimpleValueType)sctype;
291 setOperationAction(ISD::GlobalAddress, VT, Custom);
292 setOperationAction(ISD::ConstantPool, VT, Custom);
293 setOperationAction(ISD::JumpTable, VT, Custom);
296 // RET must be custom lowered, to meet ABI requirements
297 setOperationAction(ISD::RET, MVT::Other, Custom);
299 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
300 setOperationAction(ISD::VASTART , MVT::Other, Custom);
302 // Use the default implementation.
303 setOperationAction(ISD::VAARG , MVT::Other, Expand);
304 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
305 setOperationAction(ISD::VAEND , MVT::Other, Expand);
306 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
307 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
308 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
309 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
311 // Cell SPU has instructions for converting between i64 and fp.
312 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
313 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
315 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
316 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
318 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
319 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
321 // First set operation action for all vector types to expand. Then we
322 // will selectively turn on ones that can be effectively codegen'd.
323 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
324 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
325 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
326 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
327 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
328 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
330 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
331 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
332 MVT VT = (MVT::SimpleValueType)i;
334 // add/sub are legal for all supported vector VT's.
335 setOperationAction(ISD::ADD , VT, Legal);
336 setOperationAction(ISD::SUB , VT, Legal);
337 // mul has to be custom lowered.
338 // TODO: v2i64 vector multiply
339 setOperationAction(ISD::MUL , VT, Legal);
341 setOperationAction(ISD::AND , VT, Legal);
342 setOperationAction(ISD::OR , VT, Legal);
343 setOperationAction(ISD::XOR , VT, Legal);
344 setOperationAction(ISD::LOAD , VT, Legal);
345 setOperationAction(ISD::SELECT, VT, Legal);
346 setOperationAction(ISD::STORE, VT, Legal);
348 // These operations need to be expanded:
349 setOperationAction(ISD::SDIV, VT, Expand);
350 setOperationAction(ISD::SREM, VT, Expand);
351 setOperationAction(ISD::UDIV, VT, Expand);
352 setOperationAction(ISD::UREM, VT, Expand);
354 // Custom lower build_vector, constant pool spills, insert and
355 // extract vector elements:
356 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
357 setOperationAction(ISD::ConstantPool, VT, Custom);
358 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
359 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
360 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
361 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
364 setOperationAction(ISD::AND, MVT::v16i8, Custom);
365 setOperationAction(ISD::OR, MVT::v16i8, Custom);
366 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
367 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
369 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
371 setShiftAmountType(MVT::i32);
372 setBooleanContents(ZeroOrNegativeOneBooleanContent);
374 setStackPointerRegisterToSaveRestore(SPU::R1);
376 // We have target-specific dag combine patterns for the following nodes:
377 setTargetDAGCombine(ISD::ADD);
378 setTargetDAGCombine(ISD::ZERO_EXTEND);
379 setTargetDAGCombine(ISD::SIGN_EXTEND);
380 setTargetDAGCombine(ISD::ANY_EXTEND);
382 computeRegisterProperties();
384 // Set pre-RA register scheduler default to BURR, which produces slightly
385 // better code than the default (could also be TDRR, but TargetLowering.h
386 // needs a mod to support that model):
387 setSchedulingPreference(SchedulingForRegPressure);
// getTargetNodeName - map a SPUISD opcode to its printable name for DAG
// dumps.  Lazily populates the file-scope node_names map on first call
// (the global map is mutated without any synchronization visible here),
// then returns the mapped string, or 0 for an unknown opcode.
391 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
393 if (node_names.empty()) {
394 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
395 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
396 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
397 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
398 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
399 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
400 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
401 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
402 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
403 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
404 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
405 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
406 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
407 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
408 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
409 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
410 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
411 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
412 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
413 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
414 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
415 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
416 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
417 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
418 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
419 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
420 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup (does not insert): unknown opcodes yield 0.
423 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
425 return ((i != node_names.end()) ? i->second : 0);
428 //===----------------------------------------------------------------------===//
429 // Return the Cell SPU's SETCC result type
430 //===----------------------------------------------------------------------===//
// getSetCCResultType - SETCC on i8/i16/i32 produces a result of the same
// type; every other operand type gets an i32 result.
432 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
433 // i16 and i32 are valid SETCC result types
434 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
437 //===----------------------------------------------------------------------===//
438 // Calling convention code:
439 //===----------------------------------------------------------------------===//
441 #include "SPUGenCallingConv.inc"
443 //===----------------------------------------------------------------------===//
444 // LowerOperation implementation
445 //===----------------------------------------------------------------------===//
447 /// Custom lower loads for CellSPU
449 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
450 within a 16-byte block, we have to rotate to extract the requested element.
452 For extending loads, we also want to ensure that the following sequence is
453 emitted, e.g. for MVT::f32 extending load to MVT::f64:
457 %2 v16i8,ch = rotate %1
458 %3 v4f32, ch = bitconvert %2
459 %4 f32 = vec2prefslot %3
460 %5 f64 = fp_extend %4
// LowerLOAD - custom lowering of scalar loads.  Strategy visible below:
// re-emit the access as an aligned v16i8 load of the containing 16-byte
// block, rotate the wanted element into the preferred slot
// (SPUISD::ROTBYTES_LEFT), pull it out with VEC2PREFSLOT, then apply any
// sign/zero/any/fp extension, and wrap the (value, chain) pair in a
// SPUISD::LDRESULT node.  Only ISD::UNINDEXED addressing is handled; the
// trailing case reports any other mode to cerr.
// NOTE(review): gapped listing -- several declarations (e.g. of `rotate`,
// `result`, `CN`, `Flag`), else-lines and closing braces are not shown.
464 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
465 LoadSDNode *LN = cast<LoadSDNode>(Op);
466 SDValue the_chain = LN->getChain();
467 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
468 MVT InVT = LN->getMemoryVT();
469 MVT OutVT = Op.getValueType();
470 ISD::LoadExtType ExtType = LN->getExtensionType();
471 unsigned alignment = LN->getAlignment();
// NOTE(review): vtm is dereferenced below without a null check, even
// though getValueTypeMapEntry can return 0 for unmapped types.
472 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
474 switch (LN->getAddressingMode()) {
475 case ISD::UNINDEXED: {
477 SDValue basePtr = LN->getBasePtr();
// Aligned (16-byte) case: the rotate amount can be computed statically
// for known base-pointer shapes.
480 if (alignment == 16) {
483 // Special cases for a known aligned load to simplify the base pointer
484 // and the rotation amount:
485 if (basePtr.getOpcode() == ISD::ADD
486 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
487 // Known offset into basePtr
488 int64_t offset = CN->getSExtValue();
489 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
494 rotate = DAG.getConstant(rotamt, MVT::i16);
496 // Simplify the base pointer for this case:
497 basePtr = basePtr.getOperand(0);
498 if ((offset & ~0xf) > 0) {
499 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
501 DAG.getConstant((offset & ~0xf), PtrVT));
503 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
504 || (basePtr.getOpcode() == SPUISD::IndirectAddr
505 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
506 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
507 // Plain aligned a-form address: rotate into preferred slot
508 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
509 int64_t rotamt = -vtm->prefslot_byte;
512 rotate = DAG.getConstant(rotamt, MVT::i16);
514 // Offset the rotate amount by the basePtr and the preferred slot
516 int64_t rotamt = -vtm->prefslot_byte;
519 rotate = DAG.getNode(ISD::ADD, PtrVT,
521 DAG.getConstant(rotamt, PtrVT));
524 // Unaligned load: must be more pessimistic about addressing modes:
525 if (basePtr.getOpcode() == ISD::ADD) {
526 MachineFunction &MF = DAG.getMachineFunction();
527 MachineRegisterInfo &RegInfo = MF.getRegInfo();
528 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
531 SDValue Op0 = basePtr.getOperand(0);
532 SDValue Op1 = basePtr.getOperand(1);
534 if (isa<ConstantSDNode>(Op1)) {
535 // Convert the (add <ptr>, <const>) to an indirect address contained
536 // in a register. Note that this is done because we need to avoid
537 // creating a 0(reg) d-form address due to the SPU's block loads.
538 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
539 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
540 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
542 // Convert the (add <arg1>, <arg2>) to an indirect address, which
543 // will likely be lowered as a reg(reg) x-form address.
544 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
547 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
549 DAG.getConstant(0, PtrVT));
552 // Offset the rotate amount by the basePtr and the preferred slot
554 rotate = DAG.getNode(ISD::ADD, PtrVT,
556 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
559 // Re-emit as a v16i8 vector load
560 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
561 LN->getSrcValue(), LN->getSrcValueOffset(),
562 LN->isVolatile(), 16);
565 the_chain = result.getValue(1);
567 // Rotate into the preferred slot:
568 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
569 result.getValue(0), rotate);
571 // Convert the loaded v16i8 vector to the appropriate vector type
572 // specified by the operand:
573 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
574 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
575 DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
577 // Handle extending loads by extending the scalar result:
578 if (ExtType == ISD::SEXTLOAD) {
579 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
580 } else if (ExtType == ISD::ZEXTLOAD) {
581 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
582 } else if (ExtType == ISD::EXTLOAD) {
583 unsigned NewOpc = ISD::ANY_EXTEND;
585 if (OutVT.isFloatingPoint())
586 NewOpc = ISD::FP_EXTEND;
588 result = DAG.getNode(NewOpc, OutVT, result);
// Package (value, chain) in an LDRESULT node so instruction selection
// sees both results.
591 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
592 SDValue retops[2] = {
597 result = DAG.getNode(SPUISD::LDRESULT, retvts,
598 retops, sizeof(retops) / sizeof(retops[0]));
// Unsupported addressing modes: report and fall through to the (unseen)
// error handling below.
605 case ISD::LAST_INDEXED_MODE:
606 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
608 cerr << (unsigned) LN->getAddressingMode() << "\n";
616 /// Custom lower stores for CellSPU
618 All CellSPU stores are aligned to 16-byte boundaries, so for elements
619 within a 16-byte block, we have to generate a shuffle to insert the
620 requested element into its place, then store the resulting block.
// LowerSTORE - custom lowering of scalar stores.  Strategy visible below:
// load the containing 16-byte block as v16i8, build a SHUFFLE_MASK from
// the insertion offset, SCALAR_TO_VECTOR the value to store, merge it into
// the loaded block with SPUISD::SHUFB, and store the whole block back.
// Only ISD::UNINDEXED addressing is handled.
// NOTE(review): gapped listing -- declarations (e.g. `CN`, `Flag`,
// `result`), else-lines and closing braces are not shown here.
623 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
624 StoreSDNode *SN = cast<StoreSDNode>(Op);
625 SDValue Value = SN->getValue();
626 MVT VT = Value.getValueType();
627 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
628 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
629 unsigned alignment = SN->getAlignment();
631 switch (SN->getAddressingMode()) {
632 case ISD::UNINDEXED: {
633 // The vector type we really want to load from the 16-byte chunk.
634 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
635 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
637 SDValue alignLoadVec;
638 SDValue basePtr = SN->getBasePtr();
639 SDValue the_chain = SN->getChain();
640 SDValue insertEltOffs;
// Aligned (16-byte) case: base pointer and insertion byte can be
// simplified for known base-pointer shapes.
642 if (alignment == 16) {
645 // Special cases for a known aligned load to simplify the base pointer
646 // and insertion byte:
647 if (basePtr.getOpcode() == ISD::ADD
648 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
649 // Known offset into basePtr
650 int64_t offset = CN->getSExtValue();
652 // Simplify the base pointer for this case:
653 basePtr = basePtr.getOperand(0);
654 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
656 DAG.getConstant((offset & 0xf), PtrVT));
658 if ((offset & ~0xf) > 0) {
659 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
661 DAG.getConstant((offset & ~0xf), PtrVT));
664 // Otherwise, assume it's at byte 0 of basePtr
665 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
667 DAG.getConstant(0, PtrVT));
670 // Unaligned load: must be more pessimistic about addressing modes:
671 if (basePtr.getOpcode() == ISD::ADD) {
672 MachineFunction &MF = DAG.getMachineFunction();
673 MachineRegisterInfo &RegInfo = MF.getRegInfo();
674 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
677 SDValue Op0 = basePtr.getOperand(0);
678 SDValue Op1 = basePtr.getOperand(1);
680 if (isa<ConstantSDNode>(Op1)) {
681 // Convert the (add <ptr>, <const>) to an indirect address contained
682 // in a register. Note that this is done because we need to avoid
683 // creating a 0(reg) d-form address due to the SPU's block loads.
684 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
685 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
686 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
688 // Convert the (add <arg1>, <arg2>) to an indirect address, which
689 // will likely be lowered as a reg(reg) x-form address.
690 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
693 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
695 DAG.getConstant(0, PtrVT));
698 // Insertion point is solely determined by basePtr's contents
699 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
701 DAG.getConstant(0, PtrVT));
704 // Re-emit as a v16i8 vector load
705 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
706 SN->getSrcValue(), SN->getSrcValueOffset(),
707 SN->isVolatile(), 16);
710 the_chain = alignLoadVec.getValue(1);
712 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
713 SDValue theValue = SN->getValue();
717 && (theValue.getOpcode() == ISD::AssertZext
718 || theValue.getOpcode() == ISD::AssertSext)) {
719 // Drill down and get the value for zero- and sign-extended
721 theValue = theValue.getOperand(0);
724 // If the base pointer is already a D-form address, then just create
725 // a new D-form address with a slot offset and the orignal base pointer.
726 // Otherwise generate a D-form address with the slot offset relative
727 // to the stack pointer, which is always aligned.
729 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
730 cerr << "CellSPU LowerSTORE: basePtr = ";
731 basePtr.getNode()->dump(&DAG);
// Build the shuffle mask from the insertion offset, vectorize the value,
// and splice it into the aligned block with SHUFB.
736 SDValue insertEltOp =
737 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
738 SDValue vectorizeOp =
739 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
741 result = DAG.getNode(SPUISD::SHUFB, vecVT,
742 vectorizeOp, alignLoadVec,
743 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
// Store the merged 16-byte block back through the aligned load's memory
// operand info.
745 result = DAG.getStore(the_chain, result, basePtr,
746 LN->getSrcValue(), LN->getSrcValueOffset(),
747 LN->isVolatile(), LN->getAlignment());
// Dead debug block (#if 0).
// NOTE(review): "&currentRoot" below was mangled to "¤tRoot" by an
// HTML-entity encoding error; harmless while this block is #if 0'd out,
// but fix the text before ever re-enabling it.
749 #if 0 && !defined(NDEBUG)
750 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
751 const SDValue ¤tRoot = DAG.getRoot();
754 cerr << "------- CellSPU:LowerStore result:\n";
757 DAG.setRoot(currentRoot);
// NOTE(review): the message below says "LowerLOAD"/"LoadSDNode" -- it
// appears copy-pasted from LowerLOAD; this is LowerSTORE's error path.
768 case ISD::LAST_INDEXED_MODE:
769 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
771 cerr << (unsigned) SN->getAddressingMode() << "\n";
779 /// Generate the address of a constant pool entry.
// LowerConstantPool - materialize the address of a constant-pool entry.
// Static relocation only: small-memory code returns an A-form absolute
// address; large-memory code builds an (Hi, Lo) pair combined through
// SPUISD::IndirectAddr.  Other relocation models hit the assert below.
781 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
782 MVT PtrVT = Op.getValueType();
783 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
784 Constant *C = CP->getConstVal();
785 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
786 SDValue Zero = DAG.getConstant(0, PtrVT);
787 const TargetMachine &TM = DAG.getTarget();
789 if (TM.getRelocationModel() == Reloc::Static) {
790 if (!ST->usingLargeMem()) {
791 // Just return the SDValue with the constant pool address in it.
792 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
794 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
795 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
796 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
801 "LowerConstantPool: Relocation model other than static"
// LowerJumpTable - materialize the address of a jump table.  Mirrors
// LowerConstantPool: A-form address for small memory, (Hi, Lo) indirect
// address for large memory; static relocation only.
807 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
808 MVT PtrVT = Op.getValueType();
809 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
810 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
811 SDValue Zero = DAG.getConstant(0, PtrVT);
812 const TargetMachine &TM = DAG.getTarget();
814 if (TM.getRelocationModel() == Reloc::Static) {
815 if (!ST->usingLargeMem()) {
816 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
818 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
819 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
820 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
825 "LowerJumpTable: Relocation model other than static not supported.");
// LowerGlobalAddress - materialize the address of a global.  Same scheme
// as LowerConstantPool/LowerJumpTable: A-form for small memory, (Hi, Lo)
// indirect address for large memory; non-static relocation is reported
// to cerr below.
830 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
831 MVT PtrVT = Op.getValueType();
832 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
833 GlobalValue *GV = GSDN->getGlobal();
834 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
835 const TargetMachine &TM = DAG.getTarget();
836 SDValue Zero = DAG.getConstant(0, PtrVT);
838 if (TM.getRelocationModel() == Reloc::Static) {
839 if (!ST->usingLargeMem()) {
840 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
842 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
843 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
844 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
847 cerr << "LowerGlobalAddress: Relocation model other than static not "
856 //! Custom lower i64 integer constants
858 This code inserts all of the necessary juggling that needs to occur to load
859 a 64-bit constant into a register.
// LowerConstant - custom lowering for i64 ISD::Constant: splat the value
// into both lanes of a v2i64 BUILD_VECTOR and read it back from the
// preferred slot.  Any other type falls into the cerr error path.
862 LowerConstant(SDValue Op, SelectionDAG &DAG) {
863 MVT VT = Op.getValueType();
865 if (VT == MVT::i64) {
866 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
867 SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
868 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
869 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
871 cerr << "LowerConstant: unhandled constant type "
881 //! Custom lower double precision floating point constants
// LowerConstantFP - custom lowering for f64 ConstantFP: take the double's
// raw bit pattern, splat it as a v2i64 constant vector, bitconvert to
// v2f64, and extract the preferred slot.
883 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
884 MVT VT = Op.getValueType();
886 if (VT == MVT::f64) {
887 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
890 "LowerConstantFP: Node is not ConstantFPSDNode");
892 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
893 SDValue T = DAG.getConstant(dbits, MVT::i64);
894 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
895 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
896 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
// LowerFORMAL_ARGUMENTS - lower incoming arguments.  While argument
// registers remain, each formal is copied out of its physical register
// into a fresh virtual register of the class chosen by type; once
// registers run out, arguments are loaded from fixed stack slots spaced
// StackSlotSize apart starting at minStackSize().  For varargs, every
// remaining argument register is spilled to a fixed slot (as v16i8) so
// va_arg can walk them; VarArgsFrameIndex is left pointing at those slots.
// Returns a MERGE_VALUES of all argument values plus the updated chain.
// NOTE(review): gapped listing -- the switch's case labels, `ArgVal`'s
// declaration, the vararg `if` and several braces are not shown.
903 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
905 MachineFunction &MF = DAG.getMachineFunction();
906 MachineFrameInfo *MFI = MF.getFrameInfo();
907 MachineRegisterInfo &RegInfo = MF.getRegInfo();
908 SmallVector<SDValue, 48> ArgValues;
909 SDValue Root = Op.getOperand(0);
910 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
912 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
913 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
915 unsigned ArgOffset = SPUFrameInfo::minStackSize();
916 unsigned ArgRegIdx = 0;
917 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
919 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
921 // Add DAG nodes to load the arguments or copy them out of registers.
922 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
923 ArgNo != e; ++ArgNo) {
924 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
925 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
928 if (ArgRegIdx < NumArgRegs) {
929 const TargetRegisterClass *ArgRegClass;
// Select the register class matching the argument's value type; the
// default (unhandled) case reports to cerr.
931 switch (ObjectVT.getSimpleVT()) {
933 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
934 << ObjectVT.getMVTString()
939 ArgRegClass = &SPU::R8CRegClass;
942 ArgRegClass = &SPU::R16CRegClass;
945 ArgRegClass = &SPU::R32CRegClass;
948 ArgRegClass = &SPU::R64CRegClass;
951 ArgRegClass = &SPU::R32FPRegClass;
954 ArgRegClass = &SPU::R64FPRegClass;
962 ArgRegClass = &SPU::VECREGRegClass;
966 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
967 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
968 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
971 // We need to load the argument to a virtual register if we determined
972 // above that we ran out of physical registers of the appropriate type
973 // or we're forced to do vararg
974 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
975 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
976 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
977 ArgOffset += StackSlotSize;
980 ArgValues.push_back(ArgVal);
982 Root = ArgVal.getOperand(0);
987 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
988 // We will spill (79-3)+1 registers to the stack
989 SmallVector<SDValue, 79-3+1> MemOps;
991 // Create the frame slot
993 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
994 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
995 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
996 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
997 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
998 Root = Store.getOperand(0);
999 MemOps.push_back(Store);
1001 // Increment address by stack slot size for the next stored argument
1002 ArgOffset += StackSlotSize;
// Tie all vararg spill stores together into the chain.
1004 if (!MemOps.empty())
1005 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1008 ArgValues.push_back(Root);
1010 // Return the new list of results.
1011 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1012 &ArgValues[0], ArgValues.size());
1015 /// isLSAAddress - Return the immediate to use if the specified
1016 /// value is representable as a LSA address.
// Returns null if Op is not a suitable constant.  The immediate is the
// word address (byte address >> 2) and must fit in a sign-extended field.
// NOTE(review): the early "if (!C) return 0;" guard line is not visible in
// this chunk but is required before dereferencing C below — confirm.
1017 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1018 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1021 int Addr = C->getZExtValue();
1022 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1023 (Addr << 14 >> 14) != Addr)
1024 return 0; // Top 14 bits have to be sext of immediate.
// Encode as a word address: drop the two implicit zero bits.
1026 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// LowerCALL - Lower an outgoing call: pass the first NumArgRegs arguments
// in registers and the rest on the stack, select a call addressing form
// (PC-relative BRSL, absolute BRASL/A-form, or indirect for large-memory
// mode), then copy any return values out of R3/R4.
// NOTE(review): the "static SDValue" return-type line and several case
// labels/braces of this definition are not visible in this chunk.
1031 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1032 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1033 SDValue Chain = TheCall->getChain();
1034 SDValue Callee = TheCall->getCallee();
1035 unsigned NumOps = TheCall->getNumArgs();
1036 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1037 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1038 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1040 // Handy pointer type
1041 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1043 // Accumulate how many bytes are to be pushed on the stack, including the
1044 // linkage area, and parameter passing area. According to the SPU ABI,
1045 // we minimally need space for [LR] and [SP]
1046 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1048 // Set up a copy of the stack pointer for use loading and storing any
1049 // arguments that may not fit in the registers available for argument
1051 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1053 // Figure out which arguments are going to go in registers, and which in
1055 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1056 unsigned ArgRegIdx = 0;
1058 // Keep track of registers passing arguments
1059 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1060 // And the arguments passed on the stack
1061 SmallVector<SDValue, 8> MemOpChains;
1063 for (unsigned i = 0; i != NumOps; ++i) {
1064 SDValue Arg = TheCall->getArg(i);
1066 // PtrOff will be used to store the current argument to the stack if a
1067 // register cannot be found for it.
1068 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1069 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// The per-type cases below are identical: register if one remains,
// otherwise store to the stack slot.  (Case labels elided in this chunk.)
1071 switch (Arg.getValueType().getSimpleVT()) {
1072 default: assert(0 && "Unexpected ValueType for argument!");
1076 if (ArgRegIdx != NumArgRegs) {
1077 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1079 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1080 ArgOffset += StackSlotSize;
1085 if (ArgRegIdx != NumArgRegs) {
1086 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1088 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1089 ArgOffset += StackSlotSize;
1098 if (ArgRegIdx != NumArgRegs) {
1099 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1101 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1102 ArgOffset += StackSlotSize;
1108 // Update number of stack bytes actually used, insert a call sequence start
1109 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1110 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1113 if (!MemOpChains.empty()) {
1114 // Adjust the stack pointer for the stack arguments.
1115 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1116 &MemOpChains[0], MemOpChains.size());
1119 // Build a sequence of copy-to-reg nodes chained together with token chain
1120 // and flag operands which copy the outgoing args into the appropriate regs.
1122 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1123 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1125 InFlag = Chain.getValue(1);
1128 SmallVector<SDValue, 8> Ops;
1129 unsigned CallOpc = SPUISD::CALL;
1131 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1132 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1133 // node so that legalize doesn't hack it.
1134 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1135 GlobalValue *GV = G->getGlobal();
1136 MVT CalleeVT = Callee.getValueType();
1137 SDValue Zero = DAG.getConstant(0, PtrVT);
1138 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1140 if (!ST->usingLargeMem()) {
1141 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1142 // style calls, otherwise, external symbols are BRASL calls. This assumes
1143 // that declared/defined symbols are in the same compilation unit and can
1144 // be reached through PC-relative jumps.
1147 // This may be an unsafe assumption for JIT and really large compilation
1149 if (GV->isDeclaration()) {
1150 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1152 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1155 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1157 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1159 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1160 MVT CalleeVT = Callee.getValueType();
1161 SDValue Zero = DAG.getConstant(0, PtrVT);
1162 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1163 Callee.getValueType());
1165 if (!ST->usingLargeMem()) {
1166 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1168 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1170 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1171 // If this is an absolute destination address that appears to be a legal
1172 // local store address, use the munged value.
1173 Callee = SDValue(Dest, 0);
1176 Ops.push_back(Chain);
1177 Ops.push_back(Callee);
1179 // Add argument registers to the end of the list so that they are known live
1181 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1182 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1183 RegsToPass[i].second.getValueType()));
1185 if (InFlag.getNode())
1186 Ops.push_back(InFlag);
1187 // Returns a chain and a flag for retval copy to use.
1188 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1189 &Ops[0], Ops.size());
1190 InFlag = Chain.getValue(1);
1192 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1193 DAG.getIntPtrConstant(0, true), InFlag);
1194 if (TheCall->getValueType(0) != MVT::Other)
1195 InFlag = Chain.getValue(1);
1197 SDValue ResultVals[3];
1198 unsigned NumResults = 0;
1200 // If the call has results, copy the values out of the ret val registers.
// (Case labels elided in this chunk.)  Scalar results come back in R3;
// a result split across two i32 values also uses R4 (first branch below).
1201 switch (TheCall->getValueType(0).getSimpleVT()) {
1202 default: assert(0 && "Unexpected ret value!");
1203 case MVT::Other: break;
1205 if (TheCall->getValueType(1) == MVT::i32) {
1206 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1207 ResultVals[0] = Chain.getValue(0);
1208 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1209 Chain.getValue(2)).getValue(1);
1210 ResultVals[1] = Chain.getValue(0);
1213 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1214 ResultVals[0] = Chain.getValue(0);
1219 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1220 ResultVals[0] = Chain.getValue(0);
1225 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1226 InFlag).getValue(1);
1227 ResultVals[0] = Chain.getValue(0);
1236 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1237 InFlag).getValue(1);
1238 ResultVals[0] = Chain.getValue(0);
1243 // If the function returns void, just return the chain.
1244 if (NumResults == 0)
1247 // Otherwise, merge everything together with a MERGE_VALUES node.
1248 ResultVals[NumResults++] = Chain;
1249 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1250 return Res.getValue(Op.getResNo());
// LowerRET - Lower a RET node: run the return-value calling convention,
// mark the return registers live-out, copy each returned value into its
// assigned register, and emit SPUISD::RET_FLAG (with the glue flag if any
// value was copied).
// NOTE(review): the "static SDValue" return-type line of this definition
// is not visible in this chunk.
1254 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1255 SmallVector<CCValAssign, 16> RVLocs;
1256 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1257 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1258 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1259 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1261 // If this is the first return lowered for this function, add the regs to the
1262 // liveout set for the function.
1263 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1264 for (unsigned i = 0; i != RVLocs.size(); ++i)
1265 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1268 SDValue Chain = Op.getOperand(0);
1271 // Copy the result values into the output registers.
1272 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1273 CCValAssign &VA = RVLocs[i];
1274 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, val0, flag0, val1, flag1, ...): value i is
// operand i*2+1.  Each copy is glued to the next via Flag.
1275 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1276 Flag = Chain.getValue(1);
1280 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1282 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1286 //===----------------------------------------------------------------------===//
1287 // Vector related lowering:
1288 //===----------------------------------------------------------------------===//
// getVecImm - If the BUILD_VECTOR node N is a splat of a single constant
// (ignoring UNDEF elements), return that ConstantSDNode; otherwise null.
// NOTE(review): the "return 0;" / "return CN;" lines of this function are
// not visible in this chunk.
1290 static ConstantSDNode *
1291 getVecImm(SDNode *N) {
1292 SDValue OpVal(0, 0);
1294 // Check to see if this buildvec has a single non-undef value in its elements.
1295 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1296 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1297 if (OpVal.getNode() == 0)
1298 OpVal = N->getOperand(i);
1299 else if (OpVal != N->getOperand(i))
1303 if (OpVal.getNode() != 0) {
1304 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1309 return 0; // All UNDEF: use implicit def.; not Constant node
1312 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1313 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// ... constant as a TargetConstant; otherwise an empty SDValue.
// For i64, only a value whose upper and lower 32-bit halves are equal can
// qualify; the value is then reduced to its upper half.
// NOTE(review): the upper != lower rejection path and the trailing
// "return SDValue();" are not visible in this chunk — confirm.
1315 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1317 if (ConstantSDNode *CN = getVecImm(N)) {
1318 uint64_t Value = CN->getZExtValue();
1319 if (ValueType == MVT::i64) {
1320 uint64_t UValue = CN->getZExtValue();
1321 uint32_t upper = uint32_t(UValue >> 32);
1322 uint32_t lower = uint32_t(UValue);
1325 Value = Value >> 32;
// u18 immediates are unsigned, 18 bits: [0, 0x3ffff].
1327 if (Value <= 0x3ffff)
1328 return DAG.getTargetConstant(Value, ValueType);
1334 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1335 /// and the value fits into a signed 16-bit constant, and if so, return the
// ... constant as a TargetConstant; otherwise an empty SDValue.
// NOTE(review): the upper != lower rejection path and the trailing
// "return SDValue();" are not visible in this chunk — confirm.
1337 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1339 if (ConstantSDNode *CN = getVecImm(N)) {
1340 int64_t Value = CN->getSExtValue();
1341 if (ValueType == MVT::i64) {
1342 uint64_t UValue = CN->getZExtValue();
1343 uint32_t upper = uint32_t(UValue >> 32);
1344 uint32_t lower = uint32_t(UValue);
1347 Value = Value >> 32;
// Signed 16-bit range: [-32768, 32767].
1349 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1350 return DAG.getTargetConstant(Value, ValueType);
1357 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1358 /// and the value fits into a signed 10-bit constant, and if so, return the
// ... constant as a TargetConstant; otherwise an empty SDValue.
// NOTE(review): the upper != lower rejection path and the trailing
// "return SDValue();" are not visible in this chunk — confirm.
1360 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1362 if (ConstantSDNode *CN = getVecImm(N)) {
1363 int64_t Value = CN->getSExtValue();
1364 if (ValueType == MVT::i64) {
1365 uint64_t UValue = CN->getZExtValue();
1366 uint32_t upper = uint32_t(UValue >> 32);
1367 uint32_t lower = uint32_t(UValue);
1370 Value = Value >> 32;
// isS10Constant: helper checking the signed 10-bit immediate range.
1372 if (isS10Constant(Value))
1373 return DAG.getTargetConstant(Value, ValueType);
1379 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1380 /// and the value fits into a signed 8-bit constant, and if so, return the
// ... constant as a TargetConstant; otherwise an empty SDValue.
1383 /// @note: The incoming vector is v16i8 because that's the only way we can load
1384 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// same (see the i16 byte-equality test below).
1386 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1388 if (ConstantSDNode *CN = getVecImm(N)) {
1389 int Value = (int) CN->getZExtValue();
// For i16 splats, accept only values whose high byte equals the low
// byte, and return just that byte.
1390 if (ValueType == MVT::i16
1391 && Value <= 0xffff /* truncated from uint64_t */
1392 && ((short) Value >> 8) == ((short) Value & 0xff))
1393 return DAG.getTargetConstant(Value & 0xff, ValueType);
1394 else if (ValueType == MVT::i8
1395 && (Value & 0xff) == Value)
1396 return DAG.getTargetConstant(Value, ValueType);
1402 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1403 /// and the value fits into a signed 16-bit constant, and if so, return the
// ... constant (shifted down by 16 for the ILHU "load halfword upper"
// encoding); otherwise an empty SDValue.
1405 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1407 if (ConstantSDNode *CN = getVecImm(N)) {
1408 uint64_t Value = CN->getZExtValue();
// Qualifies only when all significant bits live in bits [16,31].
1409 if ((ValueType == MVT::i32
1410 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1411 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1412 return DAG.getTargetConstant(Value >> 16, ValueType);
1418 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat value as an i32 TargetConstant, or an empty SDValue
// if N is not a constant splat.
1419 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1420 if (ConstantSDNode *CN = getVecImm(N)) {
1421 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1427 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1428 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1429 if (ConstantSDNode *CN = getVecImm(N)) {
1430 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1436 // If this is a vector of constants or undefs, get the bits. A bit in
1437 // UndefBits is set if the corresponding element of the vector is an
1438 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1439 // zero. Return true if this is not an array of constants, false if it is.
1441 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1442 uint64_t UndefBits[2]) {
1443 // Start with zero'd results.
1444 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1446 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1447 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1448 SDValue OpVal = BV->getOperand(i);
// Map element i into (which uint64_t, which bit offset within it).
1450 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1451 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1453 uint64_t EltBits = 0;
1454 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask for this element in UndefBits; its value
// bits stay zero.
1455 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1456 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1458 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1459 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1460 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants are stored via their raw bit patterns.
1461 const APFloat &apf = CN->getValueAPF();
1462 EltBits = (CN->getValueType(0) == MVT::f32
1463 ? FloatToBits(apf.convertToFloat())
1464 : DoubleToBits(apf.convertToDouble()));
1466 // Nonconstant element.
1470 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1473 //printf("%llx %llx %llx %llx\n",
1474 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1478 /// If this is a splat (repetition) of a value across the whole vector, return
1479 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1480 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1481 /// SplatSize = 1 byte.
// UNDEF bits (set in Undef128) are treated as wildcards so they never
// prevent a splat from being recognized.  Returns true on a splat no
// smaller than MinSplatBits; SplatBits/SplatUndef/SplatSize describe it.
1482 static bool isConstantSplat(const uint64_t Bits128[2],
1483 const uint64_t Undef128[2],
1485 uint64_t &SplatBits, uint64_t &SplatUndef,
1487 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1488 // the same as the lower 64-bits, ignoring undefs.
// Pre-fold each width's halves together; the OR merges values (undefs
// contribute 0) and the AND keeps only bits undef in BOTH halves.
1489 uint64_t Bits64 = Bits128[0] | Bits128[1];
1490 uint64_t Undef64 = Undef128[0] & Undef128[1];
1491 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1492 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1493 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1494 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1496 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1497 if (MinSplatBits < 64) {
1499 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1501 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1502 if (MinSplatBits < 32) {
1504 // If the top 16-bits are different than the lower 16-bits, ignoring
1505 // undefs, we have an i32 splat.
1506 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1507 if (MinSplatBits < 16) {
1508 // If the top 8-bits are different than the lower 8-bits, ignoring
1509 // undefs, we have an i16 splat.
1510 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1511 == ((Bits16 >> 8) & ~Undef16)) {
1512 // Otherwise, we have an 8-bit splat.
1513 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1514 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// (The intermediate-width assignments below are fall-back results for
// the enclosing if-levels; SplatBits/SplatSize lines are elided in
// this chunk.)
1520 SplatUndef = Undef16;
1527 SplatUndef = Undef32;
1533 SplatBits = Bits128[0];
1534 SplatUndef = Undef128[0];
1540 return false; // Can't be a splat if two pieces don't match.
1543 // If this is a case we can't handle, return null and let the default
1544 // expansion code take care of it. If we CAN select this case, and if it
1545 // selects to a single instruction, return Op. Otherwise, if we can codegen
1546 // this case more efficiently than a constant pool load, lower it to the
1547 // sequence of ops that should be used.
1548 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1549 MVT VT = Op.getValueType();
1550 // If this is a vector of constants or undefs, get the bits. A bit in
1551 // UndefBits is set if the corresponding element of the vector is an
1552 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1554 uint64_t VectorBits[2];
1555 uint64_t UndefBits[2];
1556 uint64_t SplatBits, SplatUndef;
1558 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1559 || !isConstantSplat(VectorBits, UndefBits,
1560 VT.getVectorElementType().getSizeInBits(),
1561 SplatBits, SplatUndef, SplatSize))
1562 return SDValue(); // Not a constant vector, not a splat.
// Per-type splat materialization.  (Case labels elided in this chunk.)
1564 switch (VT.getSimpleVT()) {
1567 uint32_t Value32 = SplatBits;
1568 assert(SplatSize == 4
1569 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1570 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1571 SDValue T = DAG.getConstant(Value32, MVT::i32);
1572 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1573 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1577 uint64_t f64val = SplatBits;
1578 assert(SplatSize == 8
1579 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1580 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1581 SDValue T = DAG.getConstant(f64val, MVT::i64);
1582 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1583 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1587 // 8-bit constants have to be expanded to 16-bits
1588 unsigned short Value16 = SplatBits | (SplatBits << 8);
1590 for (int i = 0; i < 8; ++i)
1591 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1592 return DAG.getNode(ISD::BIT_CONVERT, VT,
1593 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1596 unsigned short Value16;
1598 Value16 = (unsigned short) (SplatBits & 0xffff);
1600 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1601 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1603 for (int i = 0; i < 8; ++i) Ops[i] = T;
1604 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1607 unsigned int Value = SplatBits;
1608 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1609 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 splats: easy when the two 32-bit halves match; otherwise build
// the value by shuffling special patterns (0 / ~0 / sign bit) together.
1612 uint64_t val = SplatBits;
1613 uint32_t upper = uint32_t(val >> 32);
1614 uint32_t lower = uint32_t(val);
1616 if (upper == lower) {
1617 // Magic constant that can be matched by IL, ILA, et. al.
1618 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1619 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1623 SmallVector<SDValue, 16> ShufBytes;
1625 bool upper_special, lower_special;
1627 // NOTE: This code creates common-case shuffle masks that can be easily
1628 // detected as common expressions. It is not attempting to create highly
1629 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1631 // Detect if the upper or lower half is a special shuffle mask pattern:
1632 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1633 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1635 // Create lower vector if not a special pattern
1636 if (!lower_special) {
1637 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1638 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1639 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1640 LO32C, LO32C, LO32C, LO32C));
1643 // Create upper vector if not a special pattern
1644 if (!upper_special) {
1645 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1646 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1647 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1648 HI32C, HI32C, HI32C, HI32C));
1651 // If either upper or lower are special, then the two input operands are
1652 // the same (basically, one of them is a "don't care")
1657 if (lower_special && upper_special) {
1658 // Unhappy situation... both upper and lower are special, so punt with
1659 // a target constant:
1660 SDValue Zero = DAG.getConstant(0, MVT::i32);
1661 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control word: one byte per result byte,
// using the special encodings 0x80 (zero), 0xc0 (0xff), 0xe0 (0x80).
1665 for (int i = 0; i < 4; ++i) {
1667 for (int j = 0; j < 4; ++j) {
1669 bool process_upper, process_lower;
1671 process_upper = (upper_special && (i & 1) == 0);
1672 process_lower = (lower_special && (i & 1) == 1);
1674 if (process_upper || process_lower) {
1675 if ((process_upper && upper == 0)
1676 || (process_lower && lower == 0))
1678 else if ((process_upper && upper == 0xffffffff)
1679 || (process_lower && lower == 0xffffffff))
1681 else if ((process_upper && upper == 0x80000000)
1682 || (process_lower && lower == 0x80000000))
1683 val |= (j == 0 ? 0xe0 : 0x80);
1685 val |= i * 4 + j + ((i & 1) * 16);
1688 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1691 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1692 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1693 &ShufBytes[0], ShufBytes.size()));
1701 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1702 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1703 /// permutation vector, V3, is monotonically increasing with one "exception"
1704 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1705 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1706 /// In either case, the net result is going to eventually invoke SHUFB to
1707 /// permute/shuffle the bytes from V1 and V2.
1709 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1710 /// control word for byte/halfword/word insertion. This takes care of a single
1711 /// element move from V2 into V1.
1713 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1714 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1715 SDValue V1 = Op.getOperand(0);
1716 SDValue V2 = Op.getOperand(1);
1717 SDValue PermMask = Op.getOperand(2);
1719 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1721 // If we have a single element being moved from V1 to V2, this can be handled
1722 // using the C*[DX] compute mask instructions, but the vector elements have
1723 // to be monotonically increasing with one exception element.
1724 MVT VecVT = V1.getValueType();
1725 MVT EltVT = VecVT.getVectorElementType();
1726 unsigned EltsFromV2 = 0;
1728 unsigned V2EltIdx0 = 0;
1729 unsigned CurrElt = 0;
1730 unsigned MaxElts = VecVT.getVectorNumElements();
1731 unsigned PrevElt = 0;
1733 bool monotonic = true;
// V2EltIdx0 assignments for each element width are elided in this chunk;
// it is the mask index at which V2's elements begin.
1736 if (EltVT == MVT::i8) {
1738 } else if (EltVT == MVT::i16) {
1740 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1742 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1745 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the permutation mask, classifying it as a single-element insert
// (monotonic) and/or a rotation.
1747 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1748 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1749 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1752 if (SrcElt >= V2EltIdx0) {
1753 if (1 >= (++EltsFromV2)) {
1754 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1756 } else if (CurrElt != SrcElt) {
// NOTE(review): the rotation test below ignores positions where
// PrevElt == 0, so rotations passing through element 0 may not be
// recognized — confirm whether that case matters here.
1764 if (PrevElt > 0 && SrcElt < MaxElts) {
1765 if ((PrevElt == SrcElt - 1)
1766 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1773 } else if (PrevElt == 0) {
1774 // First time through, need to keep track of previous element
1777 // This isn't a rotation, takes elements from vector 2
1784 if (EltsFromV2 == 1 && monotonic) {
1785 // Compute mask and shuffle
1786 MachineFunction &MF = DAG.getMachineFunction();
1787 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1788 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1789 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1790 // Initialize temporary register to 0
1791 SDValue InitTempReg =
1792 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1793 // Copy register's contents as index in SHUFFLE_MASK:
1794 SDValue ShufMaskOp =
1795 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1796 DAG.getTargetConstant(V2Elt, MVT::i32),
1797 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1798 // Use shuffle mask in SHUFB synthetic instruction:
1799 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1800 } else if (rotate) {
// Rotation: convert the element rotate amount into bytes.
1801 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1803 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1804 V1, DAG.getConstant(rotamt, MVT::i16));
1806 // Convert the SHUFFLE_VECTOR mask's input element units to the
1808 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1810 SmallVector<SDValue, 16> ResultMask;
1811 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1813 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1816 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1818 for (unsigned j = 0; j < BytesPerElement; ++j) {
1819 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
// Fall back to a general byte-level SHUFB with an explicit v16i8 mask.
1824 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1825 &ResultMask[0], ResultMask.size());
1826 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// LowerSCALAR_TO_VECTOR - Lower SCALAR_TO_VECTOR: constants become an
// explicit constant BUILD_VECTOR (which simplifies to a vector register
// load); non-constants are promoted into the preferred slot via
// SPUISD::PREFSLOT2VEC.
1830 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1831 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1833 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1834 // For a constant, build the appropriate constant vector, which will
1835 // eventually simplify to a vector register load.
1837 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1838 SmallVector<SDValue, 16> ConstVecValues;
1842 // Create a constant vector:
1843 switch (Op.getValueType().getSimpleVT()) {
1844 default: assert(0 && "Unexpected constant value type in "
1845 "LowerSCALAR_TO_VECTOR");
1846 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1847 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1848 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1849 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1850 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1851 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across all vector lanes.
1854 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1855 for (size_t j = 0; j < n_copies; ++j)
1856 ConstVecValues.push_back(CValue);
1858 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1859 &ConstVecValues[0], ConstVecValues.size());
1861 // Otherwise, copy the value from one register to another:
1862 switch (Op0.getValueType().getSimpleVT()) {
1863 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1870 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
// LowerEXTRACT_VECTOR_ELT - Lower element extraction.  Constant indices
// build a SHUFB mask that moves the element into the preferred slot;
// variable indices rotate the element to byte 0, replicate it across the
// vector, then read the preferred slot.
1877 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1878 MVT VT = Op.getValueType();
1879 SDValue N = Op.getOperand(0);
1880 SDValue Elt = Op.getOperand(1);
1883 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1884 // Constant argument:
1885 int EltNo = (int) C->getZExtValue();
// Bounds checks per element type.
// NOTE(review): the i32/i64 assert message strings say "> 4" / "> 2"
// although the checks reject EltNo >= 4 / >= 2 — messages are off by one.
1888 if (VT == MVT::i8 && EltNo >= 16)
1889 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1890 else if (VT == MVT::i16 && EltNo >= 8)
1891 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1892 else if (VT == MVT::i32 && EltNo >= 4)
1893 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1894 else if (VT == MVT::i64 && EltNo >= 2)
1895 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1897 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1898 // i32 and i64: Element 0 is the preferred slot
1899 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1902 // Need to generate shuffle mask and extract:
1903 int prefslot_begin = -1, prefslot_end = -1;
1904 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Preferred-slot byte range per type.  (Case labels elided in this chunk.)
1906 switch (VT.getSimpleVT()) {
1908 assert(false && "Invalid value type!");
1910 prefslot_begin = prefslot_end = 3;
1914 prefslot_begin = 2; prefslot_end = 3;
1919 prefslot_begin = 0; prefslot_end = 3;
1924 prefslot_begin = 0; prefslot_end = 7;
1929 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1930 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a 16-byte shuffle that places the requested element's bytes
// into the preferred slot.
1932 unsigned int ShufBytes[16];
1933 for (int i = 0; i < 16; ++i) {
1934 // zero fill uppper part of preferred slot, don't care about the
1936 unsigned int mask_val;
1937 if (i <= prefslot_end) {
1939 ((i < prefslot_begin)
1941 : elt_byte + (i - prefslot_begin));
1943 ShufBytes[i] = mask_val;
1945 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 byte selectors into four i32 mask words.
1948 SDValue ShufMask[4];
1949 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1950 unsigned bidx = i * 4;
1951 unsigned int bits = ((ShufBytes[bidx] << 24) |
1952 (ShufBytes[bidx+1] << 16) |
1953 (ShufBytes[bidx+2] << 8) |
1955 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1958 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1960 sizeof(ShufMask) / sizeof(ShufMask[0]));
1962 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
1963 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
1964 N, N, ShufMaskVec));
1966 // Variable index: Rotate the requested element into slot 0, then replicate
1967 // slot 0 across the vector
1968 MVT VecVT = N.getValueType();
1969 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1970 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
1974 // Make life easier by making sure the index is zero-extended to i32
1975 if (Elt.getValueType() != MVT::i32)
1976 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
1978 // Scale the index to a bit/byte shift quantity
// scaleFactor = 16 / numElements = element size in bytes.
1980 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1981 unsigned scaleShift = scaleFactor.logBase2();
1984 if (scaleShift > 0) {
1985 // Scale the shift factor:
1986 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
1987 DAG.getConstant(scaleShift, MVT::i32));
1990 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
1992 // Replicate the bytes starting at byte 0 across the entire vector (for
1993 // consistency with the notion of a unified register set)
// Per-type replication shuffle patterns.  (Case labels elided.)
1996 switch (VT.getSimpleVT()) {
1998 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2002 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2003 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2008 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2009 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2015 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2016 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2022 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2023 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2024 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2025 loFactor, hiFactor);
2030 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2031 DAG.getNode(SPUISD::SHUFB, VecVT,
2032 vecShift, vecShift, replicate));
//! Custom lower ISD::INSERT_VECTOR_ELT for the SPU target.
/*!
  Inserts ValOp into VecOp at a compile-time-constant index by building a
  SHUFFLE_MASK control word from a $sp-relative address and then SHUFBing
  the scalar into place.
  NOTE(review): statements appear to be missing from this excerpt (the
  SHUFB result is never bound to a value or returned, and VecOp is unused
  here) -- verify against the upstream file before editing.
 */
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  MVT VT = Op.getValueType();
  // The insertion index must be constant; cast<> asserts if it is not.
  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  // Derive the byte-insertion control word from the computed address.
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
//! Lower i8 arithmetic by promoting to i16.
/*!
  The SPU has no native 8-bit ALU operations, so each supported i8
  operator is widened to i16 (sign- or zero-extended as appropriate for
  the operator), computed at 16 bits, then truncated back to i8.

  \param Op   the i8 operation to lower
  \param DAG  the current SelectionDAG
  \param Opc  the ISD opcode being lowered (reused at i16 width)
  \param TLI  target lowering info, used for the shift-amount type

  NOTE(review): the switch(Opc) statement and its case labels appear to
  have been lost from this excerpt; each chunk below is one opcode's
  lowering, and several continuation lines are missing -- verify each
  chunk's opcode against the upstream file.
 */
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
  SDValue N0 = Op.getOperand(0); // Everything has at least one operand
  MVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
    assert(0 && "Unhandled i8 math operator");

    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    // Rotate-style expansion: the byte value is zero-extended and then
    // replicated into both halves of the i16 (N0 | (N0 << 8)) so that a
    // 16-bit rotate behaves like an 8-bit rotate -- presumably the
    // ROTL/ROTR case; TODO confirm against upstream.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    // Widen or narrow the shift amount to the target's shift-amount type.
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            TLI.getShiftAmountTy()));
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));

    // Logical-shift style: zero-extend the value so vacated high bits
    // are zero -- presumably the SRL/SHL case; TODO confirm.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    // Arithmetic-shift style: sign-extend the value so the sign bit is
    // replicated into the high half -- presumably the SRA case.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    // Both operands sign-extended and the operation performed at i16 --
    // presumably the MUL case; TODO confirm.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
//! Lower i64 operations the SPU cannot perform natively.
/*!
  Handles zero/sign/any-extension of i32 to i64 (via a SHUFB with a
  constant mask) and 64-bit add/subtract (via CARRY_GENERATE /
  BORROW_GENERATE plus the extended add/sub nodes).
  NOTE(review): case labels and several declaration lines (e.g. the
  "SDValue Op0/Op1 =" bindings before the PREFSLOT2VEC calls) appear to
  be missing from this excerpt -- verify against the upstream file.
 */
static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
  MVT VT = Op.getValueType();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);

  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    MVT Op0VT = Op0.getValueType();
    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

    assert(Op0VT == MVT::i32
           && "CellSPU: Zero/sign extending something other than i32");

    DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");

    // Move the scalar into the preferred slot of a vector register.
    SDValue PromoteScalar =
      DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);

    // Use a shuffle to zero extend the i32 to i64 directly:
    // (0x80 control bytes produce zero bytes in the shufb result.)
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
        DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
          MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
          0x08090a0b, MVT::i32));
    SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
                                      PromoteScalar, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
                                                             VecVT, zextShuffle));

    // 64-bit add:
    // Turn operands into vectors to satisfy type checking (shufb works on
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));

      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedCarry =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedCarry));

    // 64-bit subtract (same shape, but with borrow; 0xc0 control bytes
    // produce 0xFF bytes in the shufb result, per the SPU shufb special
    // encodings).
    // Turn operands into vectors to satisfy type checking (shufb works on
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));

      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedBorrow =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  BorrowGen, BorrowGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedBorrow));
2267 //! Lower byte immediate operations for v16i8 vectors:
/*!
  Lower AND/OR/XOR on v16i8 to the byte-immediate forms (ANDBI, ORBI,
  XORBI): if one operand is a constant splat BUILD_VECTOR (possibly
  behind a BIT_CONVERT), rewrite it as a vector of i8 target constants so
  instruction selection can match the immediate form.
  NOTE(review): the ConstVec/Arg declarations and the SplatSize
  declaration appear to be missing from this excerpt.
 */
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  // Assume the constant is operand 0; if it is not a BUILD_VECTOR, look
  // through a BIT_CONVERT, then try operand 1 instead.
  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;

    // Only proceed when every element is the same constant byte splat.
    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           VT.getVectorElementType().getSizeInBits(),
                           SplatBits, SplatUndef, SplatSize)) {
      // Truncate the splat to its low byte for the immediate form.
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)

      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
2315 //! Custom lowering for CTPOP (count population)
  Custom lowering code that counts the number of ones in the input
2318 operand. SPU has such an instruction, but it counts the number of
2319 ones per byte, which then have to be accumulated.
/*!
  Custom lowering for ISD::CTPOP: the SPU CNTB instruction counts ones
  per byte, so the per-byte counts must then be accumulated for i16/i32.
  NOTE(review): case labels and some continuation lines appear to be
  missing from this excerpt; the three chunks below handle i8, i16 and
  i32 respectively -- verify against the upstream file.
 */
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  switch (VT.getSimpleVT()) {
    assert(false && "Invalid value type!");

    // i8: a single CNTB already yields the full popcount of one byte.
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);

    // i16: sum the two byte counts (shift right 8, add, mask to 0x0f --
    // a 16-bit value has at most 16 one-bits, so 5 bits would suffice;
    // the 0x0f mask matches the upstream code).
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    // Virtual register used to tie the CNTB result into both addends.
    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,

    // i32: two rounds of shift-and-add (16 then 8) to fold all four byte
    // counts into the low byte, then mask with 0xff.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),

      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2423 //! Lower ISD::SETCC
2425 Lower i64 condition code handling.
2428 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) {
2429 MVT VT = Op.getValueType();
2430 SDValue lhs = Op.getOperand(0);
2431 SDValue rhs = Op.getOperand(1);
2432 SDValue condition = Op.getOperand(2);
2434 if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) {
2435 // Expand the i64 comparisons to what Cell can actually support,
2436 // which is eq, ugt and sgt:
2438 CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDValue>(condition);
2440 switch (ccvalue->get()) {
2449 //! Lower ISD::SELECT_CC
2451 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2454 \note Need to revisit this in the future: if the code path through the true
2455 and false value computations is longer than the latency of a branch (6
2456 cycles), then it would be more advantageous to branch and insert a new basic
2457 block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
//! Lower ISD::SELECT_CC to a SETCC feeding a SPU SELB node.
/*!
  The compare is emitted as a plain ISD::SETCC and its result used as the
  SELB mask; see the inline notes for why the true/false operands are
  swapped relative to SELB's operand order.
 */
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  // falseval first, trueval second -- see the SELB note above.
  return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2487 //! Custom lower ISD::TRUNCATE
//! Custom lower ISD::TRUNCATE (currently only i128 -> i64).
/*!
  Performs the truncation with a SHUFB that selects the least-significant
  doubleword of the quadword; any other truncate is returned unchanged so
  the default expansion applies.
 */
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
  MVT VT = Op.getValueType();
  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();
  MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    // Move the scalar into a vector register so SHUFB's typing works out.
    SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                                       PromoteScalar, PromoteScalar, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));

  return SDValue();                  // Leave the truncate unmolested
2522 //! Custom (target-specific) lowering entry point
2524 This is where LLVM's DAG selection process calls to do target-specific
//! Custom (target-specific) lowering entry point.
/*!
  Dispatches each custom-lowered opcode to its Lower* helper; the default
  path dumps the unhandled node to cerr for diagnosis.
  NOTE(review): several case labels (LOAD, STORE, Constant, CALL, RET,
  the i8/i64 arithmetic opcodes, AND/OR/XOR, MUL, CTPOP, TRUNCATE,
  SETCC, and the default label) appear to be missing from this excerpt.
 */
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

    // Unhandled opcode: print diagnostics before giving up.
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();

    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerRET(Op, DAG, getTargetMachine());

  // i64 extensions go through the i64 math lowering.
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return LowerI64Math(Op, DAG, Opc);

  // i8, i64 math ops:
    return LowerI8Math(Op, DAG, Opc, *this);
    else if (VT == MVT::i64)
      return LowerI64Math(Op, DAG, Opc);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
    return LowerI8Math(Op, DAG, Opc, *this);

    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

    return LowerTRUNCATE(Op, DAG);

    return LowerSETCC(Op, DAG);
//! Replace results of nodes whose types need custom legalization.
/*!
  Currently only dumps diagnostics for unhandled opcodes and otherwise
  leaves the node unchanged.
  NOTE(review): the parameter list and switch structure appear to be
  partially missing from this excerpt.
 */
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

    // Unhandled opcode: print diagnostics for the maintainer.
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";

  /* Otherwise, return unchanged */
2647 //===----------------------------------------------------------------------===//
2648 // Target Optimization Hooks
2649 //===----------------------------------------------------------------------===//
//! Target-specific DAG combines for the SPU.
/*!
  Folds performed here (each returns a replacement node when it fires):
    - (add (SPUindirect <arg>, <arg>), 0)        -> (SPUindirect <arg>, <arg>)
    - (add (SPUindirect <arg>, c1), c2)          -> (SPUindirect <arg>, c1+c2)
    - (any/zero/sign_extend (SPUvec2prefslot x)) -> (SPUvec2prefslot x),
      when the types already match
    - (SPUindirect (SPUaform <addr>, 0), 0)      -> (SPUaform <addr>, 0)
      (small-memory model only)
    - (SPUindirect (add a, b), 0)                -> (SPUindirect a, b)
    - degenerate vector shifts/rotates by 0      -> the unshifted operand
    - prefslot2vec / vec2prefslot round-trips    -> the original vector
  NOTE(review): the return type line, several case labels, and a few
  statement lines (e.g. the IndirectArg/AddArg swap, some cerr streams)
  appear to be missing from this excerpt -- verify upstream.
 */
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  TargetMachine &TM = getTargetMachine();
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);      // everything has at least one operand
  MVT NodeVT = N->getValueType(0);     // The node's value type
  MVT Op0VT = Op0.getValueType();      // The first operand's result
  SDValue Result;                      // Initially, empty result

  switch (N->getOpcode()) {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
              << "Replace:    (add (SPUindirect <arg>, <arg>), 0)\n"
              << "With:    (SPUindirect <arg>, <arg>)\n";

        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
              << "Replace:    (add (SPUindirect <arg>, " << CN1->getSExtValue()
              << "), " << CN0->getSExtValue() << ")\n"
              << "With:    (SPUindirect <arg>, "
              << combinedConst << ")\n";

          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                             IndirectArg, combinedValue);

  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        Op0.getNode()->dump(&DAG);

  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
              << "Replace:    (SPUindirect (add <arg>, <arg>), 0)\n"
              << "With:    (SPUindirect <arg>, <arg>)\n";

          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));

  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {

  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {

    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      Result = Op0.getOperand(0);

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
2837 //===----------------------------------------------------------------------===//
2838 // Inline Assembly Support
2839 //===----------------------------------------------------------------------===//
2841 /// getConstraintType - Given a constraint letter, return the type of
2842 /// constraint it is for this target.
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target. Single-letter register constraints
/// map to C_RegisterClass; everything else defers to the base class.
/// NOTE(review): the case labels for the recognized letters appear to be
/// missing from this excerpt.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
      return C_RegisterClass;
  return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm constraint (plus the operand's value
/// type) to a concrete SPU register class; unrecognized constraints are
/// handled by the base class.
/// NOTE(review): the case labels and some if-conditions appear to be
/// missing from this excerpt.
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      return std::make_pair(0U, SPU::GPRCRegisterClass);

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2885 //! Compute used/known bits for a SPU operand
//! Compute known-zero/known-one bits for SPU-specific nodes.
/*!
  For PREFSLOT2VEC / VEC2PREFSLOT / LDRESULT, bits outside the operand's
  integer value-type mask are known zero (the value occupies only the
  preferred slot). The shift/rotate/select nodes listed at the end fall
  through with no information.
  NOTE(review): the return type and parts of the parameter list appear to
  be missing from this excerpt.
 */
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth ) const {
  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;

  switch (Op.getOpcode()) {
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);

  case SPUISD::PREFSLOT2VEC: {
    SDValue Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType();
    unsigned Op0VTBits = Op0VT.getSizeInBits();
    uint64_t InMask = Op0VT.getIntegerVTBitMask();
    // Bits above the operand's width are known zero.
    KnownZero |= APInt(Op0VTBits, ~InMask, false);
    KnownOne |= APInt(Op0VTBits, InMask, false);

  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT: {
    MVT OpVT = Op.getValueType();
    unsigned OpVTBits = OpVT.getSizeInBits();
    uint64_t InMask = OpVT.getIntegerVTBitMask();
    KnownZero |= APInt(OpVTBits, ~InMask, false);
    KnownOne |= APInt(OpVTBits, InMask, false);

  // No information is computed for the following nodes:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:

  case SPUISD::SEXT32TO64:
//! Compute the number of sign bits for SPU-specific nodes.
/*!
  NOTE(review): the return type, case labels and surrounding braces
  appear to be missing from this excerpt; the visible fragment returns
  the full bit width for i8/i16/i32 value types -- verify upstream.
 */
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
    MVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
    return VT.getSizeInBits();
2963 // LowerAsmOperandForConstraint
// LowerAsmOperandForConstraint -- SPU adds nothing here yet; the call is
// forwarded verbatim to the TargetLowering base implementation.
// NOTE(review): the trailing arguments of the forwarded call appear to
// be missing from this excerpt.
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2975 /// isLegalAddressImmediate - Return true if the integer value can be used
2976 /// as the offset of the target addressing mode.
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode. The SPU's d-form
/// addressing covers a signed 18-bit range.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  // Accepts the open interval (-2^18, 2^18 - 1).
  return (V > -(1 << 18) && V < (1 << 18) - 1);
// GlobalValue overload of isLegalAddressImmediate.
// NOTE(review): the function body appears to be missing from this
// excerpt -- verify the return value upstream.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2988 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
2989 // The SPU target isn't yet aware of offsets.