lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/VectorExtras.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT   valtype;
  43     const int   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << VT.getMVTString()
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDValue &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! Predicate that returns true if the operand is an indirect target
 105   bool isIndirectOperand(const SDValue &Op)
 106   {
 107     const unsigned Opc = Op.getOpcode();
 108     return (Opc == ISD::Register
 109             || Opc == SPUISD::LDRESULT);
 110   }
 111 }
 112
/// Construct the Cell SPU lowering information.
///
/// Registers the scalar and vector register classes, then records, for each
/// (opcode, value type) pair of interest, whether the operation is Legal or
/// must be Promoted, Expanded or Custom lowered. Finally sets the shift
/// amount type, boolean contents, stack pointer register, target DAG combine
/// hooks and scheduling preference.
///
/// Several (opcode, type) pairs are registered more than once below and the
/// statements are kept in their original order.
/// NOTE(review): presumably a later registration replaces an earlier one for
/// the same pair -- confirm against TargetLowering before reordering anything.
///
/// \param TM the SPU target machine; passed to the TargetLowering base class
/// and retained in SPUTM.
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Mark division by a power of two as cheap.
  // NOTE(review): the previous comment here read "Fold away setcc operations
  // if possible", which does not match the call below.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // Initialize libcalls (i64 multiply is marked Expand further down):
  setLibcallName(RTLIB::MUL_I64, "__muldi3");

  // SPU has no sign or zero extended loads for i1, i8, i16:
  // i1 extending loads are promoted...
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  // ...while i8 extending loads and truncating stores to i8 are custom
  // lowered...
  setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8,    MVT::i8, Custom);
  setTruncStoreAction(MVT::i16,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i32,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i64,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i128,  MVT::i8, Custom);

  // ...as are i16 extending loads.
  setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions: i64 and f64 constants are custom lowered,
  // f32 constants are legal.
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered. This covers every
  // simple value type from i8 up to, but not including, f128:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom lower BRCOND for i8 to "promote" the result to i16
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);

  // Custom lower SELECT_CC for i8 through i64, but expand by default
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // Expand i32 and i64 SREM/UREM.
  // NOTE(review): the previous comment said "PowerPC has no SREM/UREM
  // instructions"; it appears to have been carried over from another backend.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/fmod (FREM); expand them for f32 and f64
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Square root is unconditionally expanded for f32 and f64.
  // NOTE(review): the previous comment mentioned enabling "GP optimizations"
  // to use a hardware square root; no such condition exists in this code.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  // (The intended final action for each type is kept in the inline comment.)
  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);

  // SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL,  MVT::i64,    Custom);
  setOperationAction(ISD::SRL,  MVT::i64,    Custom);
  setOperationAction(ISD::SRA,  MVT::i64,    Custom);

  // Custom lower i8 and i32 multiplications; i64 multiplication is expanded
  // (see the __muldi3 libcall name registered above)
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  setOperationAction(ISD::MUL,  MVT::i32,    Custom);
  setOperationAction(ISD::MUL,  MVT::i64,    Expand);   // libcall

  // SMUL_LOHI, UMUL_LOHI
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD,  MVT::i64,    Custom);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Custom);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);

  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);

  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work (i64 select is expanded):
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Expand);

  // SETCC is legal for i8, i16 and i32; i64 is expanded
  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Expand);

  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  // (FP_TO_UINT for i32 is registered again, as Promote, further down.)
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering (f64 is currently disabled)
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP: legal for i32, promoted for i8/i16, custom
  // lowered for i64
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  // Bit conversions between the 32-bit and 64-bit scalar types are legal
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address. JumpTable nodes are
  // handled the same way; the loop covers i8 up to, but not including, f128.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET,           MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  // (Both pairs below were already registered as Custom earlier in this
  // constructor.)
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // (this pair was registered as Legal earlier in this constructor)
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Register the vector types; all six share the VECREG register class. The
  // loop that follows assigns a default action per operation to every vector
  // value type, and a few types are adjusted individually after it.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    // Bitwise logic, loads, stores and select are legal.
    setOperationAction(ISD::AND   , VT, Legal);
    setOperationAction(ISD::OR    , VT, Legal);
    setOperationAction(ISD::XOR   , VT, Legal);
    setOperationAction(ISD::LOAD  , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded (FDIV is custom lowered instead):
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  // Per-type adjustments after the loop: v16i8 bitwise logic becomes Custom.
  // The v16i8 MUL and v4f32 SCALAR_TO_VECTOR lines repeat the Custom action
  // the loop above already assigned.
  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  // Shift amounts are i32; booleans are represented as zero or one.
  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  // R1 is the stack pointer register to save and restore.
  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  // Must follow the register class registrations above.
  // NOTE(review): ordering requirement assumed from the name -- confirm.
  computeRegisterProperties();

  // Set other properties:
  setSchedulingPreference(SchedulingForLatency);
}
 411
 412 const char *
 413 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 414 {
 415   if (node_names.empty()) {
 416     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 417     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 418     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 419     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 420     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 421     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 422     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 423     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 424     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 425     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 426     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 427     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 428     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 429     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
 430                                               = "SPUISD::EXTRACT_ELT0_CHAINED";
 431     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 432     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 433     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 434     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 435     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 436     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 437     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 438     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 439     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 440     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 441     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 442     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 443     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 444     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 445     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 446     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
 447       "SPUISD::ROTQUAD_RZ_BYTES";
 448     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
 449       "SPUISD::ROTQUAD_RZ_BITS";
 450     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 451       "SPUISD::ROTBYTES_RIGHT_S";
 452     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 453     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 454       "SPUISD::ROTBYTES_LEFT_CHAINED";
 455     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 456       "SPUISD::ROTBYTES_LEFT_BITS";
 457     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 458     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 459     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
 460     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
 461     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
 462     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
 463     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 464     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 465     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 466   }
 467
 468   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 469
 470   return ((i != node_names.end()) ? i->second : 0);
 471 }
 472
 473 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 474   MVT VT = Op.getValueType();
 475   return (VT.isInteger() ? VT : MVT(MVT::i32));
 476 }
 477
 478 //===----------------------------------------------------------------------===//
 479 // Calling convention code:
 480 //===----------------------------------------------------------------------===//
 481
 482 #include "SPUGenCallingConv.inc"
 483
 484 //===----------------------------------------------------------------------===//
 485 //  LowerOperation implementation
 486 //===----------------------------------------------------------------------===//
 487
 488 /// Aligned load common code for CellSPU
 489 /*!
 490   \param[in] Op The SelectionDAG load or store operand
 491   \param[in] DAG The selection DAG
 492   \param[in] ST CellSPU subtarget information structure
 493   \param[in,out] alignment Caller initializes this to the load or store node's
 494   value from getAlignment(), may be updated while generating the aligned load
 495   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 496   offset (divisible by 16, modulo 16 == 0)
 497   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 498   offset of the preferred slot (modulo 16 != 0)
 499   \param[in,out] VT Caller initializes this value type to the the load or store
 500   node's loaded or stored value type; may be updated if an i1-extended load or
 501   store.
 502   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 503   otherwise false. Can help to determine if the chunk needs to be rotated.
 504
 505  Both load and store lowering load a block of data aligned on a 16-byte
 506  boundary. This is the common aligned load code shared between both.
 507  */
 508 static SDValue
 509 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 510             LSBaseSDNode *LSN,
 511             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 512             MVT &VT, bool &was16aligned)
 513 {
 514   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 515   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 516   SDValue basePtr = LSN->getBasePtr();
 517   SDValue chain = LSN->getChain();
 518
 519   if (basePtr.getOpcode() == ISD::ADD) {
 520     SDValue Op1 = basePtr.getNode()->getOperand(1);
 521
 522     if (Op1.getOpcode() == ISD::Constant
 523         || Op1.getOpcode() == ISD::TargetConstant) {
 524       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 525
 526       alignOffs = (int) CN->getZExtValue();
 527       prefSlotOffs = (int) (alignOffs & 0xf);
 528
 529       // Adjust the rotation amount to ensure that the final result ends up in
 530       // the preferred slot:
 531       prefSlotOffs -= vtm->prefslot_byte;
 532       basePtr = basePtr.getOperand(0);
 533
 534       // Loading from memory, can we adjust alignment?
 535       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 536         SDValue APtr = basePtr.getOperand(0);
 537         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 538           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 539           alignment = GSDN->getGlobal()->getAlignment();
 540         }
 541       }
 542     } else {
 543       alignOffs = 0;
 544       prefSlotOffs = -vtm->prefslot_byte;
 545     }
 546   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
 547     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
 548     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
 549     prefSlotOffs = (int) (alignOffs & 0xf);
 550     prefSlotOffs -= vtm->prefslot_byte;
 551     basePtr = DAG.getRegister(SPU::R1, VT);
 552   } else {
 553     alignOffs = 0;
 554     prefSlotOffs = -vtm->prefslot_byte;
 555   }
 556
 557   if (alignment == 16) {
 558     // Realign the base pointer as a D-Form address:
 559     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 560       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 561                             basePtr,
 562                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 563     }
 564
 565     // Emit the vector load:
 566     was16aligned = true;
 567     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 568                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 569                        LSN->isVolatile(), 16);
 570   }
 571
 572   // Unaligned load or we're using the "large memory" model, which means that
 573   // we have to be very pessimistic:
 574   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 575     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
 576                           DAG.getConstant(0, PtrVT));
 577   }
 578
 579   // Add the offset
 580   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 581                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 582   was16aligned = false;
 583   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 584                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 585                      LSN->isVolatile(), 16);
 586 }
 587
 588 /// Custom lower loads for CellSPU
 589 /*!
 590  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 591  within a 16-byte block, we have to rotate to extract the requested element.
 592  */
 593 static SDValue
 594 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 595   LoadSDNode *LN = cast<LoadSDNode>(Op);
 596   SDValue the_chain = LN->getChain();
 597   MVT VT = LN->getMemoryVT();
 598   MVT OpVT = Op.getNode()->getValueType(0);
 599   ISD::LoadExtType ExtType = LN->getExtensionType();
 600   unsigned alignment = LN->getAlignment();
 601   SDValue Ops[8];
 602
 603   switch (LN->getAddressingMode()) {
 604   case ISD::UNINDEXED: {
 605     int offset, rotamt;
 606     bool was16aligned;
 607     SDValue result =
 608       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 609
 610     if (result.getNode() == 0)
 611       return result;
 612
 613     the_chain = result.getValue(1);
 614     // Rotate the chunk if necessary
 615     if (rotamt < 0)
 616       rotamt += 16;
 617     if (rotamt != 0 || !was16aligned) {
 618       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 619
 620       Ops[0] = the_chain;
 621       Ops[1] = result;
 622       if (was16aligned) {
 623         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 624       } else {
 625         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 626         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 627         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 628                              DAG.getConstant(rotamt, PtrVT));
 629       }
 630
 631       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 632       the_chain = result.getValue(1);
 633     }
 634
 635     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 636       SDVTList scalarvts;
 637       MVT vecVT = MVT::v16i8;
 638
 639       // Convert the loaded v16i8 vector to the appropriate vector type
 640       // specified by the operand:
 641       if (OpVT == VT) {
 642         if (VT != MVT::i1)
 643           vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 644       } else
 645         vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
 646
 647       Ops[0] = the_chain;
 648       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 649       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 650       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 651       the_chain = result.getValue(1);
 652     } else {
 653       // Handle the sign and zero-extending loads for i1 and i8:
 654       unsigned NewOpC;
 655
 656       if (ExtType == ISD::SEXTLOAD) {
 657         NewOpC = (OpVT == MVT::i1
 658                   ? SPUISD::EXTRACT_I1_SEXT
 659                   : SPUISD::EXTRACT_I8_SEXT);
 660       } else {
 661         assert(ExtType == ISD::ZEXTLOAD);
 662         NewOpC = (OpVT == MVT::i1
 663                   ? SPUISD::EXTRACT_I1_ZEXT
 664                   : SPUISD::EXTRACT_I8_ZEXT);
 665       }
 666
 667       result = DAG.getNode(NewOpC, OpVT, result);
 668     }
 669
 670     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 671     SDValue retops[2] = {
 672       result,
 673       the_chain
 674     };
 675
 676     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 677                          retops, sizeof(retops) / sizeof(retops[0]));
 678     return result;
 679   }
 680   case ISD::PRE_INC:
 681   case ISD::PRE_DEC:
 682   case ISD::POST_INC:
 683   case ISD::POST_DEC:
 684   case ISD::LAST_INDEXED_MODE:
 685     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 686             "UNINDEXED\n";
 687     cerr << (unsigned) LN->getAddressingMode() << "\n";
 688     abort();
 689     /*NOTREACHED*/
 690   }
 691
 692   return SDValue();
 693 }
 694
 695 /// Custom lower stores for CellSPU
 696 /*!
 697  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 698  within a 16-byte block, we have to generate a shuffle to insert the
 699  requested element into its place, then store the resulting block.
 700  */
 701 static SDValue
 702 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 703   StoreSDNode *SN = cast<StoreSDNode>(Op);
 704   SDValue Value = SN->getValue();
 705   MVT VT = Value.getValueType();
 706   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 707   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 708   unsigned alignment = SN->getAlignment();
 709
 710   switch (SN->getAddressingMode()) {
 711   case ISD::UNINDEXED: {
 712     int chunk_offset, slot_offset;
 713     bool was16aligned;
 714
 715     // The vector type we really want to load from the 16-byte chunk.
 716     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
 717         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 718
 719     SDValue alignLoadVec =
 720       AlignedLoad(Op, DAG, ST, SN, alignment,
 721                   chunk_offset, slot_offset, VT, was16aligned);
 722
 723     if (alignLoadVec.getNode() == 0)
 724       return alignLoadVec;
 725
 726     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 727     SDValue basePtr = LN->getBasePtr();
 728     SDValue the_chain = alignLoadVec.getValue(1);
 729     SDValue theValue = SN->getValue();
 730     SDValue result;
 731
 732     if (StVT != VT
 733         && (theValue.getOpcode() == ISD::AssertZext
 734             || theValue.getOpcode() == ISD::AssertSext)) {
 735       // Drill down and get the value for zero- and sign-extended
 736       // quantities
 737       theValue = theValue.getOperand(0);
 738     }
 739
 740     chunk_offset &= 0xf;
 741
 742     SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 743     SDValue insertEltPtr;
 744
 745     // If the base pointer is already a D-form address, then just create
 746     // a new D-form address with a slot offset and the orignal base pointer.
 747     // Otherwise generate a D-form address with the slot offset relative
 748     // to the stack pointer, which is always aligned.
 749     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 750     DEBUG(basePtr.getNode()->dump(&DAG));
 751     DEBUG(cerr << "\n");
 752
 753     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 754         (basePtr.getOpcode() == ISD::ADD
 755          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 756       insertEltPtr = basePtr;
 757     } else {
 758       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 759     }
 760
 761     SDValue insertEltOp =
 762             DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr);
 763     SDValue vectorizeOp =
 764             DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
 765
 766     result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
 767                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 768
 769     result = DAG.getStore(the_chain, result, basePtr,
 770                           LN->getSrcValue(), LN->getSrcValueOffset(),
 771                           LN->isVolatile(), LN->getAlignment());
 772
 773 #if 0 && defined(NDEBUG)
 774     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 775       const SDValue &currentRoot = DAG.getRoot();
 776
 777       DAG.setRoot(result);
 778       cerr << "------- CellSPU:LowerStore result:\n";
 779       DAG.dump();
 780       cerr << "-------\n";
 781       DAG.setRoot(currentRoot);
 782     }
 783 #endif
 784
 785     return result;
 786     /*UNREACHED*/
 787   }
 788   case ISD::PRE_INC:
 789   case ISD::PRE_DEC:
 790   case ISD::POST_INC:
 791   case ISD::POST_DEC:
 792   case ISD::LAST_INDEXED_MODE:
 793     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 794             "UNINDEXED\n";
 795     cerr << (unsigned) SN->getAddressingMode() << "\n";
 796     abort();
 797     /*NOTREACHED*/
 798   }
 799
 800   return SDValue();
 801 }
 802
 803 /// Generate the address of a constant pool entry.
 804 static SDValue
 805 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 806   MVT PtrVT = Op.getValueType();
 807   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 808   Constant *C = CP->getConstVal();
 809   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 810   SDValue Zero = DAG.getConstant(0, PtrVT);
 811   const TargetMachine &TM = DAG.getTarget();
 812
 813   if (TM.getRelocationModel() == Reloc::Static) {
 814     if (!ST->usingLargeMem()) {
 815       // Just return the SDValue with the constant pool address in it.
 816       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 817     } else {
 818       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 819       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 820       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 821     }
 822   }
 823
 824   assert(0 &&
 825          "LowerConstantPool: Relocation model other than static"
 826          " not supported.");
 827   return SDValue();
 828 }
 829
 830 static SDValue
 831 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 832   MVT PtrVT = Op.getValueType();
 833   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 834   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 835   SDValue Zero = DAG.getConstant(0, PtrVT);
 836   const TargetMachine &TM = DAG.getTarget();
 837
 838   if (TM.getRelocationModel() == Reloc::Static) {
 839     if (!ST->usingLargeMem()) {
 840       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 841     } else {
 842       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 843       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 844       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 845     }
 846   }
 847
 848   assert(0 &&
 849          "LowerJumpTable: Relocation model other than static not supported.");
 850   return SDValue();
 851 }
 852
 853 static SDValue
 854 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 855   MVT PtrVT = Op.getValueType();
 856   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 857   GlobalValue *GV = GSDN->getGlobal();
 858   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 859   const TargetMachine &TM = DAG.getTarget();
 860   SDValue Zero = DAG.getConstant(0, PtrVT);
 861
 862   if (TM.getRelocationModel() == Reloc::Static) {
 863     if (!ST->usingLargeMem()) {
 864       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 865     } else {
 866       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 867       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 868       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 869     }
 870   } else {
 871     cerr << "LowerGlobalAddress: Relocation model other than static not "
 872          << "supported.\n";
 873     abort();
 874     /*NOTREACHED*/
 875   }
 876
 877   return SDValue();
 878 }
 879
 880 //! Custom lower i64 integer constants
 881 /*!
 882  This code inserts all of the necessary juggling that needs to occur to load
 883  a 64-bit constant into a register.
 884  */
 885 static SDValue
 886 LowerConstant(SDValue Op, SelectionDAG &DAG) {
 887   MVT VT = Op.getValueType();
 888   ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
 889
 890   if (VT == MVT::i64) {
 891     SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
 892     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 893                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 894   } else {
 895     cerr << "LowerConstant: unhandled constant type "
 896          << VT.getMVTString()
 897          << "\n";
 898     abort();
 899     /*NOTREACHED*/
 900   }
 901
 902   return SDValue();
 903 }
 904
 905 //! Custom lower double precision floating point constants
 906 static SDValue
 907 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 908   MVT VT = Op.getValueType();
 909   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 910
 911   assert((FP != 0) &&
 912          "LowerConstantFP: Node is not ConstantFPSDNode");
 913
 914   if (VT == MVT::f64) {
 915     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 916     return DAG.getNode(ISD::BIT_CONVERT, VT,
 917                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 918   }
 919
 920   return SDValue();
 921 }
 922
 923 //! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 924 static SDValue
 925 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
 926 {
 927   SDValue Cond = Op.getOperand(1);
 928   MVT CondVT = Cond.getValueType();
 929   MVT CondNVT;
 930
 931   if (CondVT == MVT::i8) {
 932     CondNVT = MVT::i16;
 933     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 934                       Op.getOperand(0),
 935                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 936                       Op.getOperand(2));
 937   } else
 938     return SDValue();                // Unchanged
 939 }
 940
 941 static SDValue
 942 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 943 {
 944   MachineFunction &MF = DAG.getMachineFunction();
 945   MachineFrameInfo *MFI = MF.getFrameInfo();
 946   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 947   SmallVector<SDValue, 48> ArgValues;
 948   SDValue Root = Op.getOperand(0);
 949   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 950
 951   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 952   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 953
 954   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 955   unsigned ArgRegIdx = 0;
 956   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 957
 958   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 959
 960   // Add DAG nodes to load the arguments or copy them out of registers.
 961   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 962        ArgNo != e; ++ArgNo) {
 963     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 964     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 965     SDValue ArgVal;
 966
 967     if (ArgRegIdx < NumArgRegs) {
 968       const TargetRegisterClass *ArgRegClass;
 969
 970       switch (ObjectVT.getSimpleVT()) {
 971       default: {
 972         cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 973              << ObjectVT.getMVTString()
 974              << "\n";
 975         abort();
 976       }
 977       case MVT::i8:
 978         ArgRegClass = &SPU::R8CRegClass;
 979         break;
 980       case MVT::i16:
 981         ArgRegClass = &SPU::R16CRegClass;
 982         break;
 983       case MVT::i32:
 984         ArgRegClass = &SPU::R32CRegClass;
 985         break;
 986       case MVT::i64:
 987         ArgRegClass = &SPU::R64CRegClass;
 988         break;
 989       case MVT::f32:
 990         ArgRegClass = &SPU::R32FPRegClass;
 991         break;
 992       case MVT::f64:
 993         ArgRegClass = &SPU::R64FPRegClass;
 994         break;
 995       case MVT::v2f64:
 996       case MVT::v4f32:
 997       case MVT::v2i64:
 998       case MVT::v4i32:
 999       case MVT::v8i16:
1000       case MVT::v16i8:
1001         ArgRegClass = &SPU::VECREGRegClass;
1002         break;
1003       }
1004
1005       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1006       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1007       ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1008       ++ArgRegIdx;
1009     } else {
1010       // We need to load the argument to a virtual register if we determined
1011       // above that we ran out of physical registers of the appropriate type
1012       // or we're forced to do vararg
1013       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1014       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1015       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1016       ArgOffset += StackSlotSize;
1017     }
1018
1019     ArgValues.push_back(ArgVal);
1020     // Update the chain
1021     Root = ArgVal.getOperand(0);
1022   }
1023
1024   // vararg handling:
1025   if (isVarArg) {
1026     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1027     // We will spill (79-3)+1 registers to the stack
1028     SmallVector<SDValue, 79-3+1> MemOps;
1029
1030     // Create the frame slot
1031
1032     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1033       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1034       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1035       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1036       SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1037       Root = Store.getOperand(0);
1038       MemOps.push_back(Store);
1039
1040       // Increment address by stack slot size for the next stored argument
1041       ArgOffset += StackSlotSize;
1042     }
1043     if (!MemOps.empty())
1044       Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1045   }
1046
1047   ArgValues.push_back(Root);
1048
1049   // Return the new list of results.
1050   return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1051                             ArgValues.size());
1052 }
1053
1054 /// isLSAAddress - Return the immediate to use if the specified
1055 /// value is representable as a LSA address.
1056 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1057   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1058   if (!C) return 0;
1059
1060   int Addr = C->getZExtValue();
1061   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1062       (Addr << 14 >> 14) != Addr)
1063     return 0;  // Top 14 bits have to be sext of immediate.
1064
1065   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1066 }
1067
1068 static
1069 SDValue
1070 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1071   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1072   SDValue Chain = TheCall->getChain();
1073   SDValue Callee    = TheCall->getCallee();
1074   unsigned NumOps     = TheCall->getNumArgs();
1075   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1076   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1077   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1078
1079   // Handy pointer type
1080   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1081
1082   // Accumulate how many bytes are to be pushed on the stack, including the
1083   // linkage area, and parameter passing area.  According to the SPU ABI,
1084   // we minimally need space for [LR] and [SP]
1085   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1086
1087   // Set up a copy of the stack pointer for use loading and storing any
1088   // arguments that may not fit in the registers available for argument
1089   // passing.
1090   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1091
1092   // Figure out which arguments are going to go in registers, and which in
1093   // memory.
1094   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1095   unsigned ArgRegIdx = 0;
1096
1097   // Keep track of registers passing arguments
1098   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1099   // And the arguments passed on the stack
1100   SmallVector<SDValue, 8> MemOpChains;
1101
1102   for (unsigned i = 0; i != NumOps; ++i) {
1103     SDValue Arg = TheCall->getArg(i);
1104
1105     // PtrOff will be used to store the current argument to the stack if a
1106     // register cannot be found for it.
1107     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1108     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1109
1110     switch (Arg.getValueType().getSimpleVT()) {
1111     default: assert(0 && "Unexpected ValueType for argument!");
1112     case MVT::i32:
1113     case MVT::i64:
1114     case MVT::i128:
1115       if (ArgRegIdx != NumArgRegs) {
1116         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1117       } else {
1118         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1119         ArgOffset += StackSlotSize;
1120       }
1121       break;
1122     case MVT::f32:
1123     case MVT::f64:
1124       if (ArgRegIdx != NumArgRegs) {
1125         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1126       } else {
1127         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1128         ArgOffset += StackSlotSize;
1129       }
1130       break;
1131     case MVT::v4f32:
1132     case MVT::v4i32:
1133     case MVT::v8i16:
1134     case MVT::v16i8:
1135       if (ArgRegIdx != NumArgRegs) {
1136         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137       } else {
1138         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1139         ArgOffset += StackSlotSize;
1140       }
1141       break;
1142     }
1143   }
1144
1145   // Update number of stack bytes actually used, insert a call sequence start
1146   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1147   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1148                                                             true));
1149
1150   if (!MemOpChains.empty()) {
1151     // Adjust the stack pointer for the stack arguments.
1152     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1153                         &MemOpChains[0], MemOpChains.size());
1154   }
1155
1156   // Build a sequence of copy-to-reg nodes chained together with token chain
1157   // and flag operands which copy the outgoing args into the appropriate regs.
1158   SDValue InFlag;
1159   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1160     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1161                              InFlag);
1162     InFlag = Chain.getValue(1);
1163   }
1164
1165   SmallVector<SDValue, 8> Ops;
1166   unsigned CallOpc = SPUISD::CALL;
1167
1168   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1169   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1170   // node so that legalize doesn't hack it.
1171   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1172     GlobalValue *GV = G->getGlobal();
1173     MVT CalleeVT = Callee.getValueType();
1174     SDValue Zero = DAG.getConstant(0, PtrVT);
1175     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1176
1177     if (!ST->usingLargeMem()) {
1178       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1179       // style calls, otherwise, external symbols are BRASL calls. This assumes
1180       // that declared/defined symbols are in the same compilation unit and can
1181       // be reached through PC-relative jumps.
1182       //
1183       // NOTE:
1184       // This may be an unsafe assumption for JIT and really large compilation
1185       // units.
1186       if (GV->isDeclaration()) {
1187         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1188       } else {
1189         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1190       }
1191     } else {
1192       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1193       // address pairs:
1194       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1195     }
1196   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1197     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1198   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1199     // If this is an absolute destination address that appears to be a legal
1200     // local store address, use the munged value.
1201     Callee = SDValue(Dest, 0);
1202   }
1203
1204   Ops.push_back(Chain);
1205   Ops.push_back(Callee);
1206
1207   // Add argument registers to the end of the list so that they are known live
1208   // into the call.
1209   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1210     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1211                                   RegsToPass[i].second.getValueType()));
1212
1213   if (InFlag.getNode())
1214     Ops.push_back(InFlag);
1215   // Returns a chain and a flag for retval copy to use.
1216   Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1217                       &Ops[0], Ops.size());
1218   InFlag = Chain.getValue(1);
1219
1220   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1221                              DAG.getIntPtrConstant(0, true), InFlag);
1222   if (TheCall->getValueType(0) != MVT::Other)
1223     InFlag = Chain.getValue(1);
1224
1225   SDValue ResultVals[3];
1226   unsigned NumResults = 0;
1227
1228   // If the call has results, copy the values out of the ret val registers.
1229   switch (TheCall->getValueType(0).getSimpleVT()) {
1230   default: assert(0 && "Unexpected ret value!");
1231   case MVT::Other: break;
1232   case MVT::i32:
1233     if (TheCall->getValueType(1) == MVT::i32) {
1234       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1235       ResultVals[0] = Chain.getValue(0);
1236       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1237                                  Chain.getValue(2)).getValue(1);
1238       ResultVals[1] = Chain.getValue(0);
1239       NumResults = 2;
1240     } else {
1241       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1242       ResultVals[0] = Chain.getValue(0);
1243       NumResults = 1;
1244     }
1245     break;
1246   case MVT::i64:
1247     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1248     ResultVals[0] = Chain.getValue(0);
1249     NumResults = 1;
1250     break;
1251   case MVT::f32:
1252   case MVT::f64:
1253     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1254                                InFlag).getValue(1);
1255     ResultVals[0] = Chain.getValue(0);
1256     NumResults = 1;
1257     break;
1258   case MVT::v2f64:
1259   case MVT::v4f32:
1260   case MVT::v4i32:
1261   case MVT::v8i16:
1262   case MVT::v16i8:
1263     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1264                                    InFlag).getValue(1);
1265     ResultVals[0] = Chain.getValue(0);
1266     NumResults = 1;
1267     break;
1268   }
1269
1270   // If the function returns void, just return the chain.
1271   if (NumResults == 0)
1272     return Chain;
1273
1274   // Otherwise, merge everything together with a MERGE_VALUES node.
1275   ResultVals[NumResults++] = Chain;
1276   SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1277   return Res.getValue(Op.getResNo());
1278 }
1279
1280 static SDValue
1281 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1282   SmallVector<CCValAssign, 16> RVLocs;
1283   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1284   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1285   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1286   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1287
1288   // If this is the first return lowered for this function, add the regs to the
1289   // liveout set for the function.
1290   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1291     for (unsigned i = 0; i != RVLocs.size(); ++i)
1292       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1293   }
1294
1295   SDValue Chain = Op.getOperand(0);
1296   SDValue Flag;
1297
1298   // Copy the result values into the output registers.
1299   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1300     CCValAssign &VA = RVLocs[i];
1301     assert(VA.isRegLoc() && "Can only return in registers!");
1302     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1303     Flag = Chain.getValue(1);
1304   }
1305
1306   if (Flag.getNode())
1307     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1308   else
1309     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1310 }
1311
1312
1313 //===----------------------------------------------------------------------===//
1314 // Vector related lowering:
1315 //===----------------------------------------------------------------------===//
1316
1317 static ConstantSDNode *
1318 getVecImm(SDNode *N) {
1319   SDValue OpVal(0, 0);
1320
1321   // Check to see if this buildvec has a single non-undef value in its elements.
1322   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1323     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1324     if (OpVal.getNode() == 0)
1325       OpVal = N->getOperand(i);
1326     else if (OpVal != N->getOperand(i))
1327       return 0;
1328   }
1329
1330   if (OpVal.getNode() != 0) {
1331     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1332       return CN;
1333     }
1334   }
1335
1336   return 0; // All UNDEF: use implicit def.; not Constant node
1337 }
1338
1339 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1340 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1341 /// constant
1342 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343                               MVT ValueType) {
1344   if (ConstantSDNode *CN = getVecImm(N)) {
1345     uint64_t Value = CN->getZExtValue();
1346     if (ValueType == MVT::i64) {
1347       uint64_t UValue = CN->getZExtValue();
1348       uint32_t upper = uint32_t(UValue >> 32);
1349       uint32_t lower = uint32_t(UValue);
1350       if (upper != lower)
1351         return SDValue();
1352       Value = Value >> 32;
1353     }
1354     if (Value <= 0x3ffff)
1355       return DAG.getTargetConstant(Value, ValueType);
1356   }
1357
1358   return SDValue();
1359 }
1360
1361 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1362 /// and the value fits into a signed 16-bit constant, and if so, return the
1363 /// constant
1364 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1365                               MVT ValueType) {
1366   if (ConstantSDNode *CN = getVecImm(N)) {
1367     int64_t Value = CN->getSExtValue();
1368     if (ValueType == MVT::i64) {
1369       uint64_t UValue = CN->getZExtValue();
1370       uint32_t upper = uint32_t(UValue >> 32);
1371       uint32_t lower = uint32_t(UValue);
1372       if (upper != lower)
1373         return SDValue();
1374       Value = Value >> 32;
1375     }
1376     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1377       return DAG.getTargetConstant(Value, ValueType);
1378     }
1379   }
1380
1381   return SDValue();
1382 }
1383
1384 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1385 /// and the value fits into a signed 10-bit constant, and if so, return the
1386 /// constant
1387 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1388                               MVT ValueType) {
1389   if (ConstantSDNode *CN = getVecImm(N)) {
1390     int64_t Value = CN->getSExtValue();
1391     if (ValueType == MVT::i64) {
1392       uint64_t UValue = CN->getZExtValue();
1393       uint32_t upper = uint32_t(UValue >> 32);
1394       uint32_t lower = uint32_t(UValue);
1395       if (upper != lower)
1396         return SDValue();
1397       Value = Value >> 32;
1398     }
1399     if (isS10Constant(Value))
1400       return DAG.getTargetConstant(Value, ValueType);
1401   }
1402
1403   return SDValue();
1404 }
1405
1406 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1407 /// and the value fits into a signed 8-bit constant, and if so, return the
1408 /// constant.
1409 ///
1410 /// @note: The incoming vector is v16i8 because that's the only way we can load
1411 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1412 /// same value.
1413 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1414                              MVT ValueType) {
1415   if (ConstantSDNode *CN = getVecImm(N)) {
1416     int Value = (int) CN->getZExtValue();
1417     if (ValueType == MVT::i16
1418         && Value <= 0xffff                 /* truncated from uint64_t */
1419         && ((short) Value >> 8) == ((short) Value & 0xff))
1420       return DAG.getTargetConstant(Value & 0xff, ValueType);
1421     else if (ValueType == MVT::i8
1422              && (Value & 0xff) == Value)
1423       return DAG.getTargetConstant(Value, ValueType);
1424   }
1425
1426   return SDValue();
1427 }
1428
1429 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1430 /// and the value fits into a signed 16-bit constant, and if so, return the
1431 /// constant
1432 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1433                                MVT ValueType) {
1434   if (ConstantSDNode *CN = getVecImm(N)) {
1435     uint64_t Value = CN->getZExtValue();
1436     if ((ValueType == MVT::i32
1437           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1438         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1439       return DAG.getTargetConstant(Value >> 16, ValueType);
1440   }
1441
1442   return SDValue();
1443 }
1444
1445 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1446 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1447   if (ConstantSDNode *CN = getVecImm(N)) {
1448     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1449   }
1450
1451   return SDValue();
1452 }
1453
1454 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1455 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1456   if (ConstantSDNode *CN = getVecImm(N)) {
1457     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1458   }
1459
1460   return SDValue();
1461 }
1462
1463 // If this is a vector of constants or undefs, get the bits.  A bit in
1464 // UndefBits is set if the corresponding element of the vector is an
1465 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1466 // zero.   Return true if this is not an array of constants, false if it is.
1467 //
1468 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1469                                        uint64_t UndefBits[2]) {
1470   // Start with zero'd results.
1471   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1472
1473   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1474   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1475     SDValue OpVal = BV->getOperand(i);
1476
1477     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1478     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1479
1480     uint64_t EltBits = 0;
1481     if (OpVal.getOpcode() == ISD::UNDEF) {
1482       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1483       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484       continue;
1485     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1486       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1487     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1488       const APFloat &apf = CN->getValueAPF();
1489       EltBits = (CN->getValueType(0) == MVT::f32
1490                  ? FloatToBits(apf.convertToFloat())
1491                  : DoubleToBits(apf.convertToDouble()));
1492     } else {
1493       // Nonconstant element.
1494       return true;
1495     }
1496
1497     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1498   }
1499
1500   //printf("%llx %llx  %llx %llx\n",
1501   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1502   return false;
1503 }
1504
/// Test whether a 128-bit constant is a splat (repetition) of a smaller value
/// across the whole vector.  For example, "0x01010101010101..." is a splat of
/// 0x01, 0x0101, and 0x01010101.
///
/// The granularity that is reported is selected by MinSplatBits: 64 or more
/// reports an 8-byte splat, 32..63 a 4-byte splat, 16..31 a 2-byte splat and
/// anything below 16 a 1-byte splat.  The constant must repeat at every
/// granularity from 64 bits down to the selected one, ignoring undef bits;
/// otherwise false is returned and the outputs are left untouched.
///
/// \param Bits128      The constant as two 64-bit words.
/// \param Undef128     Mask of undefined bits, laid out like Bits128.
/// \param MinSplatBits Selects the splat granularity as described above.
/// \param SplatBits    Receives the repeated value.
/// \param SplatUndef   Receives the undef mask of the repeated value.
/// \param SplatSize    Receives the splat size in bytes (8, 4, 2 or 1).
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  The two 64-bit words have
  // to agree wherever the other word is defined.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  if (MinSplatBits >= 64) {
    SplatBits = Bits128[0];
    SplatUndef = Undef128[0];
    SplatSize = 8;
    return true;
  }

  // Fold the two words together: a bit is set if either word sets it and is
  // undef only if both words leave it undef.
  const uint64_t Folded64 = Bits128[0] | Bits128[1];
  const uint64_t FoldedUndef64 = Undef128[0] & Undef128[1];

  // The top 32 bits have to match the lower 32 bits, ignoring undefs.
  if ((Folded64 & (~FoldedUndef64 >> 32))
      != ((Folded64 >> 32) & ~FoldedUndef64))
    return false;

  const uint32_t Folded32 = uint32_t(Folded64) | uint32_t(Folded64 >> 32);
  const uint32_t FoldedUndef32 =
    uint32_t(FoldedUndef64) & uint32_t(FoldedUndef64 >> 32);

  if (MinSplatBits >= 32) {
    SplatBits = Folded32;
    SplatUndef = FoldedUndef32;
    SplatSize = 4;
    return true;
  }

  // The top 16 bits have to match the lower 16 bits, ignoring undefs.
  if ((Folded32 & (~FoldedUndef32 >> 16))
      != ((Folded32 >> 16) & ~FoldedUndef32))
    return false;

  const uint16_t Folded16 = uint16_t(Folded32) | uint16_t(Folded32 >> 16);
  const uint16_t FoldedUndef16 =
    uint16_t(FoldedUndef32) & uint16_t(FoldedUndef32 >> 16);

  if (MinSplatBits >= 16) {
    SplatBits = Folded16;
    SplatUndef = FoldedUndef16;
    SplatSize = 2;
    return true;
  }

  // The top 8 bits have to match the lower 8 bits, ignoring undefs.
  if ((Folded16 & (uint16_t(~FoldedUndef16) >> 8))
      != ((Folded16 >> 8) & ~FoldedUndef16))
    return false;

  SplatBits  = uint8_t(Folded16)  | uint8_t(Folded16 >> 8);
  SplatUndef = uint8_t(FoldedUndef16) & uint8_t(FoldedUndef16 >> 8);
  SplatSize = 1;
  return true;
}
1569
1570 // If this is a case we can't handle, return null and let the default
1571 // expansion code take care of it.  If we CAN select this case, and if it
1572 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1573 // this case more efficiently than a constant pool load, lower it to the
1574 // sequence of ops that should be used.
1575 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1576   MVT VT = Op.getValueType();
1577   // If this is a vector of constants or undefs, get the bits.  A bit in
1578   // UndefBits is set if the corresponding element of the vector is an
1579   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1580   // zero.
1581   uint64_t VectorBits[2];
1582   uint64_t UndefBits[2];
1583   uint64_t SplatBits, SplatUndef;
1584   int SplatSize;
1585   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1586       || !isConstantSplat(VectorBits, UndefBits,
1587                           VT.getVectorElementType().getSizeInBits(),
1588                           SplatBits, SplatUndef, SplatSize))
1589     return SDValue();   // Not a constant vector, not a splat.
1590
1591   switch (VT.getSimpleVT()) {
1592   default:
1593   case MVT::v4f32: {
1594     uint32_t Value32 = SplatBits;
1595     assert(SplatSize == 4
1596            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1597     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1598     SDValue T = DAG.getConstant(Value32, MVT::i32);
1599     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1600                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1601     break;
1602   }
1603   case MVT::v2f64: {
1604     uint64_t f64val = SplatBits;
1605     assert(SplatSize == 8
1606            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1607     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1608     SDValue T = DAG.getConstant(f64val, MVT::i64);
1609     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1610                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1611     break;
1612   }
1613   case MVT::v16i8: {
1614    // 8-bit constants have to be expanded to 16-bits
1615    unsigned short Value16 = SplatBits | (SplatBits << 8);
1616    SDValue Ops[8];
1617    for (int i = 0; i < 8; ++i)
1618      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1619    return DAG.getNode(ISD::BIT_CONVERT, VT,
1620                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1621   }
1622   case MVT::v8i16: {
1623     unsigned short Value16;
1624     if (SplatSize == 2)
1625       Value16 = (unsigned short) (SplatBits & 0xffff);
1626     else
1627       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1628     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1629     SDValue Ops[8];
1630     for (int i = 0; i < 8; ++i) Ops[i] = T;
1631     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1632   }
1633   case MVT::v4i32: {
1634     unsigned int Value = SplatBits;
1635     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1636     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1637   }
1638   case MVT::v2i64: {
1639     uint64_t val = SplatBits;
1640     uint32_t upper = uint32_t(val >> 32);
1641     uint32_t lower = uint32_t(val);
1642
1643     if (upper == lower) {
1644       // Magic constant that can be matched by IL, ILA, et. al.
1645       SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1646       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1647     } else {
1648       SDValue LO32;
1649       SDValue HI32;
1650       SmallVector<SDValue, 16> ShufBytes;
1651       SDValue Result;
1652       bool upper_special, lower_special;
1653
1654       // NOTE: This code creates common-case shuffle masks that can be easily
1655       // detected as common expressions. It is not attempting to create highly
1656       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1657
1658       // Detect if the upper or lower half is a special shuffle mask pattern:
1659       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1660       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1661
1662       // Create lower vector if not a special pattern
1663       if (!lower_special) {
1664         SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1665         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1666                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1667                                        LO32C, LO32C, LO32C, LO32C));
1668       }
1669
1670       // Create upper vector if not a special pattern
1671       if (!upper_special) {
1672         SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1673         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1674                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1675                                        HI32C, HI32C, HI32C, HI32C));
1676       }
1677
1678       // If either upper or lower are special, then the two input operands are
1679       // the same (basically, one of them is a "don't care")
1680       if (lower_special)
1681         LO32 = HI32;
1682       if (upper_special)
1683         HI32 = LO32;
1684       if (lower_special && upper_special) {
1685         // Unhappy situation... both upper and lower are special, so punt with
1686         // a target constant:
1687         SDValue Zero = DAG.getConstant(0, MVT::i32);
1688         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1689                                   Zero, Zero);
1690       }
1691
1692       for (int i = 0; i < 4; ++i) {
1693         uint64_t val = 0;
1694         for (int j = 0; j < 4; ++j) {
1695           SDValue V;
1696           bool process_upper, process_lower;
1697           val <<= 8;
1698           process_upper = (upper_special && (i & 1) == 0);
1699           process_lower = (lower_special && (i & 1) == 1);
1700
1701           if (process_upper || process_lower) {
1702             if ((process_upper && upper == 0)
1703                 || (process_lower && lower == 0))
1704               val |= 0x80;
1705             else if ((process_upper && upper == 0xffffffff)
1706                      || (process_lower && lower == 0xffffffff))
1707               val |= 0xc0;
1708             else if ((process_upper && upper == 0x80000000)
1709                      || (process_lower && lower == 0x80000000))
1710               val |= (j == 0 ? 0xe0 : 0x80);
1711           } else
1712             val |= i * 4 + j + ((i & 1) * 16);
1713         }
1714
1715         ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1716       }
1717
1718       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1719                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1720                                      &ShufBytes[0], ShufBytes.size()));
1721     }
1722   }
1723   }
1724
1725   return SDValue();
1726 }
1727
1728 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1730 /// permutation vector, V3, is monotonically increasing with one "exception"
1731 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1732 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733 /// In either case, the net result is going to eventually invoke SHUFB to
1734 /// permute/shuffle the bytes from V1 and V2.
1735 /// \note
1736 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1737 /// control word for byte/halfword/word insertion. This takes care of a single
1738 /// element move from V2 into V1.
1739 /// \note
1740 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1741 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1742   SDValue V1 = Op.getOperand(0);
1743   SDValue V2 = Op.getOperand(1);
1744   SDValue PermMask = Op.getOperand(2);
1745
1746   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1747
1748   // If we have a single element being moved from V1 to V2, this can be handled
1749   // using the C*[DX] compute mask instructions, but the vector elements have
1750   // to be monotonically increasing with one exception element.
1751   MVT EltVT = V1.getValueType().getVectorElementType();
1752   unsigned EltsFromV2 = 0;
1753   unsigned V2Elt = 0;
1754   unsigned V2EltIdx0 = 0;
1755   unsigned CurrElt = 0;
1756   bool monotonic = true;
1757   if (EltVT == MVT::i8)
1758     V2EltIdx0 = 16;
1759   else if (EltVT == MVT::i16)
1760     V2EltIdx0 = 8;
1761   else if (EltVT == MVT::i32)
1762     V2EltIdx0 = 4;
1763   else
1764     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1765
1766   for (unsigned i = 0, e = PermMask.getNumOperands();
1767        EltsFromV2 <= 1 && monotonic && i != e;
1768        ++i) {
1769     unsigned SrcElt;
1770     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1771       SrcElt = 0;
1772     else
1773       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1774
1775     if (SrcElt >= V2EltIdx0) {
1776       ++EltsFromV2;
1777       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1778     } else if (CurrElt != SrcElt) {
1779       monotonic = false;
1780     }
1781
1782     ++CurrElt;
1783   }
1784
1785   if (EltsFromV2 == 1 && monotonic) {
1786     // Compute mask and shuffle
1787     MachineFunction &MF = DAG.getMachineFunction();
1788     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1789     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1790     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1791     // Initialize temporary register to 0
1792     SDValue InitTempReg =
1793       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1794     // Copy register's contents as index in SHUFFLE_MASK:
1795     SDValue ShufMaskOp =
1796       DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(),
1797                   DAG.getTargetConstant(V2Elt, MVT::i32),
1798                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1799     // Use shuffle mask in SHUFB synthetic instruction:
1800     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1801   } else {
1802    // Convert the SHUFFLE_VECTOR mask's input element units to the
1803    // actual bytes.
1804     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1805
1806     SmallVector<SDValue, 16> ResultMask;
1807     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1808       unsigned SrcElt;
1809       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1810         SrcElt = 0;
1811       else
1812         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1813
1814       for (unsigned j = 0; j < BytesPerElement; ++j) {
1815         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1816                                              MVT::i8));
1817       }
1818     }
1819
1820     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1821                                       &ResultMask[0], ResultMask.size());
1822     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1823   }
1824 }
1825
1826 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1827   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1828
1829   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1830     // For a constant, build the appropriate constant vector, which will
1831     // eventually simplify to a vector register load.
1832
1833     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1834     SmallVector<SDValue, 16> ConstVecValues;
1835     MVT VT;
1836     size_t n_copies;
1837
1838     // Create a constant vector:
1839     switch (Op.getValueType().getSimpleVT()) {
1840     default: assert(0 && "Unexpected constant value type in "
1841                          "LowerSCALAR_TO_VECTOR");
1842     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1843     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1844     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1845     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1846     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1847     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1848     }
1849
1850     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1851     for (size_t j = 0; j < n_copies; ++j)
1852       ConstVecValues.push_back(CValue);
1853
1854     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1855                        &ConstVecValues[0], ConstVecValues.size());
1856   } else {
1857     // Otherwise, copy the value from one register to another:
1858     switch (Op0.getValueType().getSimpleVT()) {
1859     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1860     case MVT::i8:
1861     case MVT::i16:
1862     case MVT::i32:
1863     case MVT::i64:
1864     case MVT::f32:
1865     case MVT::f64:
1866       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1867     }
1868   }
1869
1870   return SDValue();
1871 }
1872
1873 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1874   switch (Op.getValueType().getSimpleVT()) {
1875   default:
1876     cerr << "CellSPU: Unknown vector multiplication, got "
1877          << Op.getValueType().getMVTString()
1878          << "\n";
1879     abort();
1880     /*NOTREACHED*/
1881
1882   case MVT::v4i32: {
1883     SDValue rA = Op.getOperand(0);
1884     SDValue rB = Op.getOperand(1);
1885     SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1886     SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1887     SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1888     SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1889
1890     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1891     break;
1892   }
1893
1894   // Multiply two v8i16 vectors (pipeline friendly version):
1895   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1896   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1897   // c) Use SELB to select upper and lower halves from the intermediate results
1898   //
1899   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1900   // dual-issue. This code does manage to do this, even if it's a little on
1901   // the wacky side
1902   case MVT::v8i16: {
1903     MachineFunction &MF = DAG.getMachineFunction();
1904     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1905     SDValue Chain = Op.getOperand(0);
1906     SDValue rA = Op.getOperand(0);
1907     SDValue rB = Op.getOperand(1);
1908     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1909     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1910
1911     SDValue FSMBOp =
1912       DAG.getCopyToReg(Chain, FSMBIreg,
1913                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1914                                    DAG.getConstant(0xcccc, MVT::i16)));
1915
1916     SDValue HHProd =
1917       DAG.getCopyToReg(FSMBOp, HiProdReg,
1918                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1919
1920     SDValue HHProd_v4i32 =
1921       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1922                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1923
1924     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1925                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1926                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1927                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1928                                                HHProd_v4i32,
1929                                                DAG.getConstant(16, MVT::i16))),
1930                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1931   }
1932
1933   // This M00sE is N@stI! (apologies to Monty Python)
1934   //
1935   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1936   // is to break it all apart, sign extend, and reassemble the various
1937   // intermediate products.
1938   case MVT::v16i8: {
1939     SDValue rA = Op.getOperand(0);
1940     SDValue rB = Op.getOperand(1);
1941     SDValue c8 = DAG.getConstant(8, MVT::i32);
1942     SDValue c16 = DAG.getConstant(16, MVT::i32);
1943
1944     SDValue LLProd =
1945       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1946                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1947                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1948
1949     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1950
1951     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1952
1953     SDValue LHProd =
1954       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1955                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1956
1957     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1958                                      DAG.getConstant(0x2222, MVT::i16));
1959
1960     SDValue LoProdParts =
1961       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1962                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
1963                               LLProd, LHProd, FSMBmask));
1964
1965     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1966
1967     SDValue LoProd =
1968       DAG.getNode(ISD::AND, MVT::v4i32,
1969                   LoProdParts,
1970                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1971                               LoProdMask, LoProdMask,
1972                               LoProdMask, LoProdMask));
1973
1974     SDValue rAH =
1975       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1976                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1977
1978     SDValue rBH =
1979       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1981
1982     SDValue HLProd =
1983       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1984                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1985                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1986
1987     SDValue HHProd_1 =
1988       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1989                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1990                               DAG.getNode(SPUISD::VEC_SRA,
1991                                           MVT::v4i32, rAH, c8)),
1992                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1993                               DAG.getNode(SPUISD::VEC_SRA,
1994                                           MVT::v4i32, rBH, c8)));
1995
1996     SDValue HHProd =
1997       DAG.getNode(SPUISD::SELB, MVT::v8i16,
1998                   HLProd,
1999                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2000                   FSMBmask);
2001
2002     SDValue HiProd =
2003       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2004
2005     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2006                        DAG.getNode(ISD::OR, MVT::v4i32,
2007                                    LoProd, HiProd));
2008   }
2009   }
2010
2011   return SDValue();
2012 }
2013
2014 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2015   MachineFunction &MF = DAG.getMachineFunction();
2016   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2017
2018   SDValue A = Op.getOperand(0);
2019   SDValue B = Op.getOperand(1);
2020   MVT VT = Op.getValueType();
2021
2022   unsigned VRegBR, VRegC;
2023
2024   if (VT == MVT::f32) {
2025     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2026     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2027   } else {
2028     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2029     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2030   }
2031   // TODO: make sure we're feeding FPInterp the right arguments
2032   // Right now: fi B, frest(B)
2033
2034   // Computes BRcpl =
2035   // (Floating Interpolate (FP Reciprocal Estimate B))
2036   SDValue BRcpl =
2037       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2038                        DAG.getNode(SPUISD::FPInterp, VT, B,
2039                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2040
2041   // Computes A * BRcpl and stores in a temporary register
2042   SDValue AxBRcpl =
2043       DAG.getCopyToReg(BRcpl, VRegC,
2044                  DAG.getNode(ISD::FMUL, VT, A,
2045                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2046   // What's the Chain variable do? It's magic!
2047   // TODO: set Chain = Op(0).getEntryNode()
2048
2049   return DAG.getNode(ISD::FADD, VT,
2050                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2051                 DAG.getNode(ISD::FMUL, VT,
2052                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2053                         DAG.getNode(ISD::FSUB, VT, A,
2054                             DAG.getNode(ISD::FMUL, VT, B,
2055                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2056 }
2057
2058 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2059   MVT VT = Op.getValueType();
2060   SDValue N = Op.getOperand(0);
2061   SDValue Elt = Op.getOperand(1);
2062   SDValue retval;
2063
2064   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2065     // Constant argument:
2066     int EltNo = (int) C->getZExtValue();
2067
2068     // sanity checks:
2069     if (VT == MVT::i8 && EltNo >= 16)
2070       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2071     else if (VT == MVT::i16 && EltNo >= 8)
2072       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2073     else if (VT == MVT::i32 && EltNo >= 4)
2074       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2075     else if (VT == MVT::i64 && EltNo >= 2)
2076       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2077
2078     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2079       // i32 and i64: Element 0 is the preferred slot
2080       return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2081     }
2082
2083     // Need to generate shuffle mask and extract:
2084     int prefslot_begin = -1, prefslot_end = -1;
2085     int elt_byte = EltNo * VT.getSizeInBits() / 8;
2086
2087     switch (VT.getSimpleVT()) {
2088     default:
2089       assert(false && "Invalid value type!");
2090     case MVT::i8: {
2091       prefslot_begin = prefslot_end = 3;
2092       break;
2093     }
2094     case MVT::i16: {
2095       prefslot_begin = 2; prefslot_end = 3;
2096       break;
2097     }
2098     case MVT::i32:
2099     case MVT::f32: {
2100       prefslot_begin = 0; prefslot_end = 3;
2101       break;
2102     }
2103     case MVT::i64:
2104     case MVT::f64: {
2105       prefslot_begin = 0; prefslot_end = 7;
2106       break;
2107     }
2108     }
2109
2110     assert(prefslot_begin != -1 && prefslot_end != -1 &&
2111            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2112
2113     unsigned int ShufBytes[16];
2114     for (int i = 0; i < 16; ++i) {
2115       // zero fill uppper part of preferred slot, don't care about the
2116       // other slots:
2117       unsigned int mask_val;
2118       if (i <= prefslot_end) {
2119         mask_val =
2120           ((i < prefslot_begin)
2121            ? 0x80
2122            : elt_byte + (i - prefslot_begin));
2123
2124         ShufBytes[i] = mask_val;
2125       } else
2126         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2127     }
2128
2129     SDValue ShufMask[4];
2130     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2131       unsigned bidx = i / 4;
2132       unsigned int bits = ((ShufBytes[bidx] << 24) |
2133                            (ShufBytes[bidx+1] << 16) |
2134                            (ShufBytes[bidx+2] << 8) |
2135                            ShufBytes[bidx+3]);
2136       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2137     }
2138
2139     SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2140                                       &ShufMask[0],
2141                                       sizeof(ShufMask) / sizeof(ShufMask[0]));
2142
2143     retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2144                          DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2145                                      N, N, ShufMaskVec));
2146   } else {
2147     // Variable index: Rotate the requested element into slot 0, then replicate
2148     // slot 0 across the vector
2149     MVT VecVT = N.getValueType();
2150     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2151       cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2152       abort();
2153     }
2154
2155     // Make life easier by making sure the index is zero-extended to i32
2156     if (Elt.getValueType() != MVT::i32)
2157       Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2158
2159     // Scale the index to a bit/byte shift quantity
2160     APInt scaleFactor =
2161       APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2162     SDValue vecShift;
2163
2164     switch (VT.getSimpleVT()) {
2165     default:
2166       cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2167       abort();
2168       /*NOTREACHED*/
2169     case MVT::i8: {
2170       // Don't need to scale, but we do need to correct for where bytes go in
2171       // slot 0:
2172       SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
2173                                      Elt, DAG.getConstant(3, MVT::i32));
2174       SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
2175                                       DAG.getConstant(16, MVT::i32));
2176
2177       SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
2178                                      prefSlot, DAG.getConstant(0, MVT::i32),
2179                                      prefSlot,          // trueval
2180                                      corrected,         // falseval
2181                                      DAG.getCondCode(ISD::SETGT));
2182       vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
2183       break;
2184     }
2185     case MVT::i16: {
2186       // Scale the index to bytes, subtract for preferred slot:
2187       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2188                         DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
2189       SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
2190                                      Elt, DAG.getConstant(2, MVT::i32));
2191       SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
2192                                       DAG.getConstant(16, MVT::i32));
2193
2194       SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
2195                                      prefSlot, DAG.getConstant(0, MVT::i32),
2196                                      prefSlot,          // trueval
2197                                      corrected,         // falseval
2198                                      DAG.getCondCode(ISD::SETGT));
2199       vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
2200       break;
2201     }
2202     case MVT::i32:
2203     case MVT::f32:
2204     case MVT::i64:
2205     case MVT::f64:
2206       // Simple left shift to slot 0
2207       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2208                         DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
2209       vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2210       break;
2211     }
2212
2213     // Replicate slot 0 across the entire vector (for consistency with the
2214     // notion of a unified register set)
2215     SDValue replicate;
2216
2217     switch (VT.getSimpleVT()) {
2218     default:
2219       cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2220       abort();
2221       /*NOTREACHED*/
2222     case MVT::i8: {
2223       SDValue factor = DAG.getConstant(0x03030303, MVT::i32);
2224       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2225                               factor, factor);
2226       break;
2227     }
2228     case MVT::i16: {
2229       SDValue factor = DAG.getConstant(0x02030203, MVT::i32);
2230       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2231                               factor, factor);
2232       break;
2233     }
2234     case MVT::i32:
2235     case MVT::f32: {
2236       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2237       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2238                               factor, factor);
2239       break;
2240     }
2241     case MVT::i64:
2242     case MVT::f64: {
2243       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2244       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2245       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2246                               loFactor, hiFactor);
2247       break;
2248     }
2249     }
2250
2251     retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2252                          DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
2253   }
2254
2255   return retval;
2256 }
2257
2258 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2259   SDValue VecOp = Op.getOperand(0);
2260   SDValue ValOp = Op.getOperand(1);
2261   SDValue IdxOp = Op.getOperand(2);
2262   MVT VT = Op.getValueType();
2263
2264   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2265   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2266
2267   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2268   // Use $2 because it's always 16-byte aligned and it's available:
2269   SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2270
2271   SDValue result =
2272     DAG.getNode(SPUISD::SHUFB, VT,
2273                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2274                 VecOp,
2275                 DAG.getNode(SPUISD::SHUFFLE_MASK, VT,
2276                             DAG.getNode(ISD::ADD, PtrVT,
2277                                         PtrBase,
2278                                         DAG.getConstant(CN->getZExtValue(),
2279                                                         PtrVT))));
2280
2281   return result;
2282 }
2283
2284 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2285 {
2286   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2287
2288   assert(Op.getValueType() == MVT::i8);
2289   switch (Opc) {
2290   default:
2291     assert(0 && "Unhandled i8 math operator");
2292     /*NOTREACHED*/
2293     break;
2294   case ISD::SUB: {
2295     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2296     // the result:
2297     SDValue N1 = Op.getOperand(1);
2298     N0 = (N0.getOpcode() != ISD::Constant
2299           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2300           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2301                             MVT::i16));
2302     N1 = (N1.getOpcode() != ISD::Constant
2303           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2304           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2305                             MVT::i16));
2306     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2307                        DAG.getNode(Opc, MVT::i16, N0, N1));
2308   }
2309   case ISD::ROTR:
2310   case ISD::ROTL: {
2311     SDValue N1 = Op.getOperand(1);
2312     unsigned N1Opc;
2313     N0 = (N0.getOpcode() != ISD::Constant
2314           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2315           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2316                             MVT::i16));
2317     N1Opc = N1.getValueType().bitsLT(MVT::i32)
2318             ? ISD::ZERO_EXTEND
2319             : ISD::TRUNCATE;
2320     N1 = (N1.getOpcode() != ISD::Constant
2321           ? DAG.getNode(N1Opc, MVT::i32, N1)
2322           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2323                             MVT::i32));
2324     SDValue ExpandArg =
2325       DAG.getNode(ISD::OR, MVT::i16, N0,
2326                   DAG.getNode(ISD::SHL, MVT::i16,
2327                               N0, DAG.getConstant(8, MVT::i32)));
2328     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2329                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2330   }
2331   case ISD::SRL:
2332   case ISD::SHL: {
2333     SDValue N1 = Op.getOperand(1);
2334     unsigned N1Opc;
2335     N0 = (N0.getOpcode() != ISD::Constant
2336           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2337           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2338                             MVT::i16));
2339     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2340             ? ISD::ZERO_EXTEND
2341             : ISD::TRUNCATE;
2342     N1 = (N1.getOpcode() != ISD::Constant
2343           ? DAG.getNode(N1Opc, MVT::i16, N1)
2344           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2345                             MVT::i16));
2346     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2347                        DAG.getNode(Opc, MVT::i16, N0, N1));
2348   }
2349   case ISD::SRA: {
2350     SDValue N1 = Op.getOperand(1);
2351     unsigned N1Opc;
2352     N0 = (N0.getOpcode() != ISD::Constant
2353           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2354           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2355                             MVT::i16));
2356     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2357             ? ISD::SIGN_EXTEND
2358             : ISD::TRUNCATE;
2359     N1 = (N1.getOpcode() != ISD::Constant
2360           ? DAG.getNode(N1Opc, MVT::i16, N1)
2361           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2362                             MVT::i16));
2363     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2364                        DAG.getNode(Opc, MVT::i16, N0, N1));
2365   }
2366   case ISD::MUL: {
2367     SDValue N1 = Op.getOperand(1);
2368     unsigned N1Opc;
2369     N0 = (N0.getOpcode() != ISD::Constant
2370           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2371           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2372                             MVT::i16));
2373     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2374     N1 = (N1.getOpcode() != ISD::Constant
2375           ? DAG.getNode(N1Opc, MVT::i16, N1)
2376           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2377                             MVT::i16));
2378     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2379                        DAG.getNode(Opc, MVT::i16, N0, N1));
2380     break;
2381   }
2382   }
2383
2384   return SDValue();
2385 }
2386
2387 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2388 {
2389   MVT VT = Op.getValueType();
2390   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2391
2392   SDValue Op0 = Op.getOperand(0);
2393
2394   switch (Opc) {
2395   case ISD::ZERO_EXTEND:
2396   case ISD::SIGN_EXTEND:
2397   case ISD::ANY_EXTEND: {
2398     MVT Op0VT = Op0.getValueType();
2399     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2400
2401     assert(Op0VT == MVT::i32
2402            && "CellSPU: Zero/sign extending something other than i32");
2403     DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2404
2405     unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2406                       ? SPUISD::ROTBYTES_RIGHT_S
2407                       : SPUISD::ROTQUAD_RZ_BYTES);
2408     SDValue PromoteScalar =
2409       DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2410
2411     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2412                        DAG.getNode(ISD::BIT_CONVERT, VecVT,
2413                                    DAG.getNode(NewOpc, Op0VecVT,
2414                                                PromoteScalar,
2415                                                DAG.getConstant(4, MVT::i32))));
2416   }
2417
2418   case ISD::ADD: {
2419     // Turn operands into vectors to satisfy type checking (shufb works on
2420     // vectors)
2421     SDValue Op0 =
2422       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2423     SDValue Op1 =
2424       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2425     SmallVector<SDValue, 16> ShufBytes;
2426
2427     // Create the shuffle mask for "rotating" the borrow up one register slot
2428     // once the borrow is generated.
2429     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2430     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2431     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2432     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2433
2434     SDValue CarryGen =
2435       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2436     SDValue ShiftedCarry =
2437       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2438                   CarryGen, CarryGen,
2439                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2440                               &ShufBytes[0], ShufBytes.size()));
2441
2442     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2443                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2444                                    Op0, Op1, ShiftedCarry));
2445   }
2446
2447   case ISD::SUB: {
2448     // Turn operands into vectors to satisfy type checking (shufb works on
2449     // vectors)
2450     SDValue Op0 =
2451       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2452     SDValue Op1 =
2453       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2454     SmallVector<SDValue, 16> ShufBytes;
2455
2456     // Create the shuffle mask for "rotating" the borrow up one register slot
2457     // once the borrow is generated.
2458     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2459     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2460     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2461     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2462
2463     SDValue BorrowGen =
2464       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2465     SDValue ShiftedBorrow =
2466       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2467                   BorrowGen, BorrowGen,
2468                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2469                               &ShufBytes[0], ShufBytes.size()));
2470
2471     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2472                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2473                                    Op0, Op1, ShiftedBorrow));
2474   }
2475
2476   case ISD::SHL: {
2477     SDValue ShiftAmt = Op.getOperand(1);
2478     MVT ShiftAmtVT = ShiftAmt.getValueType();
2479     SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2480     SDValue MaskLower =
2481       DAG.getNode(SPUISD::SELB, VecVT,
2482                   Op0Vec,
2483                   DAG.getConstant(0, VecVT),
2484                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2485                               DAG.getConstant(0xff00ULL, MVT::i16)));
2486     SDValue ShiftAmtBytes =
2487       DAG.getNode(ISD::SRL, ShiftAmtVT,
2488                   ShiftAmt,
2489                   DAG.getConstant(3, ShiftAmtVT));
2490     SDValue ShiftAmtBits =
2491       DAG.getNode(ISD::AND, ShiftAmtVT,
2492                   ShiftAmt,
2493                   DAG.getConstant(7, ShiftAmtVT));
2494
2495     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2496                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2497                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2498                                                MaskLower, ShiftAmtBytes),
2499                                    ShiftAmtBits));
2500   }
2501
2502   case ISD::SRL: {
2503     MVT VT = Op.getValueType();
2504     SDValue ShiftAmt = Op.getOperand(1);
2505     MVT ShiftAmtVT = ShiftAmt.getValueType();
2506     SDValue ShiftAmtBytes =
2507       DAG.getNode(ISD::SRL, ShiftAmtVT,
2508                   ShiftAmt,
2509                   DAG.getConstant(3, ShiftAmtVT));
2510     SDValue ShiftAmtBits =
2511       DAG.getNode(ISD::AND, ShiftAmtVT,
2512                   ShiftAmt,
2513                   DAG.getConstant(7, ShiftAmtVT));
2514
2515     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2516                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2517                                    Op0, ShiftAmtBytes),
2518                        ShiftAmtBits);
2519   }
2520
2521   case ISD::SRA: {
2522     // Promote Op0 to vector
2523     SDValue Op0 =
2524       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2525     SDValue ShiftAmt = Op.getOperand(1);
2526     MVT ShiftVT = ShiftAmt.getValueType();
2527
2528     // Negate variable shift amounts
2529     if (!isa<ConstantSDNode>(ShiftAmt)) {
2530       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2531                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2532     }
2533
2534     SDValue UpperHalfSign =
2535       DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2536                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2537                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2538                                           Op0, DAG.getConstant(31, MVT::i32))));
2539     SDValue UpperHalfSignMask =
2540       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2541     SDValue UpperLowerMask =
2542       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2543                   DAG.getConstant(0xff00, MVT::i16));
2544     SDValue UpperLowerSelect =
2545       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2546                   UpperHalfSignMask, Op0, UpperLowerMask);
2547     SDValue RotateLeftBytes =
2548       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2549                   UpperLowerSelect, ShiftAmt);
2550     SDValue RotateLeftBits =
2551       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2552                   RotateLeftBytes, ShiftAmt);
2553
2554     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2555                        RotateLeftBits);
2556   }
2557   }
2558
2559   return SDValue();
2560 }
2561
2562 //! Lower byte immediate operations for v16i8 vectors:
2563 static SDValue
2564 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2565   SDValue ConstVec;
2566   SDValue Arg;
2567   MVT VT = Op.getValueType();
2568
2569   ConstVec = Op.getOperand(0);
2570   Arg = Op.getOperand(1);
2571   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2572     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2573       ConstVec = ConstVec.getOperand(0);
2574     } else {
2575       ConstVec = Op.getOperand(1);
2576       Arg = Op.getOperand(0);
2577       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2578         ConstVec = ConstVec.getOperand(0);
2579       }
2580     }
2581   }
2582
2583   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2584     uint64_t VectorBits[2];
2585     uint64_t UndefBits[2];
2586     uint64_t SplatBits, SplatUndef;
2587     int SplatSize;
2588
2589     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2590         && isConstantSplat(VectorBits, UndefBits,
2591                            VT.getVectorElementType().getSizeInBits(),
2592                            SplatBits, SplatUndef, SplatSize)) {
2593       SDValue tcVec[16];
2594       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2595       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2596
2597       // Turn the BUILD_VECTOR into a set of target constants:
2598       for (size_t i = 0; i < tcVecSize; ++i)
2599         tcVec[i] = tc;
2600
2601       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2602                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2603     }
2604   }
2605   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2606   // lowered.  Return the operation, rather than a null SDValue.
2607   return Op;
2608 }
2609
2610 //! Lower i32 multiplication
2611 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2612                           unsigned Opc) {
2613   switch (VT.getSimpleVT()) {
2614   default:
2615     cerr << "CellSPU: Unknown LowerMUL value type, got "
2616          << Op.getValueType().getMVTString()
2617          << "\n";
2618     abort();
2619     /*NOTREACHED*/
2620
2621   case MVT::i32: {
2622     SDValue rA = Op.getOperand(0);
2623     SDValue rB = Op.getOperand(1);
2624
2625     return DAG.getNode(ISD::ADD, MVT::i32,
2626                        DAG.getNode(ISD::ADD, MVT::i32,
2627                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2628                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2629                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2630   }
2631   }
2632
2633   return SDValue();
2634 }
2635
2636 //! Custom lowering for CTPOP (count population)
2637 /*!
2638   Custom lowering code that counts the number ones in the input
2639   operand. SPU has such an instruction, but it counts the number of
2640   ones per byte, which then have to be accumulated.
2641 */
2642 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2643   MVT VT = Op.getValueType();
2644   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2645
2646   switch (VT.getSimpleVT()) {
2647   default:
2648     assert(false && "Invalid value type!");
2649   case MVT::i8: {
2650     SDValue N = Op.getOperand(0);
2651     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2652
2653     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2654     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2655
2656     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2657   }
2658
2659   case MVT::i16: {
2660     MachineFunction &MF = DAG.getMachineFunction();
2661     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2662
2663     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2664
2665     SDValue N = Op.getOperand(0);
2666     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2667     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2668     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2669
2670     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2671     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2672
2673     // CNTB_result becomes the chain to which all of the virtual registers
2674     // CNTB_reg, SUM1_reg become associated:
2675     SDValue CNTB_result =
2676       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2677
2678     SDValue CNTB_rescopy =
2679       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2680
2681     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2682
2683     return DAG.getNode(ISD::AND, MVT::i16,
2684                        DAG.getNode(ISD::ADD, MVT::i16,
2685                                    DAG.getNode(ISD::SRL, MVT::i16,
2686                                                Tmp1, Shift1),
2687                                    Tmp1),
2688                        Mask0);
2689   }
2690
2691   case MVT::i32: {
2692     MachineFunction &MF = DAG.getMachineFunction();
2693     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2694
2695     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2696     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2697
2698     SDValue N = Op.getOperand(0);
2699     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2700     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2701     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2702     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2703
2704     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2705     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2706
2707     // CNTB_result becomes the chain to which all of the virtual registers
2708     // CNTB_reg, SUM1_reg become associated:
2709     SDValue CNTB_result =
2710       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2711
2712     SDValue CNTB_rescopy =
2713       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2714
2715     SDValue Comp1 =
2716       DAG.getNode(ISD::SRL, MVT::i32,
2717                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2718
2719     SDValue Sum1 =
2720       DAG.getNode(ISD::ADD, MVT::i32,
2721                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2722
2723     SDValue Sum1_rescopy =
2724       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2725
2726     SDValue Comp2 =
2727       DAG.getNode(ISD::SRL, MVT::i32,
2728                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2729                   Shift2);
2730     SDValue Sum2 =
2731       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2732                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2733
2734     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2735   }
2736
2737   case MVT::i64:
2738     break;
2739   }
2740
2741   return SDValue();
2742 }
2743
2744 //! Lower ISD::SELECT_CC
2745 /*!
2746   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2747   SELB instruction.
2748
2749   \note Need to revisit this in the future: if the code path through the true
2750   and false value computations is longer than the latency of a branch (6
2751   cycles), then it would be more advantageous to branch and insert a new basic
2752   block and branch on the condition. However, this code does not make that
2753   assumption, given the simplisitc uses so far.
2754  */
2755
2756 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2757   MVT VT = Op.getValueType();
2758   SDValue lhs = Op.getOperand(0);
2759   SDValue rhs = Op.getOperand(1);
2760   SDValue trueval = Op.getOperand(2);
2761   SDValue falseval = Op.getOperand(3);
2762   SDValue condition = Op.getOperand(4);
2763
2764   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2765   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2766   // with another "cannot select select_cc" assert:
2767
2768   SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
2769   return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
2770 }
2771
2772 //! Custom (target-specific) lowering entry point
2773 /*!
2774   This is where LLVM's DAG selection process calls to do target-specific
2775   lowering of nodes.
2776  */
2777 SDValue
2778 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2779 {
2780   unsigned Opc = (unsigned) Op.getOpcode();
2781   MVT VT = Op.getValueType();
2782
2783   switch (Opc) {
2784   default: {
2785     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2786     cerr << "Op.getOpcode() = " << Opc << "\n";
2787     cerr << "*Op.getNode():\n";
2788     Op.getNode()->dump();
2789     abort();
2790   }
2791   case ISD::LOAD:
2792   case ISD::SEXTLOAD:
2793   case ISD::ZEXTLOAD:
2794     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2795   case ISD::STORE:
2796     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2797   case ISD::ConstantPool:
2798     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2799   case ISD::GlobalAddress:
2800     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2801   case ISD::JumpTable:
2802     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2803   case ISD::Constant:
2804     return LowerConstant(Op, DAG);
2805   case ISD::ConstantFP:
2806     return LowerConstantFP(Op, DAG);
2807   case ISD::BRCOND:
2808     return LowerBRCOND(Op, DAG);
2809   case ISD::FORMAL_ARGUMENTS:
2810     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2811   case ISD::CALL:
2812     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2813   case ISD::RET:
2814     return LowerRET(Op, DAG, getTargetMachine());
2815
2816
2817   // i8, i64 math ops:
2818   case ISD::ZERO_EXTEND:
2819   case ISD::SIGN_EXTEND:
2820   case ISD::ANY_EXTEND:
2821   case ISD::ADD:
2822   case ISD::SUB:
2823   case ISD::ROTR:
2824   case ISD::ROTL:
2825   case ISD::SRL:
2826   case ISD::SHL:
2827   case ISD::SRA: {
2828     if (VT == MVT::i8)
2829       return LowerI8Math(Op, DAG, Opc);
2830     else if (VT == MVT::i64)
2831       return LowerI64Math(Op, DAG, Opc);
2832     break;
2833   }
2834
2835   // Vector-related lowering.
2836   case ISD::BUILD_VECTOR:
2837     return LowerBUILD_VECTOR(Op, DAG);
2838   case ISD::SCALAR_TO_VECTOR:
2839     return LowerSCALAR_TO_VECTOR(Op, DAG);
2840   case ISD::VECTOR_SHUFFLE:
2841     return LowerVECTOR_SHUFFLE(Op, DAG);
2842   case ISD::EXTRACT_VECTOR_ELT:
2843     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2844   case ISD::INSERT_VECTOR_ELT:
2845     return LowerINSERT_VECTOR_ELT(Op, DAG);
2846
2847   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2848   case ISD::AND:
2849   case ISD::OR:
2850   case ISD::XOR:
2851     return LowerByteImmed(Op, DAG);
2852
2853   // Vector and i8 multiply:
2854   case ISD::MUL:
2855     if (VT.isVector())
2856       return LowerVectorMUL(Op, DAG);
2857     else if (VT == MVT::i8)
2858       return LowerI8Math(Op, DAG, Opc);
2859     else
2860       return LowerMUL(Op, DAG, VT, Opc);
2861
2862   case ISD::FDIV:
2863     if (VT == MVT::f32 || VT == MVT::v4f32)
2864       return LowerFDIVf32(Op, DAG);
2865 #if 0
2866     // This is probably a libcall
2867     else if (Op.getValueType() == MVT::f64)
2868       return LowerFDIVf64(Op, DAG);
2869 #endif
2870     else
2871       assert(0 && "Calling FDIV on unsupported MVT");
2872
2873   case ISD::CTPOP:
2874     return LowerCTPOP(Op, DAG);
2875
2876   case ISD::SELECT_CC:
2877     return LowerSELECT_CC(Op, DAG);
2878   }
2879
2880   return SDValue();
2881 }
2882
2883 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2884 {
2885 #if 0
2886   unsigned Opc = (unsigned) N->getOpcode();
2887   MVT OpVT = N->getValueType(0);
2888
2889   switch (Opc) {
2890   default: {
2891     cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2892     cerr << "Op.getOpcode() = " << Opc << "\n";
2893     cerr << "*Op.getNode():\n";
2894     N->dump();
2895     abort();
2896     /*NOTREACHED*/
2897   }
2898   }
2899 #endif
2900
2901   /* Otherwise, return unchanged */
2902   return 0;
2903 }
2904
2905 //===----------------------------------------------------------------------===//
2906 // Target Optimization Hooks
2907 //===----------------------------------------------------------------------===//
2908
2909 SDValue
2910 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2911 {
2912 #if 0
2913   TargetMachine &TM = getTargetMachine();
2914 #endif
2915   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2916   SelectionDAG &DAG = DCI.DAG;
2917   SDValue Op0 = N->getOperand(0);      // everything has at least one operand
2918   SDValue Result;                     // Initially, NULL result
2919
2920   switch (N->getOpcode()) {
2921   default: break;
2922   case ISD::ADD: {
2923     SDValue Op1 = N->getOperand(1);
2924
2925     if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2926       SDValue Op01 = Op0.getOperand(1);
2927       if (Op01.getOpcode() == ISD::Constant
2928           || Op01.getOpcode() == ISD::TargetConstant) {
2929         // (add <const>, (SPUindirect <arg>, <const>)) ->
2930         // (SPUindirect <arg>, <const + const>)
2931         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2932         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2933         SDValue combinedConst =
2934           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2935                           Op0.getValueType());
2936
2937         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2938                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2939         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2940                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2941         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2942                            Op0.getOperand(0), combinedConst);
2943       }
2944     } else if (isa<ConstantSDNode>(Op0)
2945                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2946       SDValue Op11 = Op1.getOperand(1);
2947       if (Op11.getOpcode() == ISD::Constant
2948           || Op11.getOpcode() == ISD::TargetConstant) {
2949         // (add (SPUindirect <arg>, <const>), <const>) ->
2950         // (SPUindirect <arg>, <const + const>)
2951         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2952         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2953         SDValue combinedConst =
2954           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2955                           Op0.getValueType());
2956
2957         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2958                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2959         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2960                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2961
2962         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2963                            Op1.getOperand(0), combinedConst);
2964       }
2965     }
2966     break;
2967   }
2968   case ISD::SIGN_EXTEND:
2969   case ISD::ZERO_EXTEND:
2970   case ISD::ANY_EXTEND: {
2971     if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2972         N->getValueType(0) == Op0.getValueType()) {
2973       // (any_extend (SPUextract_elt0 <arg>)) ->
2974       // (SPUextract_elt0 <arg>)
2975       // Types must match, however...
2976       DEBUG(cerr << "Replace: ");
2977       DEBUG(N->dump(&DAG));
2978       DEBUG(cerr << "\nWith:    ");
2979       DEBUG(Op0.getNode()->dump(&DAG));
2980       DEBUG(cerr << "\n");
2981
2982       return Op0;
2983     }
2984     break;
2985   }
2986   case SPUISD::IndirectAddr: {
2987     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2988       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2989       if (CN->getZExtValue() == 0) {
2990         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2991         // (SPUaform <addr>, 0)
2992
2993         DEBUG(cerr << "Replace: ");
2994         DEBUG(N->dump(&DAG));
2995         DEBUG(cerr << "\nWith:    ");
2996         DEBUG(Op0.getNode()->dump(&DAG));
2997         DEBUG(cerr << "\n");
2998
2999         return Op0;
3000       }
3001     }
3002     break;
3003   }
3004   case SPUISD::SHLQUAD_L_BITS:
3005   case SPUISD::SHLQUAD_L_BYTES:
3006   case SPUISD::VEC_SHL:
3007   case SPUISD::VEC_SRL:
3008   case SPUISD::VEC_SRA:
3009   case SPUISD::ROTQUAD_RZ_BYTES:
3010   case SPUISD::ROTQUAD_RZ_BITS: {
3011     SDValue Op1 = N->getOperand(1);
3012
3013     if (isa<ConstantSDNode>(Op1)) {
3014       // Kill degenerate vector shifts:
3015       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
3016
3017       if (CN->getZExtValue() == 0) {
3018         Result = Op0;
3019       }
3020     }
3021     break;
3022   }
3023   case SPUISD::PROMOTE_SCALAR: {
3024     switch (Op0.getOpcode()) {
3025     default:
3026       break;
3027     case ISD::ANY_EXTEND:
3028     case ISD::ZERO_EXTEND:
3029     case ISD::SIGN_EXTEND: {
3030       // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
3031       // <arg>
3032       // but only if the SPUpromote_scalar and <arg> types match.
3033       SDValue Op00 = Op0.getOperand(0);
3034       if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
3035         SDValue Op000 = Op00.getOperand(0);
3036         if (Op000.getValueType() == N->getValueType(0)) {
3037           Result = Op000;
3038         }
3039       }
3040       break;
3041     }
3042     case SPUISD::EXTRACT_ELT0: {
3043       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
3044       // <arg>
3045       Result = Op0.getOperand(0);
3046       break;
3047     }
3048     }
3049     break;
3050   }
3051   }
3052   // Otherwise, return unchanged.
3053 #ifdef NDEBUG
3054   if (Result.getNode()) {
3055     DEBUG(cerr << "\nReplace.SPU: ");
3056     DEBUG(N->dump(&DAG));
3057     DEBUG(cerr << "\nWith:        ");
3058     DEBUG(Result.getNode()->dump(&DAG));
3059     DEBUG(cerr << "\n");
3060   }
3061 #endif
3062
3063   return Result;
3064 }
3065
3066 //===----------------------------------------------------------------------===//
3067 // Inline Assembly Support
3068 //===----------------------------------------------------------------------===//
3069
3070 /// getConstraintType - Given a constraint letter, return the type of
3071 /// constraint it is for this target.
3072 SPUTargetLowering::ConstraintType
3073 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3074   if (ConstraintLetter.size() == 1) {
3075     switch (ConstraintLetter[0]) {
3076     default: break;
3077     case 'b':
3078     case 'r':
3079     case 'f':
3080     case 'v':
3081     case 'y':
3082       return C_RegisterClass;
3083     }
3084   }
3085   return TargetLowering::getConstraintType(ConstraintLetter);
3086 }
3087
3088 std::pair<unsigned, const TargetRegisterClass*>
3089 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3090                                                 MVT VT) const
3091 {
3092   if (Constraint.size() == 1) {
3093     // GCC RS6000 Constraint Letters
3094     switch (Constraint[0]) {
3095     case 'b':   // R1-R31
3096     case 'r':   // R0-R31
3097       if (VT == MVT::i64)
3098         return std::make_pair(0U, SPU::R64CRegisterClass);
3099       return std::make_pair(0U, SPU::R32CRegisterClass);
3100     case 'f':
3101       if (VT == MVT::f32)
3102         return std::make_pair(0U, SPU::R32FPRegisterClass);
3103       else if (VT == MVT::f64)
3104         return std::make_pair(0U, SPU::R64FPRegisterClass);
3105       break;
3106     case 'v':
3107       return std::make_pair(0U, SPU::GPRCRegisterClass);
3108     }
3109   }
3110
3111   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3112 }
3113
3114 //! Compute used/known bits for a SPU operand
3115 void
3116 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3117                                                   const APInt &Mask,
3118                                                   APInt &KnownZero,
3119                                                   APInt &KnownOne,
3120                                                   const SelectionDAG &DAG,
3121                                                   unsigned Depth ) const {
3122 #if 0
3123   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3124 #endif
3125
3126   switch (Op.getOpcode()) {
3127   default:
3128     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3129     break;
3130
3131 #if 0
3132   case CALL:
3133   case SHUFB:
3134   case SHUFFLE_MASK:
3135   case CNTB:
3136 #endif
3137
3138   case SPUISD::PROMOTE_SCALAR: {
3139     SDValue Op0 = Op.getOperand(0);
3140     MVT Op0VT = Op0.getValueType();
3141     unsigned Op0VTBits = Op0VT.getSizeInBits();
3142     uint64_t InMask = Op0VT.getIntegerVTBitMask();
3143     KnownZero |= APInt(Op0VTBits, ~InMask, false);
3144     KnownOne |= APInt(Op0VTBits, InMask, false);
3145     break;
3146   }
3147
3148   case SPUISD::LDRESULT:
3149   case SPUISD::EXTRACT_ELT0:
3150   case SPUISD::EXTRACT_ELT0_CHAINED: {
3151     MVT OpVT = Op.getValueType();
3152     unsigned OpVTBits = OpVT.getSizeInBits();
3153     uint64_t InMask = OpVT.getIntegerVTBitMask();
3154     KnownZero |= APInt(OpVTBits, ~InMask, false);
3155     KnownOne |= APInt(OpVTBits, InMask, false);
3156     break;
3157   }
3158
3159 #if 0
3160   case EXTRACT_I1_ZEXT:
3161   case EXTRACT_I1_SEXT:
3162   case EXTRACT_I8_ZEXT:
3163   case EXTRACT_I8_SEXT:
3164   case MPY:
3165   case MPYU:
3166   case MPYH:
3167   case MPYHH:
3168   case SPUISD::SHLQUAD_L_BITS:
3169   case SPUISD::SHLQUAD_L_BYTES:
3170   case SPUISD::VEC_SHL:
3171   case SPUISD::VEC_SRL:
3172   case SPUISD::VEC_SRA:
3173   case SPUISD::VEC_ROTL:
3174   case SPUISD::VEC_ROTR:
3175   case SPUISD::ROTQUAD_RZ_BYTES:
3176   case SPUISD::ROTQUAD_RZ_BITS:
3177   case SPUISD::ROTBYTES_RIGHT_S:
3178   case SPUISD::ROTBYTES_LEFT:
3179   case SPUISD::ROTBYTES_LEFT_CHAINED:
3180   case SPUISD::SELECT_MASK:
3181   case SPUISD::SELB:
3182   case SPUISD::FPInterp:
3183   case SPUISD::FPRecipEst:
3184   case SPUISD::SEXT32TO64:
3185 #endif
3186   }
3187 }
3188
3189 // LowerAsmOperandForConstraint
3190 void
3191 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3192                                                 char ConstraintLetter,
3193                                                 bool hasMemory,
3194                                                 std::vector<SDValue> &Ops,
3195                                                 SelectionDAG &DAG) const {
3196   // Default, for the time being, to the base class handler
3197   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3198                                                Ops, DAG);
3199 }
3200
3201 /// isLegalAddressImmediate - Return true if the integer value can be used
3202 /// as the offset of the target addressing mode.
3203 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3204                                                 const Type *Ty) const {
3205   // SPU's addresses are 256K:
3206   return (V > -(1 << 18) && V < (1 << 18) - 1);
3207 }
3208
3209 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3210   return false;
3211 }
3212
3213 bool
3214 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3215   // The SPU target isn't yet aware of offsets.
3216   return false;
3217 }