lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/VectorExtras.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! Associates a scalar value type with the byte offset of its preferred slot
  41   struct valtype_map_s {
  42     const MVT   valtype;          //!< value type this entry describes
  43     const int   prefslot_byte;    //!< byte offset AlignedLoad subtracts from the rotation amount
  44   };
  45   //! One entry per scalar type handled; scanned linearly by getValueTypeMapEntry()
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56   //! Number of entries in valtype_map
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << VT.getMVTString()
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDValue &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! True when Op is a register node or an SPU load-result (LDRESULT) node
 105   bool isIndirectOperand(const SDValue &Op)
 106   {
 107     if (Op.getOpcode() == ISD::Register)
 108       return true;
 109     return Op.getOpcode() == SPUISD::LDRESULT;
 110   }
 111 }
 112 //! Configure Cell SPU lowering: register classes, per-type operation actions, DAG-combine hooks
 113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 114   : TargetLowering(TM),
 115     SPUTM(TM)
 116 {
 117   // Turn on the pow2-div-is-cheap lowering hint.
 118   setPow2DivIsCheap();
 119
 120   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 121   setUseUnderscoreSetJmp(true);
 122   setUseUnderscoreLongJmp(true);
 123
 124   // Set up the SPU's scalar register classes:
 125   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 126   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 127   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 128   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 129   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 130   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 131   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 132
 133   // Initialize libcalls (used by the i64 MUL expansion below):
 134   setLibcallName(RTLIB::MUL_I64, "__muldi3");
 135
 136   // SPU has no sign or zero extended loads for i1, i8, i16:
 137   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 138   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 139   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 140
 141   setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
 142   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
 143   setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 144   setTruncStoreAction(MVT::i8,    MVT::i8, Custom);
 145   setTruncStoreAction(MVT::i16,   MVT::i8, Custom);
 146   setTruncStoreAction(MVT::i32,   MVT::i8, Custom);
 147   setTruncStoreAction(MVT::i64,   MVT::i8, Custom);
 148   setTruncStoreAction(MVT::i128,  MVT::i8, Custom);
 149
 150   setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
 151   setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
 152   setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 153
 154   // Constants: i64 and f64 are custom lowered, f32 constants are legal:
 155   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 156   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 157   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 158
 159   // SPU's loads and stores have to be custom lowered (i8 up to, not including, f128):
 160   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 161        ++sctype) {
 162     MVT VT = (MVT::SimpleValueType)sctype;
 163
 164     setOperationAction(ISD::LOAD, VT, Custom);
 165     setOperationAction(ISD::STORE, VT, Custom);
 166   }
 167
 168   // Custom lower BRCOND for i8 to "promote" the result to i16
 169   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 170
 171   // Expand the jumptable branches
 172   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 173   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 174
 175   // Custom lower SELECT_CC for most cases, but expand by default
 176   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 177   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 178   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 179   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 180   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 181
 182   // SPU has no intrinsics for these particular operations:
 183   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 184
 185   // SREM/UREM are expanded for i32 and i64
 186   setOperationAction(ISD::SREM, MVT::i32, Expand);
 187   setOperationAction(ISD::UREM, MVT::i32, Expand);
 188   setOperationAction(ISD::SREM, MVT::i64, Expand);
 189   setOperationAction(ISD::UREM, MVT::i64, Expand);
 190
 191   // We don't support sin/cos/fmod (sqrt is handled just below)
 192   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 193   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 194   setOperationAction(ISD::FREM , MVT::f64, Expand);
 195   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 196   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 197   setOperationAction(ISD::FREM , MVT::f32, Expand);
 198
 199   // FSQRT is expanded unconditionally for both FP types
 200   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 201   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 202
 203   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 204   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 205
 206   // SPU can do rotate right and left, so legalize it... but customize for i8
 207   // because instructions don't exist.
 208
 209   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 210   //        .td files.
 211   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 212   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 213   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 214
 215   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 216   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 217   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 218
 219   // SPU has no native version of shift left/right for i8
 220   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 221   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 222   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 223
 224   // SPU needs custom lowering for shift left/right for i64
 225   setOperationAction(ISD::SHL,  MVT::i64,    Custom);
 226   setOperationAction(ISD::SRL,  MVT::i64,    Custom);
 227   setOperationAction(ISD::SRA,  MVT::i64,    Custom);
 228
 229   // Custom lower i8 and i32 multiplications; i64 is expanded (libcall name set above)
 230   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 231   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 232   setOperationAction(ISD::MUL,  MVT::i64,    Expand);   // libcall
 233
 234   // SMUL_LOHI, UMUL_LOHI
 235   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
 236   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
 237   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
 238   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
 239
 240   // Need to custom handle (some) common i8, i64 math ops
 241   setOperationAction(ISD::ADD,  MVT::i64,    Custom);
 242   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 243   setOperationAction(ISD::SUB,  MVT::i64,    Custom);
 244
 245   // BSWAP and CTTZ are expanded, CTLZ is legal for i32 only, and
 246   // CTPOP has to be custom lowered.
 247   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 248   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 249
 250   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 251   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 252   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 253   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 254
 255   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 256   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 257
 258   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 259
 260   // SPU has a version of select that implements (a&~c)|(b&c), just like
 261   // select ought to work (i64 select is expanded):
 262   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 263   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 264   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 265   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 266
 267   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 268   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 269   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 270   setOperationAction(ISD::SETCC, MVT::i64,   Expand);
 271
 272   // Zero extension and sign extension for i64 have to be
 273   // custom legalized
 274   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 275   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 276   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
 277
 278   // FP -> INT: i32 results are marked legal here, i64 results are custom
 279   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 280   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 281   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);   // replaced by Promote further down
 282   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 283
 284   // FDIV on SPU requires custom lowering (f64 is currently left disabled)
 285   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 286   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 287
 288   // SPU has [U|S]INT_TO_FP: legal for i32, promoted for i8/i16, custom for i64
 289   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 290   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 291   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 292   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 293   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 294   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 295   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 296   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 297
 298   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 299   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 300   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 301   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 302
 303   // We cannot sextinreg(i1).  Expand to shifts.
 304   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 305
 306   // Support label based line numbers.
 307   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 308   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 309
 310   // We want to legalize GlobalAddress and ConstantPool nodes into the
 311   // appropriate instructions to materialize the address.
 312   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 313        ++sctype) {
 314     MVT VT = (MVT::SimpleValueType)sctype;
 315
 316     setOperationAction(ISD::GlobalAddress, VT, Custom);
 317     setOperationAction(ISD::ConstantPool,  VT, Custom);
 318     setOperationAction(ISD::JumpTable,     VT, Custom);
 319   }
 320
 321   // RET must be custom lowered, to meet ABI requirements
 322   setOperationAction(ISD::RET,           MVT::Other, Custom);
 323
 324   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 325   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 326
 327   // Use the default implementation.
 328   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 329   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 330   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 331   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 332   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 333   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 334   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 335
 336   // i64 <-> fp conversions are custom lowered (repeats the settings made earlier).
 337   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 338   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 339
 340   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 341   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);   // overrides the earlier Legal
 342
 343   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 344   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 345
 346   // Vector register classes: each vector type registered here uses the
 347   // VECREG register class.
 348   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 349   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 350   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 351   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 352   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 353   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 354   // Default actions for every vector value type; v16i8/v4f32 overrides follow the loop.
 355   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 356        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 357     MVT VT = (MVT::SimpleValueType)i;
 358
 359     // add/sub are legal for all supported vector VT's.
 360     setOperationAction(ISD::ADD , VT, Legal);
 361     setOperationAction(ISD::SUB , VT, Legal);
 362     // mul has to be custom lowered.
 363     setOperationAction(ISD::MUL , VT, Custom);
 364
 365     setOperationAction(ISD::AND   , VT, Legal);
 366     setOperationAction(ISD::OR    , VT, Legal);
 367     setOperationAction(ISD::XOR   , VT, Legal);
 368     setOperationAction(ISD::LOAD  , VT, Legal);
 369     setOperationAction(ISD::SELECT, VT, Legal);
 370     setOperationAction(ISD::STORE,  VT, Legal);
 371
 372     // Integer division and remainder are expanded; FDIV is custom lowered:
 373     setOperationAction(ISD::SDIV, VT, Expand);
 374     setOperationAction(ISD::SREM, VT, Expand);
 375     setOperationAction(ISD::UDIV, VT, Expand);
 376     setOperationAction(ISD::UREM, VT, Expand);
 377     setOperationAction(ISD::FDIV, VT, Custom);
 378
 379     // Custom lower build_vector, constant pool spills, insert and
 380     // extract vector elements, and vector shuffles:
 381     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 382     setOperationAction(ISD::ConstantPool, VT, Custom);
 383     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 384     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 385     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 386     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 387   }
 388   // v16i8 logical ops become Custom; the MUL and SCALAR_TO_VECTOR lines repeat the loop's setting.
 389   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 390   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 391   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 392   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 393   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 394
 395   setShiftAmountType(MVT::i32);
 396   setBooleanContents(ZeroOrOneBooleanContent);
 397
 398   setStackPointerRegisterToSaveRestore(SPU::R1);
 399
 400   // We have target-specific dag combine patterns for the following nodes:
 401   setTargetDAGCombine(ISD::ADD);
 402   setTargetDAGCombine(ISD::ZERO_EXTEND);
 403   setTargetDAGCombine(ISD::SIGN_EXTEND);
 404   setTargetDAGCombine(ISD::ANY_EXTEND);
 405
 406   computeRegisterProperties();
 407
 408   // Set other properties:
 409   setSchedulingPreference(SchedulingForLatency);
 410 }
 411
 412 const char *
 413 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 414 {
 415   if (node_names.empty()) {
 416     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 417     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 418     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 419     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 420     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 421     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 422     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 423     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 424     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 425     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 426     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 427     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 428     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 429     node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
 430                                               = "SPUISD::VEC2PREFSLOT_CHAINED";
 431     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 432     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 433     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 434     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 435     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 436     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 437     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 438     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 439     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 440     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 441     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 442     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 443     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 444     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 445     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 446     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
 447       "SPUISD::ROTQUAD_RZ_BYTES";
 448     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
 449       "SPUISD::ROTQUAD_RZ_BITS";
 450     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 451     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 452       "SPUISD::ROTBYTES_LEFT_CHAINED";
 453     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 454       "SPUISD::ROTBYTES_LEFT_BITS";
 455     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 456     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 457     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
 458     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
 459     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
 460     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
 461     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 462     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 463     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 464   }
 465
 466   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 467
 468   return ((i != node_names.end()) ? i->second : 0);
 469 }
 470 //! Returns Op's own value type when that type is an integer type, otherwise i32
 471 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 472   if (!Op.getValueType().isInteger()) return MVT(MVT::i32);
 473   return Op.getValueType();
 474 }
 475
 476 //===----------------------------------------------------------------------===//
 477 // Calling convention code:
 478 //===----------------------------------------------------------------------===//
 479
 480 #include "SPUGenCallingConv.inc"
 481
 482 //===----------------------------------------------------------------------===//
 483 //  LowerOperation implementation
 484 //===----------------------------------------------------------------------===//
 485
 486 /// Aligned load common code for CellSPU
 487 /*!
 488   \param[in] Op The SelectionDAG load or store operand
 489   \param[in] DAG The selection DAG
 490   \param[in] ST CellSPU subtarget information structure
 491   \param[in,out] alignment Caller initializes this to the load or store node's
 492   value from getAlignment(), may be updated while generating the aligned load
 493   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 494   offset (divisible by 16, modulo 16 == 0)
 495   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 496   offset of the preferred slot (modulo 16 != 0)
 497   \param[in,out] VT Caller initializes this value type to the the load or store
 498   node's loaded or stored value type; may be updated if an i1-extended load or
 499   store.
 500   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 501   otherwise false. Can help to determine if the chunk needs to be rotated.
 502
 503  Both load and store lowering load a block of data aligned on a 16-byte
 504  boundary. This is the common aligned load code shared between both.
 505  */
 506 static SDValue
 507 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 508             LSBaseSDNode *LSN,
 509             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 510             MVT &VT, bool &was16aligned)
 511 {
 512   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 513   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 514   SDValue basePtr = LSN->getBasePtr();
 515   SDValue chain = LSN->getChain();
 516
 517   if (basePtr.getOpcode() == ISD::ADD) {
 518     SDValue Op1 = basePtr.getNode()->getOperand(1);
 519
 520     if (Op1.getOpcode() == ISD::Constant
 521         || Op1.getOpcode() == ISD::TargetConstant) {
 522       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 523
 524       alignOffs = (int) CN->getZExtValue();
 525       prefSlotOffs = (int) (alignOffs & 0xf);
 526
 527       // Adjust the rotation amount to ensure that the final result ends up in
 528       // the preferred slot:
 529       prefSlotOffs -= vtm->prefslot_byte;
 530       basePtr = basePtr.getOperand(0);
 531
 532       // Loading from memory, can we adjust alignment?
 533       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 534         SDValue APtr = basePtr.getOperand(0);
 535         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 536           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 537           alignment = GSDN->getGlobal()->getAlignment();
 538         }
 539       }
 540     } else {
 541       alignOffs = 0;
 542       prefSlotOffs = -vtm->prefslot_byte;
 543     }
 544   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
 545     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
 546     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
 547     prefSlotOffs = (int) (alignOffs & 0xf);
 548     prefSlotOffs -= vtm->prefslot_byte;
 549     basePtr = DAG.getRegister(SPU::R1, VT);
 550   } else {
 551     alignOffs = 0;
 552     prefSlotOffs = -vtm->prefslot_byte;
 553   }
 554
 555   if (alignment == 16) {
 556     // Realign the base pointer as a D-Form address:
 557     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 558       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 559                             basePtr,
 560                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 561     }
 562
 563     // Emit the vector load:
 564     was16aligned = true;
 565     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 566                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 567                        LSN->isVolatile(), 16);
 568   }
 569
 570   // Unaligned load or we're using the "large memory" model, which means that
 571   // we have to be very pessimistic:
 572   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 573     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
 574                           DAG.getConstant(0, PtrVT));
 575   }
 576
 577   // Add the offset
 578   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 579                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 580   was16aligned = false;
 581   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 582                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 583                      LSN->isVolatile(), 16);
 584 }
 585
 586 /// Custom lower loads for CellSPU
 587 /*!
 588  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 589  within a 16-byte block, we have to rotate to extract the requested element.
 590  Only UNINDEXED loads are handled; every indexed addressing mode aborts. */
 591 static SDValue
 592 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 593   LoadSDNode *LN = cast<LoadSDNode>(Op);
 594   SDValue the_chain = LN->getChain();
 595   MVT VT = LN->getMemoryVT();                    // type as stored in memory
 596   MVT OpVT = Op.getNode()->getValueType(0);      // type of the load's result
 597   ISD::LoadExtType ExtType = LN->getExtensionType();
 598   unsigned alignment = LN->getAlignment();
 599   SDValue Ops[8];
 600
 601   switch (LN->getAddressingMode()) {
 602   case ISD::UNINDEXED: {
 603     int offset, rotamt;
 604     bool was16aligned;
 605     SDValue result =   // v16i8 load of the enclosing 16-byte chunk
 606       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 607
 608     if (result.getNode() == 0)
 609       return result;
 610
 611     the_chain = result.getValue(1);
 612     // Rotate the chunk if necessary; a negative amount is first wrapped by 16
 613     if (rotamt < 0)
 614       rotamt += 16;
 615     if (rotamt != 0 || !was16aligned) {
 616       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 617
 618       Ops[0] = the_chain;
 619       Ops[1] = result;
 620       if (was16aligned) {   // rotation amount is the constant itself
 621         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 622       } else {              // rotation amount is the load's base pointer plus rotamt
 623         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 624         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 625         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 626                              DAG.getConstant(rotamt, PtrVT));
 627       }
 628
 629       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 630       the_chain = result.getValue(1);
 631     }
 632     // Plain load or any-extending load: reinterpret the chunk and take the preferred slot
 633     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 634       SDVTList scalarvts;
 635       MVT vecVT = MVT::v16i8;
 636
 637       // Convert the loaded v16i8 vector to the appropriate vector type
 638       // specified by the operand (an i1 load keeps the v16i8 view):
 639       if (OpVT == VT) {
 640         if (VT != MVT::i1)
 641           vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 642       } else
 643         vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
 644
 645       Ops[0] = the_chain;
 646       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 647       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 648       result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
 649       the_chain = result.getValue(1);
 650     } else {
 651       // Handle the sign and zero-extending loads for i1 and i8:
 652       unsigned NewOpC;
 653       // NOTE(review): here VT != OpVT, yet the result type OpVT is tested against i1; presumably VT was meant - confirm
 654       if (ExtType == ISD::SEXTLOAD) {
 655         NewOpC = (OpVT == MVT::i1
 656                   ? SPUISD::EXTRACT_I1_SEXT
 657                   : SPUISD::EXTRACT_I8_SEXT);
 658       } else {
 659         assert(ExtType == ISD::ZEXTLOAD);
 660         NewOpC = (OpVT == MVT::i1
 661                   ? SPUISD::EXTRACT_I1_ZEXT
 662                   : SPUISD::EXTRACT_I8_ZEXT);
 663       }
 664       // The extract node is built without a chain; the_chain keeps its previous value
 665       result = DAG.getNode(NewOpC, OpVT, result);
 666     }
 667     // Wrap the value and the final chain in an LDRESULT node, which is what gets returned
 668     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 669     SDValue retops[2] = {
 670       result,
 671       the_chain
 672     };
 673
 674     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 675                          retops, sizeof(retops) / sizeof(retops[0]));
 676     return result;
 677   }
 678   case ISD::PRE_INC:
 679   case ISD::PRE_DEC:
 680   case ISD::POST_INC:
 681   case ISD::POST_DEC:
 682   case ISD::LAST_INDEXED_MODE:   // indexed modes: print the mode and abort
 683     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 684             "UNINDEXED\n";
 685     cerr << (unsigned) LN->getAddressingMode() << "\n";
 686     abort();
 687     /*NOTREACHED*/
 688   }
 689   // Reached only for an addressing mode not listed in the switch
 690   return SDValue();
 691 }
 692
 693 /// Custom lower stores for CellSPU
 694 /*!
 695  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 696  within a 16-byte block, we have to generate a shuffle to insert the
 697  requested element into its place, then store the resulting block.
 698  */
 699 static SDValue
 700 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 701   StoreSDNode *SN = cast<StoreSDNode>(Op);
 702   SDValue Value = SN->getValue();
 703   MVT VT = Value.getValueType();
 704   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 705   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 706   unsigned alignment = SN->getAlignment();
 707
 708   switch (SN->getAddressingMode()) {
 709   case ISD::UNINDEXED: {
 710     int chunk_offset, slot_offset;
 711     bool was16aligned;
 712
 713     // The vector type we really want to load from the 16-byte chunk.
 714     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
 715         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 716
 717     SDValue alignLoadVec =
 718       AlignedLoad(Op, DAG, ST, SN, alignment,
 719                   chunk_offset, slot_offset, VT, was16aligned);
 720
 721     if (alignLoadVec.getNode() == 0)
 722       return alignLoadVec;
 723
 724     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 725     SDValue basePtr = LN->getBasePtr();
 726     SDValue the_chain = alignLoadVec.getValue(1);
 727     SDValue theValue = SN->getValue();
 728     SDValue result;
 729
 730     if (StVT != VT
 731         && (theValue.getOpcode() == ISD::AssertZext
 732             || theValue.getOpcode() == ISD::AssertSext)) {
 733       // Drill down and get the value for zero- and sign-extended
 734       // quantities
 735       theValue = theValue.getOperand(0);
 736     }
 737
 738     chunk_offset &= 0xf;
 739
 740     SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 741     SDValue insertEltPtr;
 742
 743     // If the base pointer is already a D-form address, then just create
 744     // a new D-form address with a slot offset and the orignal base pointer.
 745     // Otherwise generate a D-form address with the slot offset relative
 746     // to the stack pointer, which is always aligned.
 747     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 748     DEBUG(basePtr.getNode()->dump(&DAG));
 749     DEBUG(cerr << "\n");
 750
 751     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 752         (basePtr.getOpcode() == ISD::ADD
 753          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 754       insertEltPtr = basePtr;
 755     } else {
 756       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 757     }
 758
 759     SDValue insertEltOp =
 760             DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr);
 761     SDValue vectorizeOp =
 762             DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
 763
 764     result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
 765                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 766
 767     result = DAG.getStore(the_chain, result, basePtr,
 768                           LN->getSrcValue(), LN->getSrcValueOffset(),
 769                           LN->isVolatile(), LN->getAlignment());
 770
 771 #if 0 && defined(NDEBUG)
 772     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 773       const SDValue &currentRoot = DAG.getRoot();
 774
 775       DAG.setRoot(result);
 776       cerr << "------- CellSPU:LowerStore result:\n";
 777       DAG.dump();
 778       cerr << "-------\n";
 779       DAG.setRoot(currentRoot);
 780     }
 781 #endif
 782
 783     return result;
 784     /*UNREACHED*/
 785   }
 786   case ISD::PRE_INC:
 787   case ISD::PRE_DEC:
 788   case ISD::POST_INC:
 789   case ISD::POST_DEC:
 790   case ISD::LAST_INDEXED_MODE:
 791     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 792             "UNINDEXED\n";
 793     cerr << (unsigned) SN->getAddressingMode() << "\n";
 794     abort();
 795     /*NOTREACHED*/
 796   }
 797
 798   return SDValue();
 799 }
 800
 801 /// Generate the address of a constant pool entry.
 802 static SDValue
 803 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 804   MVT PtrVT = Op.getValueType();
 805   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 806   Constant *C = CP->getConstVal();
 807   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 808   SDValue Zero = DAG.getConstant(0, PtrVT);
 809   const TargetMachine &TM = DAG.getTarget();
 810
 811   if (TM.getRelocationModel() == Reloc::Static) {
 812     if (!ST->usingLargeMem()) {
 813       // Just return the SDValue with the constant pool address in it.
 814       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 815     } else {
 816       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 817       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 818       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 819     }
 820   }
 821
 822   assert(0 &&
 823          "LowerConstantPool: Relocation model other than static"
 824          " not supported.");
 825   return SDValue();
 826 }
 827
 828 static SDValue
 829 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 830   MVT PtrVT = Op.getValueType();
 831   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 832   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 833   SDValue Zero = DAG.getConstant(0, PtrVT);
 834   const TargetMachine &TM = DAG.getTarget();
 835
 836   if (TM.getRelocationModel() == Reloc::Static) {
 837     if (!ST->usingLargeMem()) {
 838       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 839     } else {
 840       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 841       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 842       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 843     }
 844   }
 845
 846   assert(0 &&
 847          "LowerJumpTable: Relocation model other than static not supported.");
 848   return SDValue();
 849 }
 850
 851 static SDValue
 852 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 853   MVT PtrVT = Op.getValueType();
 854   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 855   GlobalValue *GV = GSDN->getGlobal();
 856   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 857   const TargetMachine &TM = DAG.getTarget();
 858   SDValue Zero = DAG.getConstant(0, PtrVT);
 859
 860   if (TM.getRelocationModel() == Reloc::Static) {
 861     if (!ST->usingLargeMem()) {
 862       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 863     } else {
 864       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 865       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 866       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 867     }
 868   } else {
 869     cerr << "LowerGlobalAddress: Relocation model other than static not "
 870          << "supported.\n";
 871     abort();
 872     /*NOTREACHED*/
 873   }
 874
 875   return SDValue();
 876 }
 877
 878 //! Custom lower i64 integer constants
 879 /*!
 880  This code inserts all of the necessary juggling that needs to occur to load
 881  a 64-bit constant into a register.
 882  */
 883 static SDValue
 884 LowerConstant(SDValue Op, SelectionDAG &DAG) {
 885   MVT VT = Op.getValueType();
 886   ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
 887
 888   if (VT == MVT::i64) {
 889     SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
 890     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
 891                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 892   } else {
 893     cerr << "LowerConstant: unhandled constant type "
 894          << VT.getMVTString()
 895          << "\n";
 896     abort();
 897     /*NOTREACHED*/
 898   }
 899
 900   return SDValue();
 901 }
 902
 903 //! Custom lower double precision floating point constants
 904 static SDValue
 905 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 906   MVT VT = Op.getValueType();
 907   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 908
 909   assert((FP != 0) &&
 910          "LowerConstantFP: Node is not ConstantFPSDNode");
 911
 912   if (VT == MVT::f64) {
 913     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 914     return DAG.getNode(ISD::BIT_CONVERT, VT,
 915                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 916   }
 917
 918   return SDValue();
 919 }
 920
 921 //! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 922 static SDValue
 923 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
 924 {
 925   SDValue Cond = Op.getOperand(1);
 926   MVT CondVT = Cond.getValueType();
 927   MVT CondNVT;
 928
 929   if (CondVT == MVT::i8) {
 930     CondNVT = MVT::i16;
 931     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 932                       Op.getOperand(0),
 933                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 934                       Op.getOperand(2));
 935   } else
 936     return SDValue();                // Unchanged
 937 }
 938
 939 static SDValue
 940 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 941 {
 942   MachineFunction &MF = DAG.getMachineFunction();
 943   MachineFrameInfo *MFI = MF.getFrameInfo();
 944   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 945   SmallVector<SDValue, 48> ArgValues;
 946   SDValue Root = Op.getOperand(0);
 947   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 948
 949   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 950   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 951
 952   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 953   unsigned ArgRegIdx = 0;
 954   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 955
 956   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 957
 958   // Add DAG nodes to load the arguments or copy them out of registers.
 959   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 960        ArgNo != e; ++ArgNo) {
 961     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 962     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 963     SDValue ArgVal;
 964
 965     if (ArgRegIdx < NumArgRegs) {
 966       const TargetRegisterClass *ArgRegClass;
 967
 968       switch (ObjectVT.getSimpleVT()) {
 969       default: {
 970         cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 971              << ObjectVT.getMVTString()
 972              << "\n";
 973         abort();
 974       }
 975       case MVT::i8:
 976         ArgRegClass = &SPU::R8CRegClass;
 977         break;
 978       case MVT::i16:
 979         ArgRegClass = &SPU::R16CRegClass;
 980         break;
 981       case MVT::i32:
 982         ArgRegClass = &SPU::R32CRegClass;
 983         break;
 984       case MVT::i64:
 985         ArgRegClass = &SPU::R64CRegClass;
 986         break;
 987       case MVT::f32:
 988         ArgRegClass = &SPU::R32FPRegClass;
 989         break;
 990       case MVT::f64:
 991         ArgRegClass = &SPU::R64FPRegClass;
 992         break;
 993       case MVT::v2f64:
 994       case MVT::v4f32:
 995       case MVT::v2i64:
 996       case MVT::v4i32:
 997       case MVT::v8i16:
 998       case MVT::v16i8:
 999         ArgRegClass = &SPU::VECREGRegClass;
1000         break;
1001       }
1002
1003       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1004       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1005       ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1006       ++ArgRegIdx;
1007     } else {
1008       // We need to load the argument to a virtual register if we determined
1009       // above that we ran out of physical registers of the appropriate type
1010       // or we're forced to do vararg
1011       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1012       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1013       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1014       ArgOffset += StackSlotSize;
1015     }
1016
1017     ArgValues.push_back(ArgVal);
1018     // Update the chain
1019     Root = ArgVal.getOperand(0);
1020   }
1021
1022   // vararg handling:
1023   if (isVarArg) {
1024     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1025     // We will spill (79-3)+1 registers to the stack
1026     SmallVector<SDValue, 79-3+1> MemOps;
1027
1028     // Create the frame slot
1029
1030     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1031       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1032       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1033       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1034       SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1035       Root = Store.getOperand(0);
1036       MemOps.push_back(Store);
1037
1038       // Increment address by stack slot size for the next stored argument
1039       ArgOffset += StackSlotSize;
1040     }
1041     if (!MemOps.empty())
1042       Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1043   }
1044
1045   ArgValues.push_back(Root);
1046
1047   // Return the new list of results.
1048   return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1049                             ArgValues.size());
1050 }
1051
1052 /// isLSAAddress - Return the immediate to use if the specified
1053 /// value is representable as a LSA address.
1054 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1055   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1056   if (!C) return 0;
1057
1058   int Addr = C->getZExtValue();
1059   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1060       (Addr << 14 >> 14) != Addr)
1061     return 0;  // Top 14 bits have to be sext of immediate.
1062
1063   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1064 }
1065
1066 static
1067 SDValue
1068 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1069   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1070   SDValue Chain = TheCall->getChain();
1071   SDValue Callee    = TheCall->getCallee();
1072   unsigned NumOps     = TheCall->getNumArgs();
1073   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1074   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1075   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1076
1077   // Handy pointer type
1078   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1079
1080   // Accumulate how many bytes are to be pushed on the stack, including the
1081   // linkage area, and parameter passing area.  According to the SPU ABI,
1082   // we minimally need space for [LR] and [SP]
1083   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1084
1085   // Set up a copy of the stack pointer for use loading and storing any
1086   // arguments that may not fit in the registers available for argument
1087   // passing.
1088   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1089
1090   // Figure out which arguments are going to go in registers, and which in
1091   // memory.
1092   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1093   unsigned ArgRegIdx = 0;
1094
1095   // Keep track of registers passing arguments
1096   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1097   // And the arguments passed on the stack
1098   SmallVector<SDValue, 8> MemOpChains;
1099
1100   for (unsigned i = 0; i != NumOps; ++i) {
1101     SDValue Arg = TheCall->getArg(i);
1102
1103     // PtrOff will be used to store the current argument to the stack if a
1104     // register cannot be found for it.
1105     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1106     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1107
1108     switch (Arg.getValueType().getSimpleVT()) {
1109     default: assert(0 && "Unexpected ValueType for argument!");
1110     case MVT::i32:
1111     case MVT::i64:
1112     case MVT::i128:
1113       if (ArgRegIdx != NumArgRegs) {
1114         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1115       } else {
1116         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1117         ArgOffset += StackSlotSize;
1118       }
1119       break;
1120     case MVT::f32:
1121     case MVT::f64:
1122       if (ArgRegIdx != NumArgRegs) {
1123         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1124       } else {
1125         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1126         ArgOffset += StackSlotSize;
1127       }
1128       break;
1129     case MVT::v4f32:
1130     case MVT::v4i32:
1131     case MVT::v8i16:
1132     case MVT::v16i8:
1133       if (ArgRegIdx != NumArgRegs) {
1134         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1135       } else {
1136         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1137         ArgOffset += StackSlotSize;
1138       }
1139       break;
1140     }
1141   }
1142
1143   // Update number of stack bytes actually used, insert a call sequence start
1144   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1145   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1146                                                             true));
1147
1148   if (!MemOpChains.empty()) {
1149     // Adjust the stack pointer for the stack arguments.
1150     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1151                         &MemOpChains[0], MemOpChains.size());
1152   }
1153
1154   // Build a sequence of copy-to-reg nodes chained together with token chain
1155   // and flag operands which copy the outgoing args into the appropriate regs.
1156   SDValue InFlag;
1157   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1158     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1159                              InFlag);
1160     InFlag = Chain.getValue(1);
1161   }
1162
1163   SmallVector<SDValue, 8> Ops;
1164   unsigned CallOpc = SPUISD::CALL;
1165
1166   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1167   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1168   // node so that legalize doesn't hack it.
1169   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1170     GlobalValue *GV = G->getGlobal();
1171     MVT CalleeVT = Callee.getValueType();
1172     SDValue Zero = DAG.getConstant(0, PtrVT);
1173     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1174
1175     if (!ST->usingLargeMem()) {
1176       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1177       // style calls, otherwise, external symbols are BRASL calls. This assumes
1178       // that declared/defined symbols are in the same compilation unit and can
1179       // be reached through PC-relative jumps.
1180       //
1181       // NOTE:
1182       // This may be an unsafe assumption for JIT and really large compilation
1183       // units.
1184       if (GV->isDeclaration()) {
1185         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1186       } else {
1187         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1188       }
1189     } else {
1190       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1191       // address pairs:
1192       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1193     }
1194   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1195     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1196   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1197     // If this is an absolute destination address that appears to be a legal
1198     // local store address, use the munged value.
1199     Callee = SDValue(Dest, 0);
1200   }
1201
1202   Ops.push_back(Chain);
1203   Ops.push_back(Callee);
1204
1205   // Add argument registers to the end of the list so that they are known live
1206   // into the call.
1207   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1208     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1209                                   RegsToPass[i].second.getValueType()));
1210
1211   if (InFlag.getNode())
1212     Ops.push_back(InFlag);
1213   // Returns a chain and a flag for retval copy to use.
1214   Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1215                       &Ops[0], Ops.size());
1216   InFlag = Chain.getValue(1);
1217
1218   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1219                              DAG.getIntPtrConstant(0, true), InFlag);
1220   if (TheCall->getValueType(0) != MVT::Other)
1221     InFlag = Chain.getValue(1);
1222
1223   SDValue ResultVals[3];
1224   unsigned NumResults = 0;
1225
1226   // If the call has results, copy the values out of the ret val registers.
1227   switch (TheCall->getValueType(0).getSimpleVT()) {
1228   default: assert(0 && "Unexpected ret value!");
1229   case MVT::Other: break;
1230   case MVT::i32:
1231     if (TheCall->getValueType(1) == MVT::i32) {
1232       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1233       ResultVals[0] = Chain.getValue(0);
1234       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1235                                  Chain.getValue(2)).getValue(1);
1236       ResultVals[1] = Chain.getValue(0);
1237       NumResults = 2;
1238     } else {
1239       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1240       ResultVals[0] = Chain.getValue(0);
1241       NumResults = 1;
1242     }
1243     break;
1244   case MVT::i64:
1245     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1246     ResultVals[0] = Chain.getValue(0);
1247     NumResults = 1;
1248     break;
1249   case MVT::f32:
1250   case MVT::f64:
1251     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1252                                InFlag).getValue(1);
1253     ResultVals[0] = Chain.getValue(0);
1254     NumResults = 1;
1255     break;
1256   case MVT::v2f64:
1257   case MVT::v4f32:
1258   case MVT::v4i32:
1259   case MVT::v8i16:
1260   case MVT::v16i8:
1261     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1262                                    InFlag).getValue(1);
1263     ResultVals[0] = Chain.getValue(0);
1264     NumResults = 1;
1265     break;
1266   }
1267
1268   // If the function returns void, just return the chain.
1269   if (NumResults == 0)
1270     return Chain;
1271
1272   // Otherwise, merge everything together with a MERGE_VALUES node.
1273   ResultVals[NumResults++] = Chain;
1274   SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1275   return Res.getValue(Op.getResNo());
1276 }
1277
1278 static SDValue
1279 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1280   SmallVector<CCValAssign, 16> RVLocs;
1281   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1282   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1283   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1284   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1285
1286   // If this is the first return lowered for this function, add the regs to the
1287   // liveout set for the function.
1288   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1289     for (unsigned i = 0; i != RVLocs.size(); ++i)
1290       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1291   }
1292
1293   SDValue Chain = Op.getOperand(0);
1294   SDValue Flag;
1295
1296   // Copy the result values into the output registers.
1297   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1298     CCValAssign &VA = RVLocs[i];
1299     assert(VA.isRegLoc() && "Can only return in registers!");
1300     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1301     Flag = Chain.getValue(1);
1302   }
1303
1304   if (Flag.getNode())
1305     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1306   else
1307     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1308 }
1309
1310
1311 //===----------------------------------------------------------------------===//
1312 // Vector related lowering:
1313 //===----------------------------------------------------------------------===//
1314
1315 static ConstantSDNode *
1316 getVecImm(SDNode *N) {
1317   SDValue OpVal(0, 0);
1318
1319   // Check to see if this buildvec has a single non-undef value in its elements.
1320   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1321     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1322     if (OpVal.getNode() == 0)
1323       OpVal = N->getOperand(i);
1324     else if (OpVal != N->getOperand(i))
1325       return 0;
1326   }
1327
1328   if (OpVal.getNode() != 0) {
1329     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1330       return CN;
1331     }
1332   }
1333
1334   return 0; // All UNDEF: use implicit def.; not Constant node
1335 }
1336
1337 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1338 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1339 /// constant
1340 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1341                               MVT ValueType) {
1342   if (ConstantSDNode *CN = getVecImm(N)) {
1343     uint64_t Value = CN->getZExtValue();
1344     if (ValueType == MVT::i64) {
1345       uint64_t UValue = CN->getZExtValue();
1346       uint32_t upper = uint32_t(UValue >> 32);
1347       uint32_t lower = uint32_t(UValue);
1348       if (upper != lower)
1349         return SDValue();
1350       Value = Value >> 32;
1351     }
1352     if (Value <= 0x3ffff)
1353       return DAG.getTargetConstant(Value, ValueType);
1354   }
1355
1356   return SDValue();
1357 }
1358
1359 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1360 /// and the value fits into a signed 16-bit constant, and if so, return the
1361 /// constant
1362 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1363                               MVT ValueType) {
1364   if (ConstantSDNode *CN = getVecImm(N)) {
1365     int64_t Value = CN->getSExtValue();
1366     if (ValueType == MVT::i64) {
1367       uint64_t UValue = CN->getZExtValue();
1368       uint32_t upper = uint32_t(UValue >> 32);
1369       uint32_t lower = uint32_t(UValue);
1370       if (upper != lower)
1371         return SDValue();
1372       Value = Value >> 32;
1373     }
1374     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1375       return DAG.getTargetConstant(Value, ValueType);
1376     }
1377   }
1378
1379   return SDValue();
1380 }
1381
1382 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1383 /// and the value fits into a signed 10-bit constant, and if so, return the
1384 /// constant
1385 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1386                               MVT ValueType) {
1387   if (ConstantSDNode *CN = getVecImm(N)) {
1388     int64_t Value = CN->getSExtValue();
1389     if (ValueType == MVT::i64) {
1390       uint64_t UValue = CN->getZExtValue();
1391       uint32_t upper = uint32_t(UValue >> 32);
1392       uint32_t lower = uint32_t(UValue);
1393       if (upper != lower)
1394         return SDValue();
1395       Value = Value >> 32;
1396     }
1397     if (isS10Constant(Value))
1398       return DAG.getTargetConstant(Value, ValueType);
1399   }
1400
1401   return SDValue();
1402 }
1403
1404 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1405 /// and the value fits into a signed 8-bit constant, and if so, return the
1406 /// constant.
1407 ///
1408 /// @note: The incoming vector is v16i8 because that's the only way we can load
1409 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1410 /// same value.
1411 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1412                              MVT ValueType) {
1413   if (ConstantSDNode *CN = getVecImm(N)) {
1414     int Value = (int) CN->getZExtValue();
1415     if (ValueType == MVT::i16
1416         && Value <= 0xffff                 /* truncated from uint64_t */
1417         && ((short) Value >> 8) == ((short) Value & 0xff))
1418       return DAG.getTargetConstant(Value & 0xff, ValueType);
1419     else if (ValueType == MVT::i8
1420              && (Value & 0xff) == Value)
1421       return DAG.getTargetConstant(Value, ValueType);
1422   }
1423
1424   return SDValue();
1425 }
1426
1427 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1428 /// and the value fits into a signed 16-bit constant, and if so, return the
1429 /// constant
1430 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1431                                MVT ValueType) {
1432   if (ConstantSDNode *CN = getVecImm(N)) {
1433     uint64_t Value = CN->getZExtValue();
1434     if ((ValueType == MVT::i32
1435           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1436         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1437       return DAG.getTargetConstant(Value >> 16, ValueType);
1438   }
1439
1440   return SDValue();
1441 }
1442
1443 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1444 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1445   if (ConstantSDNode *CN = getVecImm(N)) {
1446     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1447   }
1448
1449   return SDValue();
1450 }
1451
1452 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1453 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1454   if (ConstantSDNode *CN = getVecImm(N)) {
1455     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1456   }
1457
1458   return SDValue();
1459 }
1460
1461 // If this is a vector of constants or undefs, get the bits.  A bit in
1462 // UndefBits is set if the corresponding element of the vector is an
1463 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1464 // zero.   Return true if this is not an array of constants, false if it is.
1465 //
1466 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1467                                        uint64_t UndefBits[2]) {
1468   // Start with zero'd results.
1469   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1470
1471   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1472   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1473     SDValue OpVal = BV->getOperand(i);
1474
1475     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1476     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1477
1478     uint64_t EltBits = 0;
1479     if (OpVal.getOpcode() == ISD::UNDEF) {
1480       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1481       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1482       continue;
1483     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1484       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1485     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1486       const APFloat &apf = CN->getValueAPF();
1487       EltBits = (CN->getValueType(0) == MVT::f32
1488                  ? FloatToBits(apf.convertToFloat())
1489                  : DoubleToBits(apf.convertToDouble()));
1490     } else {
1491       // Nonconstant element.
1492       return true;
1493     }
1494
1495     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1496   }
1497
1498   //printf("%llx %llx  %llx %llx\n",
1499   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1500   return false;
1501 }
1502
/// Test whether a 128-bit constant is a splat (repetition) of a value whose
/// width is selected by MinSplatBits, ignoring undef bits.  For example,
/// "0x01010101010101..." is reported as a splat of 0x01 (SplatSize = 1 byte)
/// when MinSplatBits is below 16, of 0x0101 when it is 16..31, of 0x01010101
/// when it is 32..63, and of the whole first 64-bit word otherwise.
///
/// The halves are compared from the widest level down to the selected width;
/// the first mismatch makes the function return false, in which case
/// SplatBits, SplatUndef and SplatSize are left untouched.  On success they
/// receive the splat value, its undef mask and its size in bytes.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Fold the value down level by level: defined bits are OR-ed together,
  // while a bit stays undefined only if it is undefined in both halves.
  const uint64_t Val64 = Bits128[0] | Bits128[1];
  const uint64_t Und64 = Undef128[0] & Undef128[1];
  const uint32_t Val32 = uint32_t(Val64) | uint32_t(Val64 >> 32);
  const uint32_t Und32 = uint32_t(Und64) & uint32_t(Und64 >> 32);
  const uint16_t Val16 = uint16_t(Val32) | uint16_t(Val32 >> 16);
  const uint16_t Und16 = uint16_t(Und32) & uint16_t(Und32 >> 16);

  // Don't let undefs prevent splats from matching.  The top 64 bits must be
  // the same as the lower 64 bits, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  if (MinSplatBits >= 64) {
    SplatBits = Bits128[0];
    SplatUndef = Undef128[0];
    SplatSize = 8;
    return true;
  }

  // The top 32 bits must be the same as the lower 32 bits, ignoring undefs.
  if ((Val64 & (~Und64 >> 32)) != ((Val64 >> 32) & ~Und64))
    return false;

  if (MinSplatBits >= 32) {
    SplatBits = Val32;
    SplatUndef = Und32;
    SplatSize = 4;
    return true;
  }

  // The top 16 bits must be the same as the lower 16 bits, ignoring undefs.
  if ((Val32 & (~Und32 >> 16)) != ((Val32 >> 16) & ~Und32))
    return false;

  if (MinSplatBits >= 16) {
    SplatBits = Val16;
    SplatUndef = Und16;
    SplatSize = 2;
    return true;
  }

  // The top 8 bits must be the same as the lower 8 bits, ignoring undefs.
  if ((Val16 & (uint16_t(~Und16) >> 8)) != ((Val16 >> 8) & ~Und16))
    return false;

  // We have an 8-bit splat.
  SplatBits  = uint8_t(Val16) | uint8_t(Val16 >> 8);
  SplatUndef = uint8_t(Und16) & uint8_t(Und16 >> 8);
  SplatSize = 1;
  return true;
}
1567
1568 // If this is a case we can't handle, return null and let the default
1569 // expansion code take care of it.  If we CAN select this case, and if it
1570 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1571 // this case more efficiently than a constant pool load, lower it to the
1572 // sequence of ops that should be used.
1573 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1574   MVT VT = Op.getValueType();
1575   // If this is a vector of constants or undefs, get the bits.  A bit in
1576   // UndefBits is set if the corresponding element of the vector is an
1577   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1578   // zero.
1579   uint64_t VectorBits[2];
1580   uint64_t UndefBits[2];
1581   uint64_t SplatBits, SplatUndef;
1582   int SplatSize;
1583   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1584       || !isConstantSplat(VectorBits, UndefBits,
1585                           VT.getVectorElementType().getSizeInBits(),
1586                           SplatBits, SplatUndef, SplatSize))
1587     return SDValue();   // Not a constant vector, not a splat.
1588
1589   switch (VT.getSimpleVT()) {
1590   default:
1591   case MVT::v4f32: {
1592     uint32_t Value32 = SplatBits;
1593     assert(SplatSize == 4
1594            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596     SDValue T = DAG.getConstant(Value32, MVT::i32);
1597     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1598                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1599     break;
1600   }
1601   case MVT::v2f64: {
1602     uint64_t f64val = SplatBits;
1603     assert(SplatSize == 8
1604            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1605     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606     SDValue T = DAG.getConstant(f64val, MVT::i64);
1607     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1608                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1609     break;
1610   }
1611   case MVT::v16i8: {
1612    // 8-bit constants have to be expanded to 16-bits
1613    unsigned short Value16 = SplatBits | (SplatBits << 8);
1614    SDValue Ops[8];
1615    for (int i = 0; i < 8; ++i)
1616      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1617    return DAG.getNode(ISD::BIT_CONVERT, VT,
1618                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1619   }
1620   case MVT::v8i16: {
1621     unsigned short Value16;
1622     if (SplatSize == 2)
1623       Value16 = (unsigned short) (SplatBits & 0xffff);
1624     else
1625       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1626     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1627     SDValue Ops[8];
1628     for (int i = 0; i < 8; ++i) Ops[i] = T;
1629     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1630   }
1631   case MVT::v4i32: {
1632     unsigned int Value = SplatBits;
1633     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1634     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1635   }
1636   case MVT::v2i64: {
1637     uint64_t val = SplatBits;
1638     uint32_t upper = uint32_t(val >> 32);
1639     uint32_t lower = uint32_t(val);
1640
1641     if (upper == lower) {
1642       // Magic constant that can be matched by IL, ILA, et. al.
1643       SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1644       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1645     } else {
1646       SDValue LO32;
1647       SDValue HI32;
1648       SmallVector<SDValue, 16> ShufBytes;
1649       SDValue Result;
1650       bool upper_special, lower_special;
1651
1652       // NOTE: This code creates common-case shuffle masks that can be easily
1653       // detected as common expressions. It is not attempting to create highly
1654       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1655
1656       // Detect if the upper or lower half is a special shuffle mask pattern:
1657       upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1658       lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1659
1660       // Create lower vector if not a special pattern
1661       if (!lower_special) {
1662         SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1663         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1664                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1665                                        LO32C, LO32C, LO32C, LO32C));
1666       }
1667
1668       // Create upper vector if not a special pattern
1669       if (!upper_special) {
1670         SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1671         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1672                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1673                                        HI32C, HI32C, HI32C, HI32C));
1674       }
1675
1676       // If either upper or lower are special, then the two input operands are
1677       // the same (basically, one of them is a "don't care")
1678       if (lower_special)
1679         LO32 = HI32;
1680       if (upper_special)
1681         HI32 = LO32;
1682       if (lower_special && upper_special) {
1683         // Unhappy situation... both upper and lower are special, so punt with
1684         // a target constant:
1685         SDValue Zero = DAG.getConstant(0, MVT::i32);
1686         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1687                                   Zero, Zero);
1688       }
1689
1690       for (int i = 0; i < 4; ++i) {
1691         uint64_t val = 0;
1692         for (int j = 0; j < 4; ++j) {
1693           SDValue V;
1694           bool process_upper, process_lower;
1695           val <<= 8;
1696           process_upper = (upper_special && (i & 1) == 0);
1697           process_lower = (lower_special && (i & 1) == 1);
1698
1699           if (process_upper || process_lower) {
1700             if ((process_upper && upper == 0)
1701                 || (process_lower && lower == 0))
1702               val |= 0x80;
1703             else if ((process_upper && upper == 0xffffffff)
1704                      || (process_lower && lower == 0xffffffff))
1705               val |= 0xc0;
1706             else if ((process_upper && upper == 0x80000000)
1707                      || (process_lower && lower == 0x80000000))
1708               val |= (j == 0 ? 0xe0 : 0x80);
1709           } else
1710             val |= i * 4 + j + ((i & 1) * 16);
1711         }
1712
1713         ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1714       }
1715
1716       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1717                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1718                                      &ShufBytes[0], ShufBytes.size()));
1719     }
1720   }
1721   }
1722
1723   return SDValue();
1724 }
1725
1726 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1727 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1728 /// permutation vector, V3, is monotonically increasing with one "exception"
1729 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1730 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1731 /// In either case, the net result is going to eventually invoke SHUFB to
1732 /// permute/shuffle the bytes from V1 and V2.
1733 /// \note
1734 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1735 /// control word for byte/halfword/word insertion. This takes care of a single
1736 /// element move from V2 into V1.
1737 /// \note
1738 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1739 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1740   SDValue V1 = Op.getOperand(0);
1741   SDValue V2 = Op.getOperand(1);
1742   SDValue PermMask = Op.getOperand(2);
1743
1744   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1745
1746   // If we have a single element being moved from V1 to V2, this can be handled
1747   // using the C*[DX] compute mask instructions, but the vector elements have
1748   // to be monotonically increasing with one exception element.
1749   MVT EltVT = V1.getValueType().getVectorElementType();
1750   unsigned EltsFromV2 = 0;
1751   unsigned V2Elt = 0;
1752   unsigned V2EltIdx0 = 0;
1753   unsigned CurrElt = 0;
1754   bool monotonic = true;
1755   if (EltVT == MVT::i8)
1756     V2EltIdx0 = 16;
1757   else if (EltVT == MVT::i16)
1758     V2EltIdx0 = 8;
1759   else if (EltVT == MVT::i32)
1760     V2EltIdx0 = 4;
1761   else
1762     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1763
1764   for (unsigned i = 0, e = PermMask.getNumOperands();
1765        EltsFromV2 <= 1 && monotonic && i != e;
1766        ++i) {
1767     unsigned SrcElt;
1768     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1769       SrcElt = 0;
1770     else
1771       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1772
1773     if (SrcElt >= V2EltIdx0) {
1774       ++EltsFromV2;
1775       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1776     } else if (CurrElt != SrcElt) {
1777       monotonic = false;
1778     }
1779
1780     ++CurrElt;
1781   }
1782
1783   if (EltsFromV2 == 1 && monotonic) {
1784     // Compute mask and shuffle
1785     MachineFunction &MF = DAG.getMachineFunction();
1786     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1787     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1788     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1789     // Initialize temporary register to 0
1790     SDValue InitTempReg =
1791       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1792     // Copy register's contents as index in SHUFFLE_MASK:
1793     SDValue ShufMaskOp =
1794       DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(),
1795                   DAG.getTargetConstant(V2Elt, MVT::i32),
1796                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1797     // Use shuffle mask in SHUFB synthetic instruction:
1798     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1799   } else {
1800    // Convert the SHUFFLE_VECTOR mask's input element units to the
1801    // actual bytes.
1802     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1803
1804     SmallVector<SDValue, 16> ResultMask;
1805     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1806       unsigned SrcElt;
1807       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1808         SrcElt = 0;
1809       else
1810         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1811
1812       for (unsigned j = 0; j < BytesPerElement; ++j) {
1813         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1814                                              MVT::i8));
1815       }
1816     }
1817
1818     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1819                                       &ResultMask[0], ResultMask.size());
1820     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1821   }
1822 }
1823
1824 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1825   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1826
1827   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1828     // For a constant, build the appropriate constant vector, which will
1829     // eventually simplify to a vector register load.
1830
1831     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1832     SmallVector<SDValue, 16> ConstVecValues;
1833     MVT VT;
1834     size_t n_copies;
1835
1836     // Create a constant vector:
1837     switch (Op.getValueType().getSimpleVT()) {
1838     default: assert(0 && "Unexpected constant value type in "
1839                          "LowerSCALAR_TO_VECTOR");
1840     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1841     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1842     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1843     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1844     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1845     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1846     }
1847
1848     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1849     for (size_t j = 0; j < n_copies; ++j)
1850       ConstVecValues.push_back(CValue);
1851
1852     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1853                        &ConstVecValues[0], ConstVecValues.size());
1854   } else {
1855     // Otherwise, copy the value from one register to another:
1856     switch (Op0.getValueType().getSimpleVT()) {
1857     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1858     case MVT::i8:
1859     case MVT::i16:
1860     case MVT::i32:
1861     case MVT::i64:
1862     case MVT::f32:
1863     case MVT::f64:
1864       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1865     }
1866   }
1867
1868   return SDValue();
1869 }
1870
1871 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1872   switch (Op.getValueType().getSimpleVT()) {
1873   default:
1874     cerr << "CellSPU: Unknown vector multiplication, got "
1875          << Op.getValueType().getMVTString()
1876          << "\n";
1877     abort();
1878     /*NOTREACHED*/
1879
1880   case MVT::v4i32: {
1881     SDValue rA = Op.getOperand(0);
1882     SDValue rB = Op.getOperand(1);
1883     SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1884     SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1885     SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1886     SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1887
1888     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1889     break;
1890   }
1891
1892   // Multiply two v8i16 vectors (pipeline friendly version):
1893   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1894   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1895   // c) Use SELB to select upper and lower halves from the intermediate results
1896   //
1897   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1898   // dual-issue. This code does manage to do this, even if it's a little on
1899   // the wacky side
1900   case MVT::v8i16: {
1901     MachineFunction &MF = DAG.getMachineFunction();
1902     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1903     SDValue Chain = Op.getOperand(0);
1904     SDValue rA = Op.getOperand(0);
1905     SDValue rB = Op.getOperand(1);
1906     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1907     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1908
1909     SDValue FSMBOp =
1910       DAG.getCopyToReg(Chain, FSMBIreg,
1911                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1912                                    DAG.getConstant(0xcccc, MVT::i16)));
1913
1914     SDValue HHProd =
1915       DAG.getCopyToReg(FSMBOp, HiProdReg,
1916                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1917
1918     SDValue HHProd_v4i32 =
1919       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1920                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1921
1922     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1923                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1924                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1925                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1926                                                HHProd_v4i32,
1927                                                DAG.getConstant(16, MVT::i16))),
1928                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1929   }
1930
1931   // This M00sE is N@stI! (apologies to Monty Python)
1932   //
1933   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1934   // is to break it all apart, sign extend, and reassemble the various
1935   // intermediate products.
1936   case MVT::v16i8: {
1937     SDValue rA = Op.getOperand(0);
1938     SDValue rB = Op.getOperand(1);
1939     SDValue c8 = DAG.getConstant(8, MVT::i32);
1940     SDValue c16 = DAG.getConstant(16, MVT::i32);
1941
1942     SDValue LLProd =
1943       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1944                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1945                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1946
1947     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1948
1949     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1950
1951     SDValue LHProd =
1952       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1953                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1954
1955     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1956                                      DAG.getConstant(0x2222, MVT::i16));
1957
1958     SDValue LoProdParts =
1959       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1960                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
1961                               LLProd, LHProd, FSMBmask));
1962
1963     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1964
1965     SDValue LoProd =
1966       DAG.getNode(ISD::AND, MVT::v4i32,
1967                   LoProdParts,
1968                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1969                               LoProdMask, LoProdMask,
1970                               LoProdMask, LoProdMask));
1971
1972     SDValue rAH =
1973       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1974                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1975
1976     SDValue rBH =
1977       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1978                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1979
1980     SDValue HLProd =
1981       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1982                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1983                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1984
1985     SDValue HHProd_1 =
1986       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1987                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1988                               DAG.getNode(SPUISD::VEC_SRA,
1989                                           MVT::v4i32, rAH, c8)),
1990                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1991                               DAG.getNode(SPUISD::VEC_SRA,
1992                                           MVT::v4i32, rBH, c8)));
1993
1994     SDValue HHProd =
1995       DAG.getNode(SPUISD::SELB, MVT::v8i16,
1996                   HLProd,
1997                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1998                   FSMBmask);
1999
2000     SDValue HiProd =
2001       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2002
2003     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2004                        DAG.getNode(ISD::OR, MVT::v4i32,
2005                                    LoProd, HiProd));
2006   }
2007   }
2008
2009   return SDValue();
2010 }
2011
2012 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2013   MachineFunction &MF = DAG.getMachineFunction();
2014   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2015
2016   SDValue A = Op.getOperand(0);
2017   SDValue B = Op.getOperand(1);
2018   MVT VT = Op.getValueType();
2019
2020   unsigned VRegBR, VRegC;
2021
2022   if (VT == MVT::f32) {
2023     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2024     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2025   } else {
2026     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2027     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2028   }
2029   // TODO: make sure we're feeding FPInterp the right arguments
2030   // Right now: fi B, frest(B)
2031
2032   // Computes BRcpl =
2033   // (Floating Interpolate (FP Reciprocal Estimate B))
2034   SDValue BRcpl =
2035       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2036                        DAG.getNode(SPUISD::FPInterp, VT, B,
2037                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2038
2039   // Computes A * BRcpl and stores in a temporary register
2040   SDValue AxBRcpl =
2041       DAG.getCopyToReg(BRcpl, VRegC,
2042                  DAG.getNode(ISD::FMUL, VT, A,
2043                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2044   // What's the Chain variable do? It's magic!
2045   // TODO: set Chain = Op(0).getEntryNode()
2046
2047   return DAG.getNode(ISD::FADD, VT,
2048                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2049                 DAG.getNode(ISD::FMUL, VT,
2050                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2051                         DAG.getNode(ISD::FSUB, VT, A,
2052                             DAG.getNode(ISD::FMUL, VT, B,
2053                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2054 }
2055
2056 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2057   MVT VT = Op.getValueType();
2058   SDValue N = Op.getOperand(0);
2059   SDValue Elt = Op.getOperand(1);
2060   SDValue retval;
2061
2062   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2063     // Constant argument:
2064     int EltNo = (int) C->getZExtValue();
2065
2066     // sanity checks:
2067     if (VT == MVT::i8 && EltNo >= 16)
2068       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2069     else if (VT == MVT::i16 && EltNo >= 8)
2070       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2071     else if (VT == MVT::i32 && EltNo >= 4)
2072       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2073     else if (VT == MVT::i64 && EltNo >= 2)
2074       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2075
2076     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2077       // i32 and i64: Element 0 is the preferred slot
2078       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
2079     }
2080
2081     // Need to generate shuffle mask and extract:
2082     int prefslot_begin = -1, prefslot_end = -1;
2083     int elt_byte = EltNo * VT.getSizeInBits() / 8;
2084
2085     switch (VT.getSimpleVT()) {
2086     default:
2087       assert(false && "Invalid value type!");
2088     case MVT::i8: {
2089       prefslot_begin = prefslot_end = 3;
2090       break;
2091     }
2092     case MVT::i16: {
2093       prefslot_begin = 2; prefslot_end = 3;
2094       break;
2095     }
2096     case MVT::i32:
2097     case MVT::f32: {
2098       prefslot_begin = 0; prefslot_end = 3;
2099       break;
2100     }
2101     case MVT::i64:
2102     case MVT::f64: {
2103       prefslot_begin = 0; prefslot_end = 7;
2104       break;
2105     }
2106     }
2107
2108     assert(prefslot_begin != -1 && prefslot_end != -1 &&
2109            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2110
2111     unsigned int ShufBytes[16];
2112     for (int i = 0; i < 16; ++i) {
2113       // zero fill uppper part of preferred slot, don't care about the
2114       // other slots:
2115       unsigned int mask_val;
2116       if (i <= prefslot_end) {
2117         mask_val =
2118           ((i < prefslot_begin)
2119            ? 0x80
2120            : elt_byte + (i - prefslot_begin));
2121
2122         ShufBytes[i] = mask_val;
2123       } else
2124         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2125     }
2126
2127     SDValue ShufMask[4];
2128     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2129       unsigned bidx = i / 4;
2130       unsigned int bits = ((ShufBytes[bidx] << 24) |
2131                            (ShufBytes[bidx+1] << 16) |
2132                            (ShufBytes[bidx+2] << 8) |
2133                            ShufBytes[bidx+3]);
2134       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2135     }
2136
2137     SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2138                                       &ShufMask[0],
2139                                       sizeof(ShufMask) / sizeof(ShufMask[0]));
2140
2141     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2142                          DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2143                                      N, N, ShufMaskVec));
2144   } else {
2145     // Variable index: Rotate the requested element into slot 0, then replicate
2146     // slot 0 across the vector
2147     MVT VecVT = N.getValueType();
2148     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2149       cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2150       abort();
2151     }
2152
2153     // Make life easier by making sure the index is zero-extended to i32
2154     if (Elt.getValueType() != MVT::i32)
2155       Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2156
2157     // Scale the index to a bit/byte shift quantity
2158     APInt scaleFactor =
2159             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2160     unsigned scaleShift = scaleFactor.logBase2();
2161     SDValue vecShift;
2162
2163     if (scaleShift > 0) {
2164       // Scale the shift factor:
2165       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2166               DAG.getConstant(scaleShift, MVT::i32));
2167     }
2168
2169     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2170
2171     // Replicate the bytes starting at byte 0 across the entire vector (for
2172     // consistency with the notion of a unified register set)
2173     SDValue replicate;
2174
2175     switch (VT.getSimpleVT()) {
2176     default:
2177       cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2178       abort();
2179       /*NOTREACHED*/
2180     case MVT::i8: {
2181       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2182       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2183                               factor, factor);
2184       break;
2185     }
2186     case MVT::i16: {
2187       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2188       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2189                               factor, factor);
2190       break;
2191     }
2192     case MVT::i32:
2193     case MVT::f32: {
2194       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2195       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2196                               factor, factor);
2197       break;
2198     }
2199     case MVT::i64:
2200     case MVT::f64: {
2201       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2202       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2203       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2204                               loFactor, hiFactor);
2205       break;
2206     }
2207     }
2208
2209     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2210                          DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
2211   }
2212
2213   return retval;
2214 }
2215
2216 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2217   SDValue VecOp = Op.getOperand(0);
2218   SDValue ValOp = Op.getOperand(1);
2219   SDValue IdxOp = Op.getOperand(2);
2220   MVT VT = Op.getValueType();
2221
2222   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2223   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2224
2225   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2226   // Use $2 because it's always 16-byte aligned and it's available:
2227   SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2228
2229   SDValue result =
2230     DAG.getNode(SPUISD::SHUFB, VT,
2231                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2232                 VecOp,
2233                 DAG.getNode(SPUISD::SHUFFLE_MASK, VT,
2234                             DAG.getNode(ISD::ADD, PtrVT,
2235                                         PtrBase,
2236                                         DAG.getConstant(CN->getZExtValue(),
2237                                                         PtrVT))));
2238
2239   return result;
2240 }
2241
2242 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2243 {
2244   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2245
2246   assert(Op.getValueType() == MVT::i8);
2247   switch (Opc) {
2248   default:
2249     assert(0 && "Unhandled i8 math operator");
2250     /*NOTREACHED*/
2251     break;
2252   case ISD::SUB: {
2253     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2254     // the result:
2255     SDValue N1 = Op.getOperand(1);
2256     N0 = (N0.getOpcode() != ISD::Constant
2257           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2258           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2259                             MVT::i16));
2260     N1 = (N1.getOpcode() != ISD::Constant
2261           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2262           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2263                             MVT::i16));
2264     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2265                        DAG.getNode(Opc, MVT::i16, N0, N1));
2266   }
2267   case ISD::ROTR:
2268   case ISD::ROTL: {
2269     SDValue N1 = Op.getOperand(1);
2270     unsigned N1Opc;
2271     N0 = (N0.getOpcode() != ISD::Constant
2272           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2273           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2274                             MVT::i16));
2275     N1Opc = N1.getValueType().bitsLT(MVT::i32)
2276             ? ISD::ZERO_EXTEND
2277             : ISD::TRUNCATE;
2278     N1 = (N1.getOpcode() != ISD::Constant
2279           ? DAG.getNode(N1Opc, MVT::i32, N1)
2280           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2281                             MVT::i32));
2282     SDValue ExpandArg =
2283       DAG.getNode(ISD::OR, MVT::i16, N0,
2284                   DAG.getNode(ISD::SHL, MVT::i16,
2285                               N0, DAG.getConstant(8, MVT::i32)));
2286     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2287                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2288   }
2289   case ISD::SRL:
2290   case ISD::SHL: {
2291     SDValue N1 = Op.getOperand(1);
2292     unsigned N1Opc;
2293     N0 = (N0.getOpcode() != ISD::Constant
2294           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2295           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2296                             MVT::i16));
2297     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2298             ? ISD::ZERO_EXTEND
2299             : ISD::TRUNCATE;
2300     N1 = (N1.getOpcode() != ISD::Constant
2301           ? DAG.getNode(N1Opc, MVT::i16, N1)
2302           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2303                             MVT::i16));
2304     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2305                        DAG.getNode(Opc, MVT::i16, N0, N1));
2306   }
2307   case ISD::SRA: {
2308     SDValue N1 = Op.getOperand(1);
2309     unsigned N1Opc;
2310     N0 = (N0.getOpcode() != ISD::Constant
2311           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2312           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2313                             MVT::i16));
2314     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2315             ? ISD::SIGN_EXTEND
2316             : ISD::TRUNCATE;
2317     N1 = (N1.getOpcode() != ISD::Constant
2318           ? DAG.getNode(N1Opc, MVT::i16, N1)
2319           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2320                             MVT::i16));
2321     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2322                        DAG.getNode(Opc, MVT::i16, N0, N1));
2323   }
2324   case ISD::MUL: {
2325     SDValue N1 = Op.getOperand(1);
2326     unsigned N1Opc;
2327     N0 = (N0.getOpcode() != ISD::Constant
2328           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2329           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2330                             MVT::i16));
2331     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2332     N1 = (N1.getOpcode() != ISD::Constant
2333           ? DAG.getNode(N1Opc, MVT::i16, N1)
2334           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2335                             MVT::i16));
2336     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2337                        DAG.getNode(Opc, MVT::i16, N0, N1));
2338     break;
2339   }
2340   }
2341
2342   return SDValue();
2343 }
2344
2345 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2346 {
2347   MVT VT = Op.getValueType();
2348   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2349
2350   SDValue Op0 = Op.getOperand(0);
2351
2352   switch (Opc) {
2353   case ISD::ZERO_EXTEND:
2354   case ISD::SIGN_EXTEND:
2355   case ISD::ANY_EXTEND: {
2356     MVT Op0VT = Op0.getValueType();
2357     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2358
2359     assert(Op0VT == MVT::i32
2360            && "CellSPU: Zero/sign extending something other than i32");
2361
2362     DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
2363
2364     SDValue PromoteScalar =
2365             DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2366
2367     if (Opc != ISD::SIGN_EXTEND) {
2368       // Use a shuffle to zero extend the i32 to i64 directly:
2369       SDValue shufMask =
2370               DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2371                           DAG.getConstant(0x80808080, MVT::i32),
2372                           DAG.getConstant(0x00010203, MVT::i32),
2373                           DAG.getConstant(0x80808080, MVT::i32),
2374                           DAG.getConstant(0x08090a0b, MVT::i32));
2375       SDValue zextShuffle =
2376               DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2377                           PromoteScalar, PromoteScalar, shufMask);
2378
2379       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2380                          DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
2381     } else {
2382       // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
2383       // right and propagate the sign bit) instruction.
2384       SDValue RotQuad =
2385               DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
2386                           PromoteScalar, DAG.getConstant(4, MVT::i32));
2387       SDValue SignQuad =
2388               DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
2389                           PromoteScalar, DAG.getConstant(32, MVT::i32));
2390       SDValue SelMask =
2391               DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
2392                           DAG.getConstant(0xf0f0, MVT::i16));
2393       SDValue CombineQuad =
2394               DAG.getNode(SPUISD::SELB, Op0VecVT,
2395                           SignQuad, RotQuad, SelMask);
2396
2397       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2398                          DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
2399     }
2400   }
2401
2402   case ISD::ADD: {
2403     // Turn operands into vectors to satisfy type checking (shufb works on
2404     // vectors)
2405     SDValue Op0 =
2406       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2407     SDValue Op1 =
2408       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2409     SmallVector<SDValue, 16> ShufBytes;
2410
2411     // Create the shuffle mask for "rotating" the borrow up one register slot
2412     // once the borrow is generated.
2413     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2414     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2415     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2416     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2417
2418     SDValue CarryGen =
2419       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2420     SDValue ShiftedCarry =
2421       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2422                   CarryGen, CarryGen,
2423                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2424                               &ShufBytes[0], ShufBytes.size()));
2425
2426     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2427                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2428                                    Op0, Op1, ShiftedCarry));
2429   }
2430
2431   case ISD::SUB: {
2432     // Turn operands into vectors to satisfy type checking (shufb works on
2433     // vectors)
2434     SDValue Op0 =
2435       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2436     SDValue Op1 =
2437       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2438     SmallVector<SDValue, 16> ShufBytes;
2439
2440     // Create the shuffle mask for "rotating" the borrow up one register slot
2441     // once the borrow is generated.
2442     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2443     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2444     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2445     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2446
2447     SDValue BorrowGen =
2448       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2449     SDValue ShiftedBorrow =
2450       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2451                   BorrowGen, BorrowGen,
2452                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2453                               &ShufBytes[0], ShufBytes.size()));
2454
2455     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2456                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2457                                    Op0, Op1, ShiftedBorrow));
2458   }
2459
2460   case ISD::SHL: {
2461     SDValue ShiftAmt = Op.getOperand(1);
2462     MVT ShiftAmtVT = ShiftAmt.getValueType();
2463     SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2464     SDValue MaskLower =
2465       DAG.getNode(SPUISD::SELB, VecVT,
2466                   Op0Vec,
2467                   DAG.getConstant(0, VecVT),
2468                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2469                               DAG.getConstant(0xff00ULL, MVT::i16)));
2470     SDValue ShiftAmtBytes =
2471       DAG.getNode(ISD::SRL, ShiftAmtVT,
2472                   ShiftAmt,
2473                   DAG.getConstant(3, ShiftAmtVT));
2474     SDValue ShiftAmtBits =
2475       DAG.getNode(ISD::AND, ShiftAmtVT,
2476                   ShiftAmt,
2477                   DAG.getConstant(7, ShiftAmtVT));
2478
2479     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2480                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2481                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2482                                                MaskLower, ShiftAmtBytes),
2483                                    ShiftAmtBits));
2484   }
2485
2486   case ISD::SRL: {
2487     MVT VT = Op.getValueType();
2488     SDValue ShiftAmt = Op.getOperand(1);
2489     MVT ShiftAmtVT = ShiftAmt.getValueType();
2490     SDValue ShiftAmtBytes =
2491       DAG.getNode(ISD::SRL, ShiftAmtVT,
2492                   ShiftAmt,
2493                   DAG.getConstant(3, ShiftAmtVT));
2494     SDValue ShiftAmtBits =
2495       DAG.getNode(ISD::AND, ShiftAmtVT,
2496                   ShiftAmt,
2497                   DAG.getConstant(7, ShiftAmtVT));
2498
2499     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2500                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2501                                    Op0, ShiftAmtBytes),
2502                        ShiftAmtBits);
2503   }
2504
2505   case ISD::SRA: {
2506     // Promote Op0 to vector
2507     SDValue Op0 =
2508       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2509     SDValue ShiftAmt = Op.getOperand(1);
2510     MVT ShiftVT = ShiftAmt.getValueType();
2511
2512     // Negate variable shift amounts
2513     if (!isa<ConstantSDNode>(ShiftAmt)) {
2514       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2515                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2516     }
2517
2518     SDValue UpperHalfSign =
2519       DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
2520                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2521                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2522                                           Op0, DAG.getConstant(31, MVT::i32))));
2523     SDValue UpperHalfSignMask =
2524       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2525     SDValue UpperLowerMask =
2526       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2527                   DAG.getConstant(0xff00, MVT::i16));
2528     SDValue UpperLowerSelect =
2529       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2530                   UpperHalfSignMask, Op0, UpperLowerMask);
2531     SDValue RotateLeftBytes =
2532       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2533                   UpperLowerSelect, ShiftAmt);
2534     SDValue RotateLeftBits =
2535       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2536                   RotateLeftBytes, ShiftAmt);
2537
2538     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2539                        RotateLeftBits);
2540   }
2541   }
2542
2543   return SDValue();
2544 }
2545
2546 //! Lower byte immediate operations for v16i8 vectors:
2547 static SDValue
2548 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2549   SDValue ConstVec;
2550   SDValue Arg;
2551   MVT VT = Op.getValueType();
2552
2553   ConstVec = Op.getOperand(0);
2554   Arg = Op.getOperand(1);
2555   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2556     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2557       ConstVec = ConstVec.getOperand(0);
2558     } else {
2559       ConstVec = Op.getOperand(1);
2560       Arg = Op.getOperand(0);
2561       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2562         ConstVec = ConstVec.getOperand(0);
2563       }
2564     }
2565   }
2566
2567   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2568     uint64_t VectorBits[2];
2569     uint64_t UndefBits[2];
2570     uint64_t SplatBits, SplatUndef;
2571     int SplatSize;
2572
2573     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2574         && isConstantSplat(VectorBits, UndefBits,
2575                            VT.getVectorElementType().getSizeInBits(),
2576                            SplatBits, SplatUndef, SplatSize)) {
2577       SDValue tcVec[16];
2578       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2579       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2580
2581       // Turn the BUILD_VECTOR into a set of target constants:
2582       for (size_t i = 0; i < tcVecSize; ++i)
2583         tcVec[i] = tc;
2584
2585       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2586                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2587     }
2588   }
2589   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2590   // lowered.  Return the operation, rather than a null SDValue.
2591   return Op;
2592 }
2593
2594 //! Lower i32 multiplication
2595 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2596                           unsigned Opc) {
2597   switch (VT.getSimpleVT()) {
2598   default:
2599     cerr << "CellSPU: Unknown LowerMUL value type, got "
2600          << Op.getValueType().getMVTString()
2601          << "\n";
2602     abort();
2603     /*NOTREACHED*/
2604
2605   case MVT::i32: {
2606     SDValue rA = Op.getOperand(0);
2607     SDValue rB = Op.getOperand(1);
2608
2609     return DAG.getNode(ISD::ADD, MVT::i32,
2610                        DAG.getNode(ISD::ADD, MVT::i32,
2611                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2612                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2613                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2614   }
2615   }
2616
2617   return SDValue();
2618 }
2619
2620 //! Custom lowering for CTPOP (count population)
2621 /*!
2622   Custom lowering code that counts the number ones in the input
2623   operand. SPU has such an instruction, but it counts the number of
2624   ones per byte, which then have to be accumulated.
2625 */
2626 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2627   MVT VT = Op.getValueType();
2628   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2629
2630   switch (VT.getSimpleVT()) {
2631   default:
2632     assert(false && "Invalid value type!");
2633   case MVT::i8: {
2634     SDValue N = Op.getOperand(0);
2635     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2636
2637     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2638     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2639
2640     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2641   }
2642
2643   case MVT::i16: {
2644     MachineFunction &MF = DAG.getMachineFunction();
2645     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2646
2647     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2648
2649     SDValue N = Op.getOperand(0);
2650     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2651     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2652     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2653
2654     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2655     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2656
2657     // CNTB_result becomes the chain to which all of the virtual registers
2658     // CNTB_reg, SUM1_reg become associated:
2659     SDValue CNTB_result =
2660       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2661
2662     SDValue CNTB_rescopy =
2663       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2664
2665     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2666
2667     return DAG.getNode(ISD::AND, MVT::i16,
2668                        DAG.getNode(ISD::ADD, MVT::i16,
2669                                    DAG.getNode(ISD::SRL, MVT::i16,
2670                                                Tmp1, Shift1),
2671                                    Tmp1),
2672                        Mask0);
2673   }
2674
2675   case MVT::i32: {
2676     MachineFunction &MF = DAG.getMachineFunction();
2677     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2678
2679     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2680     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2681
2682     SDValue N = Op.getOperand(0);
2683     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2684     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2685     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2686     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2687
2688     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2689     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2690
2691     // CNTB_result becomes the chain to which all of the virtual registers
2692     // CNTB_reg, SUM1_reg become associated:
2693     SDValue CNTB_result =
2694       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2695
2696     SDValue CNTB_rescopy =
2697       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2698
2699     SDValue Comp1 =
2700       DAG.getNode(ISD::SRL, MVT::i32,
2701                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2702
2703     SDValue Sum1 =
2704       DAG.getNode(ISD::ADD, MVT::i32,
2705                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2706
2707     SDValue Sum1_rescopy =
2708       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2709
2710     SDValue Comp2 =
2711       DAG.getNode(ISD::SRL, MVT::i32,
2712                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2713                   Shift2);
2714     SDValue Sum2 =
2715       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2716                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2717
2718     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2719   }
2720
2721   case MVT::i64:
2722     break;
2723   }
2724
2725   return SDValue();
2726 }
2727
2728 //! Lower ISD::SELECT_CC
2729 /*!
2730   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2731   SELB instruction.
2732
2733   \note Need to revisit this in the future: if the code path through the true
2734   and false value computations is longer than the latency of a branch (6
2735   cycles), then it would be more advantageous to branch and insert a new basic
2736   block and branch on the condition. However, this code does not make that
2737   assumption, given the simplisitc uses so far.
2738  */
2739
2740 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2741   MVT VT = Op.getValueType();
2742   SDValue lhs = Op.getOperand(0);
2743   SDValue rhs = Op.getOperand(1);
2744   SDValue trueval = Op.getOperand(2);
2745   SDValue falseval = Op.getOperand(3);
2746   SDValue condition = Op.getOperand(4);
2747
2748   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2749   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2750   // with another "cannot select select_cc" assert:
2751
2752   SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
2753   return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
2754 }
2755
2756 //! Custom (target-specific) lowering entry point
2757 /*!
2758   This is where LLVM's DAG selection process calls to do target-specific
2759   lowering of nodes.
2760  */
2761 SDValue
2762 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2763 {
2764   unsigned Opc = (unsigned) Op.getOpcode();
2765   MVT VT = Op.getValueType();
2766
2767   switch (Opc) {
2768   default: {
2769     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2770     cerr << "Op.getOpcode() = " << Opc << "\n";
2771     cerr << "*Op.getNode():\n";
2772     Op.getNode()->dump();
2773     abort();
2774   }
2775   case ISD::LOAD:
2776   case ISD::SEXTLOAD:
2777   case ISD::ZEXTLOAD:
2778     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2779   case ISD::STORE:
2780     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2781   case ISD::ConstantPool:
2782     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2783   case ISD::GlobalAddress:
2784     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2785   case ISD::JumpTable:
2786     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2787   case ISD::Constant:
2788     return LowerConstant(Op, DAG);
2789   case ISD::ConstantFP:
2790     return LowerConstantFP(Op, DAG);
2791   case ISD::BRCOND:
2792     return LowerBRCOND(Op, DAG);
2793   case ISD::FORMAL_ARGUMENTS:
2794     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2795   case ISD::CALL:
2796     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2797   case ISD::RET:
2798     return LowerRET(Op, DAG, getTargetMachine());
2799
2800
2801   // i8, i64 math ops:
2802   case ISD::ZERO_EXTEND:
2803   case ISD::SIGN_EXTEND:
2804   case ISD::ANY_EXTEND:
2805   case ISD::ADD:
2806   case ISD::SUB:
2807   case ISD::ROTR:
2808   case ISD::ROTL:
2809   case ISD::SRL:
2810   case ISD::SHL:
2811   case ISD::SRA: {
2812     if (VT == MVT::i8)
2813       return LowerI8Math(Op, DAG, Opc);
2814     else if (VT == MVT::i64)
2815       return LowerI64Math(Op, DAG, Opc);
2816     break;
2817   }
2818
2819   // Vector-related lowering.
2820   case ISD::BUILD_VECTOR:
2821     return LowerBUILD_VECTOR(Op, DAG);
2822   case ISD::SCALAR_TO_VECTOR:
2823     return LowerSCALAR_TO_VECTOR(Op, DAG);
2824   case ISD::VECTOR_SHUFFLE:
2825     return LowerVECTOR_SHUFFLE(Op, DAG);
2826   case ISD::EXTRACT_VECTOR_ELT:
2827     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2828   case ISD::INSERT_VECTOR_ELT:
2829     return LowerINSERT_VECTOR_ELT(Op, DAG);
2830
2831   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2832   case ISD::AND:
2833   case ISD::OR:
2834   case ISD::XOR:
2835     return LowerByteImmed(Op, DAG);
2836
2837   // Vector and i8 multiply:
2838   case ISD::MUL:
2839     if (VT.isVector())
2840       return LowerVectorMUL(Op, DAG);
2841     else if (VT == MVT::i8)
2842       return LowerI8Math(Op, DAG, Opc);
2843     else
2844       return LowerMUL(Op, DAG, VT, Opc);
2845
2846   case ISD::FDIV:
2847     if (VT == MVT::f32 || VT == MVT::v4f32)
2848       return LowerFDIVf32(Op, DAG);
2849 #if 0
2850     // This is probably a libcall
2851     else if (Op.getValueType() == MVT::f64)
2852       return LowerFDIVf64(Op, DAG);
2853 #endif
2854     else
2855       assert(0 && "Calling FDIV on unsupported MVT");
2856
2857   case ISD::CTPOP:
2858     return LowerCTPOP(Op, DAG);
2859
2860   case ISD::SELECT_CC:
2861     return LowerSELECT_CC(Op, DAG);
2862   }
2863
2864   return SDValue();
2865 }
2866
2867 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2868 {
2869 #if 0
2870   unsigned Opc = (unsigned) N->getOpcode();
2871   MVT OpVT = N->getValueType(0);
2872
2873   switch (Opc) {
2874   default: {
2875     cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2876     cerr << "Op.getOpcode() = " << Opc << "\n";
2877     cerr << "*Op.getNode():\n";
2878     N->dump();
2879     abort();
2880     /*NOTREACHED*/
2881   }
2882   }
2883 #endif
2884
2885   /* Otherwise, return unchanged */
2886   return 0;
2887 }
2888
2889 //===----------------------------------------------------------------------===//
2890 // Target Optimization Hooks
2891 //===----------------------------------------------------------------------===//
2892
2893 SDValue
2894 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2895 {
2896 #if 0
2897   TargetMachine &TM = getTargetMachine();
2898 #endif
2899   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2900   SelectionDAG &DAG = DCI.DAG;
2901   SDValue Op0 = N->getOperand(0);      // everything has at least one operand
2902   SDValue Result;                     // Initially, NULL result
2903
2904   switch (N->getOpcode()) {
2905   default: break;
2906   case ISD::ADD: {
2907     SDValue Op1 = N->getOperand(1);
2908
2909     if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2910       SDValue Op01 = Op0.getOperand(1);
2911       if (Op01.getOpcode() == ISD::Constant
2912           || Op01.getOpcode() == ISD::TargetConstant) {
2913         // (add <const>, (SPUindirect <arg>, <const>)) ->
2914         // (SPUindirect <arg>, <const + const>)
2915         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2916         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2917         SDValue combinedConst =
2918           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2919                           Op0.getValueType());
2920
2921         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2922                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2923         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2924                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2925         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2926                            Op0.getOperand(0), combinedConst);
2927       }
2928     } else if (isa<ConstantSDNode>(Op0)
2929                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2930       SDValue Op11 = Op1.getOperand(1);
2931       if (Op11.getOpcode() == ISD::Constant
2932           || Op11.getOpcode() == ISD::TargetConstant) {
2933         // (add (SPUindirect <arg>, <const>), <const>) ->
2934         // (SPUindirect <arg>, <const + const>)
2935         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2936         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2937         SDValue combinedConst =
2938           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2939                           Op0.getValueType());
2940
2941         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2942                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2943         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2944                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2945
2946         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2947                            Op1.getOperand(0), combinedConst);
2948       }
2949     }
2950     break;
2951   }
2952   case ISD::SIGN_EXTEND:
2953   case ISD::ZERO_EXTEND:
2954   case ISD::ANY_EXTEND: {
2955     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT &&
2956         N->getValueType(0) == Op0.getValueType()) {
2957       // (any_extend (SPUextract_elt0 <arg>)) ->
2958       // (SPUextract_elt0 <arg>)
2959       // Types must match, however...
2960       DEBUG(cerr << "Replace: ");
2961       DEBUG(N->dump(&DAG));
2962       DEBUG(cerr << "\nWith:    ");
2963       DEBUG(Op0.getNode()->dump(&DAG));
2964       DEBUG(cerr << "\n");
2965
2966       return Op0;
2967     }
2968     break;
2969   }
2970   case SPUISD::IndirectAddr: {
2971     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2972       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2973       if (CN->getZExtValue() == 0) {
2974         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2975         // (SPUaform <addr>, 0)
2976
2977         DEBUG(cerr << "Replace: ");
2978         DEBUG(N->dump(&DAG));
2979         DEBUG(cerr << "\nWith:    ");
2980         DEBUG(Op0.getNode()->dump(&DAG));
2981         DEBUG(cerr << "\n");
2982
2983         return Op0;
2984       }
2985     }
2986     break;
2987   }
2988   case SPUISD::SHLQUAD_L_BITS:
2989   case SPUISD::SHLQUAD_L_BYTES:
2990   case SPUISD::VEC_SHL:
2991   case SPUISD::VEC_SRL:
2992   case SPUISD::VEC_SRA:
2993   case SPUISD::ROTQUAD_RZ_BYTES:
2994   case SPUISD::ROTQUAD_RZ_BITS: {
2995     SDValue Op1 = N->getOperand(1);
2996
2997     if (isa<ConstantSDNode>(Op1)) {
2998       // Kill degenerate vector shifts:
2999       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
3000
3001       if (CN->getZExtValue() == 0) {
3002         Result = Op0;
3003       }
3004     }
3005     break;
3006   }
3007   case SPUISD::PROMOTE_SCALAR: {
3008     switch (Op0.getOpcode()) {
3009     default:
3010       break;
3011     case ISD::ANY_EXTEND:
3012     case ISD::ZERO_EXTEND:
3013     case ISD::SIGN_EXTEND: {
3014       // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
3015       // <arg>
3016       // but only if the SPUpromote_scalar and <arg> types match.
3017       SDValue Op00 = Op0.getOperand(0);
3018       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3019         SDValue Op000 = Op00.getOperand(0);
3020         if (Op000.getValueType() == N->getValueType(0)) {
3021           Result = Op000;
3022         }
3023       }
3024       break;
3025     }
3026     case SPUISD::VEC2PREFSLOT: {
3027       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
3028       // <arg>
3029       Result = Op0.getOperand(0);
3030       break;
3031     }
3032     }
3033     break;
3034   }
3035   }
3036   // Otherwise, return unchanged.
3037 #ifdef NDEBUG
3038   if (Result.getNode()) {
3039     DEBUG(cerr << "\nReplace.SPU: ");
3040     DEBUG(N->dump(&DAG));
3041     DEBUG(cerr << "\nWith:        ");
3042     DEBUG(Result.getNode()->dump(&DAG));
3043     DEBUG(cerr << "\n");
3044   }
3045 #endif
3046
3047   return Result;
3048 }
3049
3050 //===----------------------------------------------------------------------===//
3051 // Inline Assembly Support
3052 //===----------------------------------------------------------------------===//
3053
3054 /// getConstraintType - Given a constraint letter, return the type of
3055 /// constraint it is for this target.
3056 SPUTargetLowering::ConstraintType
3057 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3058   if (ConstraintLetter.size() == 1) {
3059     switch (ConstraintLetter[0]) {
3060     default: break;
3061     case 'b':
3062     case 'r':
3063     case 'f':
3064     case 'v':
3065     case 'y':
3066       return C_RegisterClass;
3067     }
3068   }
3069   return TargetLowering::getConstraintType(ConstraintLetter);
3070 }
3071
3072 std::pair<unsigned, const TargetRegisterClass*>
3073 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3074                                                 MVT VT) const
3075 {
3076   if (Constraint.size() == 1) {
3077     // GCC RS6000 Constraint Letters
3078     switch (Constraint[0]) {
3079     case 'b':   // R1-R31
3080     case 'r':   // R0-R31
3081       if (VT == MVT::i64)
3082         return std::make_pair(0U, SPU::R64CRegisterClass);
3083       return std::make_pair(0U, SPU::R32CRegisterClass);
3084     case 'f':
3085       if (VT == MVT::f32)
3086         return std::make_pair(0U, SPU::R32FPRegisterClass);
3087       else if (VT == MVT::f64)
3088         return std::make_pair(0U, SPU::R64FPRegisterClass);
3089       break;
3090     case 'v':
3091       return std::make_pair(0U, SPU::GPRCRegisterClass);
3092     }
3093   }
3094
3095   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3096 }
3097
3098 //! Compute used/known bits for a SPU operand
3099 void
3100 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3101                                                   const APInt &Mask,
3102                                                   APInt &KnownZero,
3103                                                   APInt &KnownOne,
3104                                                   const SelectionDAG &DAG,
3105                                                   unsigned Depth ) const {
3106 #if 0
3107   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3108 #endif
3109
3110   switch (Op.getOpcode()) {
3111   default:
3112     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3113     break;
3114
3115 #if 0
3116   case CALL:
3117   case SHUFB:
3118   case SHUFFLE_MASK:
3119   case CNTB:
3120 #endif
3121
3122   case SPUISD::PROMOTE_SCALAR: {
3123     SDValue Op0 = Op.getOperand(0);
3124     MVT Op0VT = Op0.getValueType();
3125     unsigned Op0VTBits = Op0VT.getSizeInBits();
3126     uint64_t InMask = Op0VT.getIntegerVTBitMask();
3127     KnownZero |= APInt(Op0VTBits, ~InMask, false);
3128     KnownOne |= APInt(Op0VTBits, InMask, false);
3129     break;
3130   }
3131
3132   case SPUISD::LDRESULT:
3133   case SPUISD::VEC2PREFSLOT:
3134   case SPUISD::VEC2PREFSLOT_CHAINED: {
3135     MVT OpVT = Op.getValueType();
3136     unsigned OpVTBits = OpVT.getSizeInBits();
3137     uint64_t InMask = OpVT.getIntegerVTBitMask();
3138     KnownZero |= APInt(OpVTBits, ~InMask, false);
3139     KnownOne |= APInt(OpVTBits, InMask, false);
3140     break;
3141   }
3142
3143 #if 0
3144   case EXTRACT_I1_ZEXT:
3145   case EXTRACT_I1_SEXT:
3146   case EXTRACT_I8_ZEXT:
3147   case EXTRACT_I8_SEXT:
3148   case MPY:
3149   case MPYU:
3150   case MPYH:
3151   case MPYHH:
3152   case SPUISD::SHLQUAD_L_BITS:
3153   case SPUISD::SHLQUAD_L_BYTES:
3154   case SPUISD::VEC_SHL:
3155   case SPUISD::VEC_SRL:
3156   case SPUISD::VEC_SRA:
3157   case SPUISD::VEC_ROTL:
3158   case SPUISD::VEC_ROTR:
3159   case SPUISD::ROTQUAD_RZ_BYTES:
3160   case SPUISD::ROTQUAD_RZ_BITS:
3161   case SPUISD::ROTBYTES_LEFT:
3162   case SPUISD::ROTBYTES_LEFT_CHAINED:
3163   case SPUISD::SELECT_MASK:
3164   case SPUISD::SELB:
3165   case SPUISD::FPInterp:
3166   case SPUISD::FPRecipEst:
3167   case SPUISD::SEXT32TO64:
3168 #endif
3169   }
3170 }
3171
3172 // LowerAsmOperandForConstraint
3173 void
3174 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3175                                                 char ConstraintLetter,
3176                                                 bool hasMemory,
3177                                                 std::vector<SDValue> &Ops,
3178                                                 SelectionDAG &DAG) const {
3179   // Default, for the time being, to the base class handler
3180   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3181                                                Ops, DAG);
3182 }
3183
3184 /// isLegalAddressImmediate - Return true if the integer value can be used
3185 /// as the offset of the target addressing mode.
3186 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3187                                                 const Type *Ty) const {
3188   // SPU's addresses are 256K:
3189   return (V > -(1 << 18) && V < (1 << 18) - 1);
3190 }
3191
3192 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3193   return false;
3194 }
3195
3196 bool
3197 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3198   // The SPU target isn't yet aware of offsets.
3199   return false;
3200 }