lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/VectorExtras.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT   valtype;
  43     const int   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << VT.getMVTString()
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDValue &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! Predicate that returns true if the operand is an indirect target
 105   bool isIndirectOperand(const SDValue &Op)
 106   {
 107     const unsigned Opc = Op.getOpcode();
 108     return (Opc == ISD::Register
 109             || Opc == SPUISD::LDRESULT);
 110   }
 111 }
 112
 113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 114   : TargetLowering(TM),
 115     SPUTM(TM)
 116 {
 117   // Fold away setcc operations if possible.
 118   setPow2DivIsCheap();
 119
 120   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 121   setUseUnderscoreSetJmp(true);
 122   setUseUnderscoreLongJmp(true);
 123
 124   // Set up the SPU's register classes:
 125   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 126   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 127   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 128   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 129   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 130   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 131   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 132
 133   // Initialize libcalls:
 134   setLibcallName(RTLIB::MUL_I64, "__muldi3");
 135
 136   // SPU has no sign or zero extended loads for i1, i8, i16:
 137   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 138   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 139   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 140
 141   setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
 142   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
 143   setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 144   setTruncStoreAction(MVT::i8,    MVT::i8, Custom);
 145   setTruncStoreAction(MVT::i16,   MVT::i8, Custom);
 146   setTruncStoreAction(MVT::i32,   MVT::i8, Custom);
 147   setTruncStoreAction(MVT::i64,   MVT::i8, Custom);
 148   setTruncStoreAction(MVT::i128,  MVT::i8, Custom);
 149
 150   setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
 151   setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
 152   setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 153
 154   // SPU constant load actions are custom lowered:
 155   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 156   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 157   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 158
 159   // SPU's loads and stores have to be custom lowered:
 160   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 161        ++sctype) {
 162     MVT VT = (MVT::SimpleValueType)sctype;
 163
 164     setOperationAction(ISD::LOAD, VT, Custom);
 165     setOperationAction(ISD::STORE, VT, Custom);
 166   }
 167
 168   // Custom lower BRCOND for i8 to "promote" the result to i16
 169   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 170
 171   // Expand the jumptable branches
 172   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 173   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 174
 175   // Custom lower SELECT_CC for most cases, but expand by default
 176   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 177   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 178   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 179   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 180 #if 0
 181   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 182 #endif
 183
 184   // SPU has no intrinsics for these particular operations:
 185   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 186
 187   // PowerPC has no SREM/UREM instructions
 188   setOperationAction(ISD::SREM, MVT::i32, Expand);
 189   setOperationAction(ISD::UREM, MVT::i32, Expand);
 190   setOperationAction(ISD::SREM, MVT::i64, Expand);
 191   setOperationAction(ISD::UREM, MVT::i64, Expand);
 192
 193   // We don't support sin/cos/sqrt/fmod
 194   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 195   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 196   setOperationAction(ISD::FREM , MVT::f64, Expand);
 197   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 198   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 199   setOperationAction(ISD::FREM , MVT::f32, Expand);
 200
 201   // If we're enabling GP optimizations, use hardware square root
 202   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 203   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 204
 205   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 206   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 207
 208   // SPU can do rotate right and left, so legalize it... but customize for i8
 209   // because instructions don't exist.
 210
 211   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 212   //        .td files.
 213   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 214   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 215   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 216
 217   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 218   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 219   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 220
 221   // SPU has no native version of shift left/right for i8
 222   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 223   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 224   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 225
 226   // SPU needs custom lowering for shift left/right for i64
 227   setOperationAction(ISD::SHL,  MVT::i64,    Custom);
 228   setOperationAction(ISD::SRL,  MVT::i64,    Custom);
 229   setOperationAction(ISD::SRA,  MVT::i64,    Custom);
 230
 231   // Custom lower i8, i32 and i64 multiplications
 232   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 233   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 234   setOperationAction(ISD::MUL,  MVT::i64,    Expand);   // libcall
 235
 236   // SMUL_LOHI, UMUL_LOHI
 237   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
 238   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
 239   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
 240   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
 241
 242   // Need to custom handle (some) common i8, i64 math ops
 243   setOperationAction(ISD::ADD,  MVT::i64,    Custom);
 244   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 245   setOperationAction(ISD::SUB,  MVT::i64,    Custom);
 246
 247   // SPU does not have BSWAP. It does have i32 support CTLZ.
 248   // CTPOP has to be custom lowered.
 249   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 250   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 251
 252   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 253   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 254   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 255   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 256
 257   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 258   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 259
 260   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 261
 262   // SPU has a version of select that implements (a&~c)|(b&c), just like
 263   // select ought to work:
 264   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 265   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 266   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 267   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 268
 269   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 270   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 271   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 272   setOperationAction(ISD::SETCC, MVT::i64,   Expand);
 273
 274   // Zero extension and sign extension for i64 have to be
 275   // custom legalized
 276   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 277   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 278   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
 279
 280   // SPU has a legal FP -> signed INT instruction
 281   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 282   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 283   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 284   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 285
 286   // FDIV on SPU requires custom lowering
 287   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 288   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 289
 290   // SPU has [U|S]INT_TO_FP
 291   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 292   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 293   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 294   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 295   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 296   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 297   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 298   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 299
 300   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 301   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 302   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 303   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 304
 305   // We cannot sextinreg(i1).  Expand to shifts.
 306   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 307
 308   // Support label based line numbers.
 309   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 310   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 311
 312   // We want to legalize GlobalAddress and ConstantPool nodes into the
 313   // appropriate instructions to materialize the address.
 314   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 315        ++sctype) {
 316     MVT VT = (MVT::SimpleValueType)sctype;
 317
 318     setOperationAction(ISD::GlobalAddress, VT, Custom);
 319     setOperationAction(ISD::ConstantPool,  VT, Custom);
 320     setOperationAction(ISD::JumpTable,     VT, Custom);
 321   }
 322
 323   // RET must be custom lowered, to meet ABI requirements
 324   setOperationAction(ISD::RET,           MVT::Other, Custom);
 325
 326   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 327   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 328
 329   // Use the default implementation.
 330   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 331   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 332   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 333   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 334   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 335   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 336   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 337
 338   // Cell SPU has instructions for converting between i64 and fp.
 339   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 340   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 341
 342   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 343   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 344
 345   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 346   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 347
 348   // First set operation action for all vector types to expand. Then we
 349   // will selectively turn on ones that can be effectively codegen'd.
 350   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 351   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 352   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 353   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 354   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 355   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 356
 357   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 358        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 359     MVT VT = (MVT::SimpleValueType)i;
 360
 361     // add/sub are legal for all supported vector VT's.
 362     setOperationAction(ISD::ADD , VT, Legal);
 363     setOperationAction(ISD::SUB , VT, Legal);
 364     // mul has to be custom lowered.
 365     setOperationAction(ISD::MUL , VT, Custom);
 366
 367     setOperationAction(ISD::AND   , VT, Legal);
 368     setOperationAction(ISD::OR    , VT, Legal);
 369     setOperationAction(ISD::XOR   , VT, Legal);
 370     setOperationAction(ISD::LOAD  , VT, Legal);
 371     setOperationAction(ISD::SELECT, VT, Legal);
 372     setOperationAction(ISD::STORE,  VT, Legal);
 373
 374     // These operations need to be expanded:
 375     setOperationAction(ISD::SDIV, VT, Expand);
 376     setOperationAction(ISD::SREM, VT, Expand);
 377     setOperationAction(ISD::UDIV, VT, Expand);
 378     setOperationAction(ISD::UREM, VT, Expand);
 379     setOperationAction(ISD::FDIV, VT, Custom);
 380
 381     // Custom lower build_vector, constant pool spills, insert and
 382     // extract vector elements:
 383     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 384     setOperationAction(ISD::ConstantPool, VT, Custom);
 385     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 386     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 387     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 388     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 389   }
 390
 391   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 392   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 393   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 394   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 395   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 396
 397   setShiftAmountType(MVT::i32);
 398   setBooleanContents(ZeroOrOneBooleanContent);
 399
 400   setStackPointerRegisterToSaveRestore(SPU::R1);
 401
 402   // We have target-specific dag combine patterns for the following nodes:
 403   setTargetDAGCombine(ISD::ADD);
 404   setTargetDAGCombine(ISD::ZERO_EXTEND);
 405   setTargetDAGCombine(ISD::SIGN_EXTEND);
 406   setTargetDAGCombine(ISD::ANY_EXTEND);
 407
 408   computeRegisterProperties();
 409
 410   // Set other properties:
 411   setSchedulingPreference(SchedulingForLatency);
 412 }
 413
 414 const char *
 415 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 416 {
 417   if (node_names.empty()) {
 418     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 419     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 420     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 421     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 422     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 423     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 424     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 425     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 426     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 427     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 428     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 429     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 430     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 431     node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
 432                                               = "SPUISD::VEC2PREFSLOT_CHAINED";
 433     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 434     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 435     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 436     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 437     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 438     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 439     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 440     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 441     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 442     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 443     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 444     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 445     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 446     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 447     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 448     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
 449       "SPUISD::ROTQUAD_RZ_BYTES";
 450     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
 451       "SPUISD::ROTQUAD_RZ_BITS";
 452     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 453     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 454       "SPUISD::ROTBYTES_LEFT_CHAINED";
 455     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 456       "SPUISD::ROTBYTES_LEFT_BITS";
 457     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 458     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 459     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
 460     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
 461     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
 462     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
 463     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 464     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 465     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 466   }
 467
 468   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 469
 470   return ((i != node_names.end()) ? i->second : 0);
 471 }
 472
 473 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 474   MVT VT = Op.getValueType();
 475   return (VT.isInteger() ? VT : MVT(MVT::i32));
 476 }
 477
 478 //===----------------------------------------------------------------------===//
 479 // Calling convention code:
 480 //===----------------------------------------------------------------------===//
 481
 482 #include "SPUGenCallingConv.inc"
 483
 484 //===----------------------------------------------------------------------===//
 485 //  LowerOperation implementation
 486 //===----------------------------------------------------------------------===//
 487
 488 /// Aligned load common code for CellSPU
 489 /*!
 490   \param[in] Op The SelectionDAG load or store operand
 491   \param[in] DAG The selection DAG
 492   \param[in] ST CellSPU subtarget information structure
 493   \param[in,out] alignment Caller initializes this to the load or store node's
 494   value from getAlignment(), may be updated while generating the aligned load
 495   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 496   offset (divisible by 16, modulo 16 == 0)
 497   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 498   offset of the preferred slot (modulo 16 != 0)
 499   \param[in,out] VT Caller initializes this value type to the the load or store
 500   node's loaded or stored value type; may be updated if an i1-extended load or
 501   store.
 502   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 503   otherwise false. Can help to determine if the chunk needs to be rotated.
 504
 505  Both load and store lowering load a block of data aligned on a 16-byte
 506  boundary. This is the common aligned load code shared between both.
 507  */
 508 static SDValue
 509 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 510             LSBaseSDNode *LSN,
 511             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 512             MVT &VT, bool &was16aligned)
 513 {
 514   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 515   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 516   SDValue basePtr = LSN->getBasePtr();
 517   SDValue chain = LSN->getChain();
 518
 519   if (basePtr.getOpcode() == ISD::ADD) {
 520     SDValue Op1 = basePtr.getNode()->getOperand(1);
 521
 522     if (Op1.getOpcode() == ISD::Constant
 523         || Op1.getOpcode() == ISD::TargetConstant) {
 524       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 525
 526       alignOffs = (int) CN->getZExtValue();
 527       prefSlotOffs = (int) (alignOffs & 0xf);
 528
 529       // Adjust the rotation amount to ensure that the final result ends up in
 530       // the preferred slot:
 531       prefSlotOffs -= vtm->prefslot_byte;
 532       basePtr = basePtr.getOperand(0);
 533
 534       // Loading from memory, can we adjust alignment?
 535       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 536         SDValue APtr = basePtr.getOperand(0);
 537         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 538           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 539           alignment = GSDN->getGlobal()->getAlignment();
 540         }
 541       }
 542     } else {
 543       alignOffs = 0;
 544       prefSlotOffs = -vtm->prefslot_byte;
 545     }
 546   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
 547     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
 548     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
 549     prefSlotOffs = (int) (alignOffs & 0xf);
 550     prefSlotOffs -= vtm->prefslot_byte;
 551     basePtr = DAG.getRegister(SPU::R1, VT);
 552   } else {
 553     alignOffs = 0;
 554     prefSlotOffs = -vtm->prefslot_byte;
 555   }
 556
 557   if (alignment == 16) {
 558     // Realign the base pointer as a D-Form address:
 559     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 560       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 561                             basePtr,
 562                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 563     }
 564
 565     // Emit the vector load:
 566     was16aligned = true;
 567     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 568                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 569                        LSN->isVolatile(), 16);
 570   }
 571
 572   // Unaligned load or we're using the "large memory" model, which means that
 573   // we have to be very pessimistic:
 574   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 575     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
 576                           DAG.getConstant(0, PtrVT));
 577   }
 578
 579   // Add the offset
 580   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 581                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 582   was16aligned = false;
 583   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 584                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 585                      LSN->isVolatile(), 16);
 586 }
 587
 588 /// Custom lower loads for CellSPU
 589 /*!
 590  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 591  within a 16-byte block, we have to rotate to extract the requested element.
 592  */
 593 static SDValue
 594 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 595   LoadSDNode *LN = cast<LoadSDNode>(Op);
 596   SDValue the_chain = LN->getChain();
 597   MVT VT = LN->getMemoryVT();
 598   MVT OpVT = Op.getNode()->getValueType(0);
 599   ISD::LoadExtType ExtType = LN->getExtensionType();
 600   unsigned alignment = LN->getAlignment();
 601   SDValue Ops[8];
 602
 603   switch (LN->getAddressingMode()) {
 604   case ISD::UNINDEXED: {
 605     int offset, rotamt;
 606     bool was16aligned;
 607     SDValue result =
 608       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 609
 610     if (result.getNode() == 0)
 611       return result;
 612
 613     the_chain = result.getValue(1);
 614     // Rotate the chunk if necessary
 615     if (rotamt < 0)
 616       rotamt += 16;
 617     if (rotamt != 0 || !was16aligned) {
 618       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 619
 620       Ops[0] = the_chain;
 621       Ops[1] = result;
 622       if (was16aligned) {
 623         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 624       } else {
 625         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 626         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 627         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 628                              DAG.getConstant(rotamt, PtrVT));
 629       }
 630
 631       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 632       the_chain = result.getValue(1);
 633     }
 634
 635     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 636       SDVTList scalarvts;
 637       MVT vecVT = MVT::v16i8;
 638
 639       // Convert the loaded v16i8 vector to the appropriate vector type
 640       // specified by the operand:
 641       if (OpVT == VT) {
 642         if (VT != MVT::i1)
 643           vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 644       } else
 645         vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
 646
 647       Ops[0] = the_chain;
 648       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 649       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 650       result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
 651       the_chain = result.getValue(1);
 652     } else {
 653       // Handle the sign and zero-extending loads for i1 and i8:
 654       unsigned NewOpC;
 655
 656       if (ExtType == ISD::SEXTLOAD) {
 657         NewOpC = (OpVT == MVT::i1
 658                   ? SPUISD::EXTRACT_I1_SEXT
 659                   : SPUISD::EXTRACT_I8_SEXT);
 660       } else {
 661         assert(ExtType == ISD::ZEXTLOAD);
 662         NewOpC = (OpVT == MVT::i1
 663                   ? SPUISD::EXTRACT_I1_ZEXT
 664                   : SPUISD::EXTRACT_I8_ZEXT);
 665       }
 666
 667       result = DAG.getNode(NewOpC, OpVT, result);
 668     }
 669
 670     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 671     SDValue retops[2] = {
 672       result,
 673       the_chain
 674     };
 675
 676     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 677                          retops, sizeof(retops) / sizeof(retops[0]));
 678     return result;
 679   }
 680   case ISD::PRE_INC:
 681   case ISD::PRE_DEC:
 682   case ISD::POST_INC:
 683   case ISD::POST_DEC:
 684   case ISD::LAST_INDEXED_MODE:
 685     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 686             "UNINDEXED\n";
 687     cerr << (unsigned) LN->getAddressingMode() << "\n";
 688     abort();
 689     /*NOTREACHED*/
 690   }
 691
 692   return SDValue();
 693 }
 694
 695 /// Custom lower stores for CellSPU
 696 /*!
 697  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 698  within a 16-byte block, we have to generate a shuffle to insert the
 699  requested element into its place, then store the resulting block.
 700  */
 701 static SDValue
 702 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 703   StoreSDNode *SN = cast<StoreSDNode>(Op);
 704   SDValue Value = SN->getValue();
 705   MVT VT = Value.getValueType();
 706   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 707   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 708   unsigned alignment = SN->getAlignment();
 709
 710   switch (SN->getAddressingMode()) {
 711   case ISD::UNINDEXED: {
 712     int chunk_offset, slot_offset;
 713     bool was16aligned;
 714
 715     // The vector type we really want to load from the 16-byte chunk.
 716     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
 717         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 718
 719     SDValue alignLoadVec =
 720       AlignedLoad(Op, DAG, ST, SN, alignment,
 721                   chunk_offset, slot_offset, VT, was16aligned);
 722
 723     if (alignLoadVec.getNode() == 0)
 724       return alignLoadVec;
 725
 726     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 727     SDValue basePtr = LN->getBasePtr();
 728     SDValue the_chain = alignLoadVec.getValue(1);
 729     SDValue theValue = SN->getValue();
 730     SDValue result;
 731
 732     if (StVT != VT
 733         && (theValue.getOpcode() == ISD::AssertZext
 734             || theValue.getOpcode() == ISD::AssertSext)) {
 735       // Drill down and get the value for zero- and sign-extended
 736       // quantities
 737       theValue = theValue.getOperand(0);
 738     }
 739
 740     chunk_offset &= 0xf;
 741
 742     SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 743     SDValue insertEltPtr;
 744
 745     // If the base pointer is already a D-form address, then just create
 746     // a new D-form address with a slot offset and the orignal base pointer.
 747     // Otherwise generate a D-form address with the slot offset relative
 748     // to the stack pointer, which is always aligned.
 749     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 750     DEBUG(basePtr.getNode()->dump(&DAG));
 751     DEBUG(cerr << "\n");
 752
 753     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 754         (basePtr.getOpcode() == ISD::ADD
 755          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 756       insertEltPtr = basePtr;
 757     } else {
 758       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 759     }
 760
 761     SDValue insertEltOp =
 762             DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr);
 763     SDValue vectorizeOp =
 764             DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
 765
 766     result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
 767                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 768
 769     result = DAG.getStore(the_chain, result, basePtr,
 770                           LN->getSrcValue(), LN->getSrcValueOffset(),
 771                           LN->isVolatile(), LN->getAlignment());
 772
 773 #if 0 && defined(NDEBUG)
 774     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 775       const SDValue &currentRoot = DAG.getRoot();
 776
 777       DAG.setRoot(result);
 778       cerr << "------- CellSPU:LowerStore result:\n";
 779       DAG.dump();
 780       cerr << "-------\n";
 781       DAG.setRoot(currentRoot);
 782     }
 783 #endif
 784
 785     return result;
 786     /*UNREACHED*/
 787   }
 788   case ISD::PRE_INC:
 789   case ISD::PRE_DEC:
 790   case ISD::POST_INC:
 791   case ISD::POST_DEC:
 792   case ISD::LAST_INDEXED_MODE:
 793     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 794             "UNINDEXED\n";
 795     cerr << (unsigned) SN->getAddressingMode() << "\n";
 796     abort();
 797     /*NOTREACHED*/
 798   }
 799
 800   return SDValue();
 801 }
 802
 803 /// Generate the address of a constant pool entry.
 804 static SDValue
 805 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 806   MVT PtrVT = Op.getValueType();
 807   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 808   Constant *C = CP->getConstVal();
 809   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 810   SDValue Zero = DAG.getConstant(0, PtrVT);
 811   const TargetMachine &TM = DAG.getTarget();
 812
 813   if (TM.getRelocationModel() == Reloc::Static) {
 814     if (!ST->usingLargeMem()) {
 815       // Just return the SDValue with the constant pool address in it.
 816       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 817     } else {
 818       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 819       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 820       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 821     }
 822   }
 823
 824   assert(0 &&
 825          "LowerConstantPool: Relocation model other than static"
 826          " not supported.");
 827   return SDValue();
 828 }
 829
 830 static SDValue
 831 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 832   MVT PtrVT = Op.getValueType();
 833   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 834   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 835   SDValue Zero = DAG.getConstant(0, PtrVT);
 836   const TargetMachine &TM = DAG.getTarget();
 837
 838   if (TM.getRelocationModel() == Reloc::Static) {
 839     if (!ST->usingLargeMem()) {
 840       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 841     } else {
 842       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 843       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 844       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 845     }
 846   }
 847
 848   assert(0 &&
 849          "LowerJumpTable: Relocation model other than static not supported.");
 850   return SDValue();
 851 }
 852
 853 static SDValue
 854 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 855   MVT PtrVT = Op.getValueType();
 856   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 857   GlobalValue *GV = GSDN->getGlobal();
 858   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 859   const TargetMachine &TM = DAG.getTarget();
 860   SDValue Zero = DAG.getConstant(0, PtrVT);
 861
 862   if (TM.getRelocationModel() == Reloc::Static) {
 863     if (!ST->usingLargeMem()) {
 864       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 865     } else {
 866       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 867       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 868       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 869     }
 870   } else {
 871     cerr << "LowerGlobalAddress: Relocation model other than static not "
 872          << "supported.\n";
 873     abort();
 874     /*NOTREACHED*/
 875   }
 876
 877   return SDValue();
 878 }
 879
 880 //! Custom lower i64 integer constants
 881 /*!
 882  This code inserts all of the necessary juggling that needs to occur to load
 883  a 64-bit constant into a register.
 884  */
 885 static SDValue
 886 LowerConstant(SDValue Op, SelectionDAG &DAG) {
 887   MVT VT = Op.getValueType();
 888   ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
 889
 890   if (VT == MVT::i64) {
 891     SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
 892     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
 893                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 894   } else {
 895     cerr << "LowerConstant: unhandled constant type "
 896          << VT.getMVTString()
 897          << "\n";
 898     abort();
 899     /*NOTREACHED*/
 900   }
 901
 902   return SDValue();
 903 }
 904
 905 //! Custom lower double precision floating point constants
 906 static SDValue
 907 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 908   MVT VT = Op.getValueType();
 909   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 910
 911   assert((FP != 0) &&
 912          "LowerConstantFP: Node is not ConstantFPSDNode");
 913
 914   if (VT == MVT::f64) {
 915     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 916     return DAG.getNode(ISD::BIT_CONVERT, VT,
 917                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 918   }
 919
 920   return SDValue();
 921 }
 922
 923 //! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 924 static SDValue
 925 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
 926 {
 927   SDValue Cond = Op.getOperand(1);
 928   MVT CondVT = Cond.getValueType();
 929   MVT CondNVT;
 930
 931   if (CondVT == MVT::i8) {
 932     CondNVT = MVT::i16;
 933     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 934                       Op.getOperand(0),
 935                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 936                       Op.getOperand(2));
 937   } else
 938     return SDValue();                // Unchanged
 939 }
 940
 941 static SDValue
 942 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 943 {
 944   MachineFunction &MF = DAG.getMachineFunction();
 945   MachineFrameInfo *MFI = MF.getFrameInfo();
 946   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 947   SmallVector<SDValue, 48> ArgValues;
 948   SDValue Root = Op.getOperand(0);
 949   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 950
 951   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 952   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 953
 954   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 955   unsigned ArgRegIdx = 0;
 956   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 957
 958   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 959
 960   // Add DAG nodes to load the arguments or copy them out of registers.
 961   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 962        ArgNo != e; ++ArgNo) {
 963     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 964     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 965     SDValue ArgVal;
 966
 967     if (ArgRegIdx < NumArgRegs) {
 968       const TargetRegisterClass *ArgRegClass;
 969
 970       switch (ObjectVT.getSimpleVT()) {
 971       default: {
 972         cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 973              << ObjectVT.getMVTString()
 974              << "\n";
 975         abort();
 976       }
 977       case MVT::i8:
 978         ArgRegClass = &SPU::R8CRegClass;
 979         break;
 980       case MVT::i16:
 981         ArgRegClass = &SPU::R16CRegClass;
 982         break;
 983       case MVT::i32:
 984         ArgRegClass = &SPU::R32CRegClass;
 985         break;
 986       case MVT::i64:
 987         ArgRegClass = &SPU::R64CRegClass;
 988         break;
 989       case MVT::f32:
 990         ArgRegClass = &SPU::R32FPRegClass;
 991         break;
 992       case MVT::f64:
 993         ArgRegClass = &SPU::R64FPRegClass;
 994         break;
 995       case MVT::v2f64:
 996       case MVT::v4f32:
 997       case MVT::v2i64:
 998       case MVT::v4i32:
 999       case MVT::v8i16:
1000       case MVT::v16i8:
1001         ArgRegClass = &SPU::VECREGRegClass;
1002         break;
1003       }
1004
1005       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1006       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1007       ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1008       ++ArgRegIdx;
1009     } else {
1010       // We need to load the argument to a virtual register if we determined
1011       // above that we ran out of physical registers of the appropriate type
1012       // or we're forced to do vararg
1013       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1014       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1015       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1016       ArgOffset += StackSlotSize;
1017     }
1018
1019     ArgValues.push_back(ArgVal);
1020     // Update the chain
1021     Root = ArgVal.getOperand(0);
1022   }
1023
1024   // vararg handling:
1025   if (isVarArg) {
1026     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1027     // We will spill (79-3)+1 registers to the stack
1028     SmallVector<SDValue, 79-3+1> MemOps;
1029
1030     // Create the frame slot
1031
1032     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1033       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1034       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1035       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1036       SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1037       Root = Store.getOperand(0);
1038       MemOps.push_back(Store);
1039
1040       // Increment address by stack slot size for the next stored argument
1041       ArgOffset += StackSlotSize;
1042     }
1043     if (!MemOps.empty())
1044       Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1045   }
1046
1047   ArgValues.push_back(Root);
1048
1049   // Return the new list of results.
1050   return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1051                             ArgValues.size());
1052 }
1053
1054 /// isLSAAddress - Return the immediate to use if the specified
1055 /// value is representable as a LSA address.
1056 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1057   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1058   if (!C) return 0;
1059
1060   int Addr = C->getZExtValue();
1061   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1062       (Addr << 14 >> 14) != Addr)
1063     return 0;  // Top 14 bits have to be sext of immediate.
1064
1065   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1066 }
1067
1068 static
1069 SDValue
1070 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1071   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1072   SDValue Chain = TheCall->getChain();
1073   SDValue Callee    = TheCall->getCallee();
1074   unsigned NumOps     = TheCall->getNumArgs();
1075   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1076   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1077   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1078
1079   // Handy pointer type
1080   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1081
1082   // Accumulate how many bytes are to be pushed on the stack, including the
1083   // linkage area, and parameter passing area.  According to the SPU ABI,
1084   // we minimally need space for [LR] and [SP]
1085   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1086
1087   // Set up a copy of the stack pointer for use loading and storing any
1088   // arguments that may not fit in the registers available for argument
1089   // passing.
1090   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1091
1092   // Figure out which arguments are going to go in registers, and which in
1093   // memory.
1094   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1095   unsigned ArgRegIdx = 0;
1096
1097   // Keep track of registers passing arguments
1098   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1099   // And the arguments passed on the stack
1100   SmallVector<SDValue, 8> MemOpChains;
1101
1102   for (unsigned i = 0; i != NumOps; ++i) {
1103     SDValue Arg = TheCall->getArg(i);
1104
1105     // PtrOff will be used to store the current argument to the stack if a
1106     // register cannot be found for it.
1107     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1108     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1109
1110     switch (Arg.getValueType().getSimpleVT()) {
1111     default: assert(0 && "Unexpected ValueType for argument!");
1112     case MVT::i32:
1113     case MVT::i64:
1114     case MVT::i128:
1115       if (ArgRegIdx != NumArgRegs) {
1116         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1117       } else {
1118         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1119         ArgOffset += StackSlotSize;
1120       }
1121       break;
1122     case MVT::f32:
1123     case MVT::f64:
1124       if (ArgRegIdx != NumArgRegs) {
1125         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1126       } else {
1127         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1128         ArgOffset += StackSlotSize;
1129       }
1130       break;
1131     case MVT::v4f32:
1132     case MVT::v4i32:
1133     case MVT::v8i16:
1134     case MVT::v16i8:
1135       if (ArgRegIdx != NumArgRegs) {
1136         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137       } else {
1138         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1139         ArgOffset += StackSlotSize;
1140       }
1141       break;
1142     }
1143   }
1144
1145   // Update number of stack bytes actually used, insert a call sequence start
1146   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1147   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1148                                                             true));
1149
1150   if (!MemOpChains.empty()) {
1151     // Adjust the stack pointer for the stack arguments.
1152     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1153                         &MemOpChains[0], MemOpChains.size());
1154   }
1155
1156   // Build a sequence of copy-to-reg nodes chained together with token chain
1157   // and flag operands which copy the outgoing args into the appropriate regs.
1158   SDValue InFlag;
1159   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1160     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1161                              InFlag);
1162     InFlag = Chain.getValue(1);
1163   }
1164
1165   SmallVector<SDValue, 8> Ops;
1166   unsigned CallOpc = SPUISD::CALL;
1167
1168   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1169   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1170   // node so that legalize doesn't hack it.
1171   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1172     GlobalValue *GV = G->getGlobal();
1173     MVT CalleeVT = Callee.getValueType();
1174     SDValue Zero = DAG.getConstant(0, PtrVT);
1175     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1176
1177     if (!ST->usingLargeMem()) {
1178       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1179       // style calls, otherwise, external symbols are BRASL calls. This assumes
1180       // that declared/defined symbols are in the same compilation unit and can
1181       // be reached through PC-relative jumps.
1182       //
1183       // NOTE:
1184       // This may be an unsafe assumption for JIT and really large compilation
1185       // units.
1186       if (GV->isDeclaration()) {
1187         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1188       } else {
1189         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1190       }
1191     } else {
1192       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1193       // address pairs:
1194       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1195     }
1196   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1197     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1198   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1199     // If this is an absolute destination address that appears to be a legal
1200     // local store address, use the munged value.
1201     Callee = SDValue(Dest, 0);
1202   }
1203
1204   Ops.push_back(Chain);
1205   Ops.push_back(Callee);
1206
1207   // Add argument registers to the end of the list so that they are known live
1208   // into the call.
1209   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1210     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1211                                   RegsToPass[i].second.getValueType()));
1212
1213   if (InFlag.getNode())
1214     Ops.push_back(InFlag);
1215   // Returns a chain and a flag for retval copy to use.
1216   Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1217                       &Ops[0], Ops.size());
1218   InFlag = Chain.getValue(1);
1219
1220   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1221                              DAG.getIntPtrConstant(0, true), InFlag);
1222   if (TheCall->getValueType(0) != MVT::Other)
1223     InFlag = Chain.getValue(1);
1224
1225   SDValue ResultVals[3];
1226   unsigned NumResults = 0;
1227
1228   // If the call has results, copy the values out of the ret val registers.
1229   switch (TheCall->getValueType(0).getSimpleVT()) {
1230   default: assert(0 && "Unexpected ret value!");
1231   case MVT::Other: break;
1232   case MVT::i32:
1233     if (TheCall->getValueType(1) == MVT::i32) {
1234       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1235       ResultVals[0] = Chain.getValue(0);
1236       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1237                                  Chain.getValue(2)).getValue(1);
1238       ResultVals[1] = Chain.getValue(0);
1239       NumResults = 2;
1240     } else {
1241       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1242       ResultVals[0] = Chain.getValue(0);
1243       NumResults = 1;
1244     }
1245     break;
1246   case MVT::i64:
1247     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1248     ResultVals[0] = Chain.getValue(0);
1249     NumResults = 1;
1250     break;
1251   case MVT::f32:
1252   case MVT::f64:
1253     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1254                                InFlag).getValue(1);
1255     ResultVals[0] = Chain.getValue(0);
1256     NumResults = 1;
1257     break;
1258   case MVT::v2f64:
1259   case MVT::v4f32:
1260   case MVT::v4i32:
1261   case MVT::v8i16:
1262   case MVT::v16i8:
1263     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1264                                    InFlag).getValue(1);
1265     ResultVals[0] = Chain.getValue(0);
1266     NumResults = 1;
1267     break;
1268   }
1269
1270   // If the function returns void, just return the chain.
1271   if (NumResults == 0)
1272     return Chain;
1273
1274   // Otherwise, merge everything together with a MERGE_VALUES node.
1275   ResultVals[NumResults++] = Chain;
1276   SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1277   return Res.getValue(Op.getResNo());
1278 }
1279
1280 static SDValue
1281 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1282   SmallVector<CCValAssign, 16> RVLocs;
1283   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1284   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1285   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1286   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1287
1288   // If this is the first return lowered for this function, add the regs to the
1289   // liveout set for the function.
1290   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1291     for (unsigned i = 0; i != RVLocs.size(); ++i)
1292       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1293   }
1294
1295   SDValue Chain = Op.getOperand(0);
1296   SDValue Flag;
1297
1298   // Copy the result values into the output registers.
1299   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1300     CCValAssign &VA = RVLocs[i];
1301     assert(VA.isRegLoc() && "Can only return in registers!");
1302     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1303     Flag = Chain.getValue(1);
1304   }
1305
1306   if (Flag.getNode())
1307     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1308   else
1309     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1310 }
1311
1312
1313 //===----------------------------------------------------------------------===//
1314 // Vector related lowering:
1315 //===----------------------------------------------------------------------===//
1316
1317 static ConstantSDNode *
1318 getVecImm(SDNode *N) {
1319   SDValue OpVal(0, 0);
1320
1321   // Check to see if this buildvec has a single non-undef value in its elements.
1322   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1323     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1324     if (OpVal.getNode() == 0)
1325       OpVal = N->getOperand(i);
1326     else if (OpVal != N->getOperand(i))
1327       return 0;
1328   }
1329
1330   if (OpVal.getNode() != 0) {
1331     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1332       return CN;
1333     }
1334   }
1335
1336   return 0; // All UNDEF: use implicit def.; not Constant node
1337 }
1338
1339 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1340 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1341 /// constant
1342 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343                               MVT ValueType) {
1344   if (ConstantSDNode *CN = getVecImm(N)) {
1345     uint64_t Value = CN->getZExtValue();
1346     if (ValueType == MVT::i64) {
1347       uint64_t UValue = CN->getZExtValue();
1348       uint32_t upper = uint32_t(UValue >> 32);
1349       uint32_t lower = uint32_t(UValue);
1350       if (upper != lower)
1351         return SDValue();
1352       Value = Value >> 32;
1353     }
1354     if (Value <= 0x3ffff)
1355       return DAG.getTargetConstant(Value, ValueType);
1356   }
1357
1358   return SDValue();
1359 }
1360
1361 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1362 /// and the value fits into a signed 16-bit constant, and if so, return the
1363 /// constant
1364 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1365                               MVT ValueType) {
1366   if (ConstantSDNode *CN = getVecImm(N)) {
1367     int64_t Value = CN->getSExtValue();
1368     if (ValueType == MVT::i64) {
1369       uint64_t UValue = CN->getZExtValue();
1370       uint32_t upper = uint32_t(UValue >> 32);
1371       uint32_t lower = uint32_t(UValue);
1372       if (upper != lower)
1373         return SDValue();
1374       Value = Value >> 32;
1375     }
1376     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1377       return DAG.getTargetConstant(Value, ValueType);
1378     }
1379   }
1380
1381   return SDValue();
1382 }
1383
1384 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1385 /// and the value fits into a signed 10-bit constant, and if so, return the
1386 /// constant
1387 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1388                               MVT ValueType) {
1389   if (ConstantSDNode *CN = getVecImm(N)) {
1390     int64_t Value = CN->getSExtValue();
1391     if (ValueType == MVT::i64) {
1392       uint64_t UValue = CN->getZExtValue();
1393       uint32_t upper = uint32_t(UValue >> 32);
1394       uint32_t lower = uint32_t(UValue);
1395       if (upper != lower)
1396         return SDValue();
1397       Value = Value >> 32;
1398     }
1399     if (isS10Constant(Value))
1400       return DAG.getTargetConstant(Value, ValueType);
1401   }
1402
1403   return SDValue();
1404 }
1405
1406 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1407 /// and the value fits into a signed 8-bit constant, and if so, return the
1408 /// constant.
1409 ///
1410 /// @note: The incoming vector is v16i8 because that's the only way we can load
1411 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1412 /// same value.
1413 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1414                              MVT ValueType) {
1415   if (ConstantSDNode *CN = getVecImm(N)) {
1416     int Value = (int) CN->getZExtValue();
1417     if (ValueType == MVT::i16
1418         && Value <= 0xffff                 /* truncated from uint64_t */
1419         && ((short) Value >> 8) == ((short) Value & 0xff))
1420       return DAG.getTargetConstant(Value & 0xff, ValueType);
1421     else if (ValueType == MVT::i8
1422              && (Value & 0xff) == Value)
1423       return DAG.getTargetConstant(Value, ValueType);
1424   }
1425
1426   return SDValue();
1427 }
1428
1429 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1430 /// and the value fits into a signed 16-bit constant, and if so, return the
1431 /// constant
1432 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1433                                MVT ValueType) {
1434   if (ConstantSDNode *CN = getVecImm(N)) {
1435     uint64_t Value = CN->getZExtValue();
1436     if ((ValueType == MVT::i32
1437           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1438         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1439       return DAG.getTargetConstant(Value >> 16, ValueType);
1440   }
1441
1442   return SDValue();
1443 }
1444
1445 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1446 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1447   if (ConstantSDNode *CN = getVecImm(N)) {
1448     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1449   }
1450
1451   return SDValue();
1452 }
1453
1454 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1455 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1456   if (ConstantSDNode *CN = getVecImm(N)) {
1457     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1458   }
1459
1460   return SDValue();
1461 }
1462
1463 // If this is a vector of constants or undefs, get the bits.  A bit in
1464 // UndefBits is set if the corresponding element of the vector is an
1465 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1466 // zero.   Return true if this is not an array of constants, false if it is.
1467 //
1468 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1469                                        uint64_t UndefBits[2]) {
1470   // Start with zero'd results.
1471   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1472
1473   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1474   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1475     SDValue OpVal = BV->getOperand(i);
1476
1477     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1478     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1479
1480     uint64_t EltBits = 0;
1481     if (OpVal.getOpcode() == ISD::UNDEF) {
1482       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1483       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484       continue;
1485     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1486       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1487     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1488       const APFloat &apf = CN->getValueAPF();
1489       EltBits = (CN->getValueType(0) == MVT::f32
1490                  ? FloatToBits(apf.convertToFloat())
1491                  : DoubleToBits(apf.convertToDouble()));
1492     } else {
1493       // Nonconstant element.
1494       return true;
1495     }
1496
1497     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1498   }
1499
1500   //printf("%llx %llx  %llx %llx\n",
1501   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1502   return false;
1503 }
1504
1505 /// If this is a splat (repetition) of a value across the whole vector, return
1506 /// the smallest size that splats it.  For example, "0x01010101010101..." is a
1507 /// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
1508 /// SplatSize = 1 byte.
1509 static bool isConstantSplat(const uint64_t Bits128[2],
1510                             const uint64_t Undef128[2],
1511                             int MinSplatBits,
1512                             uint64_t &SplatBits, uint64_t &SplatUndef,
1513                             int &SplatSize) {
1514   // Don't let undefs prevent splats from matching.  See if the top 64-bits are
1515   // the same as the lower 64-bits, ignoring undefs.
1516   uint64_t Bits64  = Bits128[0] | Bits128[1];
1517   uint64_t Undef64 = Undef128[0] & Undef128[1];
1518   uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1519   uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1520   uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
1521   uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1522
1523   if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1524     if (MinSplatBits < 64) {
1525
1526       // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1527       // undefs.
1528       if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1529         if (MinSplatBits < 32) {
1530
1531           // If the top 16-bits are different than the lower 16-bits, ignoring
1532           // undefs, we have an i32 splat.
1533           if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1534             if (MinSplatBits < 16) {
1535               // If the top 8-bits are different than the lower 8-bits, ignoring
1536               // undefs, we have an i16 splat.
1537               if ((Bits16 & (uint16_t(~Undef16) >> 8))
1538                   == ((Bits16 >> 8) & ~Undef16)) {
1539                 // Otherwise, we have an 8-bit splat.
1540                 SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
1541                 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1542                 SplatSize = 1;
1543                 return true;
1544               }
1545             } else {
1546               SplatBits = Bits16;
1547               SplatUndef = Undef16;
1548               SplatSize = 2;
1549               return true;
1550             }
1551           }
1552         } else {
1553           SplatBits = Bits32;
1554           SplatUndef = Undef32;
1555           SplatSize = 4;
1556           return true;
1557         }
1558       }
1559     } else {
1560       SplatBits = Bits128[0];
1561       SplatUndef = Undef128[0];
1562       SplatSize = 8;
1563       return true;
1564     }
1565   }
1566
1567   return false;  // Can't be a splat if two pieces don't match.
1568 }
1569
1570 // If this is a case we can't handle, return null and let the default
1571 // expansion code take care of it.  If we CAN select this case, and if it
1572 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1573 // this case more efficiently than a constant pool load, lower it to the
1574 // sequence of ops that should be used.
1575 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1576   MVT VT = Op.getValueType();
1577   // If this is a vector of constants or undefs, get the bits.  A bit in
1578   // UndefBits is set if the corresponding element of the vector is an
1579   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1580   // zero.
1581   uint64_t VectorBits[2];
1582   uint64_t UndefBits[2];
1583   uint64_t SplatBits, SplatUndef;
1584   int SplatSize;
1585   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1586       || !isConstantSplat(VectorBits, UndefBits,
1587                           VT.getVectorElementType().getSizeInBits(),
1588                           SplatBits, SplatUndef, SplatSize))
1589     return SDValue();   // Not a constant vector, not a splat.
1590
1591   switch (VT.getSimpleVT()) {
1592   default:
1593   case MVT::v4f32: {
1594     uint32_t Value32 = SplatBits;
1595     assert(SplatSize == 4
1596            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1597     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1598     SDValue T = DAG.getConstant(Value32, MVT::i32);
1599     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1600                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1601     break;
1602   }
1603   case MVT::v2f64: {
1604     uint64_t f64val = SplatBits;
1605     assert(SplatSize == 8
1606            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1607     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1608     SDValue T = DAG.getConstant(f64val, MVT::i64);
1609     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1610                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1611     break;
1612   }
1613   case MVT::v16i8: {
1614    // 8-bit constants have to be expanded to 16-bits
1615    unsigned short Value16 = SplatBits | (SplatBits << 8);
1616    SDValue Ops[8];
1617    for (int i = 0; i < 8; ++i)
1618      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1619    return DAG.getNode(ISD::BIT_CONVERT, VT,
1620                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1621   }
1622   case MVT::v8i16: {
1623     unsigned short Value16;
1624     if (SplatSize == 2)
1625       Value16 = (unsigned short) (SplatBits & 0xffff);
1626     else
1627       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1628     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1629     SDValue Ops[8];
1630     for (int i = 0; i < 8; ++i) Ops[i] = T;
1631     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1632   }
1633   case MVT::v4i32: {
1634     unsigned int Value = SplatBits;
1635     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1636     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1637   }
1638   case MVT::v2i64: {
1639     uint64_t val = SplatBits;
1640     uint32_t upper = uint32_t(val >> 32);
1641     uint32_t lower = uint32_t(val);
1642
1643     if (upper == lower) {
1644       // Magic constant that can be matched by IL, ILA, et. al.
1645       SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1646       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1647     } else {
1648       SDValue LO32;
1649       SDValue HI32;
1650       SmallVector<SDValue, 16> ShufBytes;
1651       SDValue Result;
1652       bool upper_special, lower_special;
1653
1654       // NOTE: This code creates common-case shuffle masks that can be easily
1655       // detected as common expressions. It is not attempting to create highly
1656       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1657
1658       // Detect if the upper or lower half is a special shuffle mask pattern:
1659       upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1660       lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1661
1662       // Create lower vector if not a special pattern
1663       if (!lower_special) {
1664         SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1665         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1666                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1667                                        LO32C, LO32C, LO32C, LO32C));
1668       }
1669
1670       // Create upper vector if not a special pattern
1671       if (!upper_special) {
1672         SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1673         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1674                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1675                                        HI32C, HI32C, HI32C, HI32C));
1676       }
1677
1678       // If either upper or lower are special, then the two input operands are
1679       // the same (basically, one of them is a "don't care")
1680       if (lower_special)
1681         LO32 = HI32;
1682       if (upper_special)
1683         HI32 = LO32;
1684       if (lower_special && upper_special) {
1685         // Unhappy situation... both upper and lower are special, so punt with
1686         // a target constant:
1687         SDValue Zero = DAG.getConstant(0, MVT::i32);
1688         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1689                                   Zero, Zero);
1690       }
1691
1692       for (int i = 0; i < 4; ++i) {
1693         uint64_t val = 0;
1694         for (int j = 0; j < 4; ++j) {
1695           SDValue V;
1696           bool process_upper, process_lower;
1697           val <<= 8;
1698           process_upper = (upper_special && (i & 1) == 0);
1699           process_lower = (lower_special && (i & 1) == 1);
1700
1701           if (process_upper || process_lower) {
1702             if ((process_upper && upper == 0)
1703                 || (process_lower && lower == 0))
1704               val |= 0x80;
1705             else if ((process_upper && upper == 0xffffffff)
1706                      || (process_lower && lower == 0xffffffff))
1707               val |= 0xc0;
1708             else if ((process_upper && upper == 0x80000000)
1709                      || (process_lower && lower == 0x80000000))
1710               val |= (j == 0 ? 0xe0 : 0x80);
1711           } else
1712             val |= i * 4 + j + ((i & 1) * 16);
1713         }
1714
1715         ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1716       }
1717
1718       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1719                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1720                                      &ShufBytes[0], ShufBytes.size()));
1721     }
1722   }
1723   }
1724
1725   return SDValue();
1726 }
1727
1728 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1730 /// permutation vector, V3, is monotonically increasing with one "exception"
1731 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1732 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733 /// In either case, the net result is going to eventually invoke SHUFB to
1734 /// permute/shuffle the bytes from V1 and V2.
1735 /// \note
1736 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1737 /// control word for byte/halfword/word insertion. This takes care of a single
1738 /// element move from V2 into V1.
1739 /// \note
1740 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1741 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1742   SDValue V1 = Op.getOperand(0);
1743   SDValue V2 = Op.getOperand(1);
1744   SDValue PermMask = Op.getOperand(2);
1745
1746   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1747
1748   // If we have a single element being moved from V1 to V2, this can be handled
1749   // using the C*[DX] compute mask instructions, but the vector elements have
1750   // to be monotonically increasing with one exception element.
1751   MVT EltVT = V1.getValueType().getVectorElementType();
1752   unsigned EltsFromV2 = 0;
1753   unsigned V2Elt = 0;
1754   unsigned V2EltIdx0 = 0;
1755   unsigned CurrElt = 0;
1756   bool monotonic = true;
1757   if (EltVT == MVT::i8)
1758     V2EltIdx0 = 16;
1759   else if (EltVT == MVT::i16)
1760     V2EltIdx0 = 8;
1761   else if (EltVT == MVT::i32)
1762     V2EltIdx0 = 4;
1763   else
1764     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1765
1766   for (unsigned i = 0, e = PermMask.getNumOperands();
1767        EltsFromV2 <= 1 && monotonic && i != e;
1768        ++i) {
1769     unsigned SrcElt;
1770     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1771       SrcElt = 0;
1772     else
1773       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1774
1775     if (SrcElt >= V2EltIdx0) {
1776       ++EltsFromV2;
1777       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1778     } else if (CurrElt != SrcElt) {
1779       monotonic = false;
1780     }
1781
1782     ++CurrElt;
1783   }
1784
1785   if (EltsFromV2 == 1 && monotonic) {
1786     // Compute mask and shuffle
1787     MachineFunction &MF = DAG.getMachineFunction();
1788     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1789     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1790     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1791     // Initialize temporary register to 0
1792     SDValue InitTempReg =
1793       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1794     // Copy register's contents as index in SHUFFLE_MASK:
1795     SDValue ShufMaskOp =
1796       DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(),
1797                   DAG.getTargetConstant(V2Elt, MVT::i32),
1798                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1799     // Use shuffle mask in SHUFB synthetic instruction:
1800     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1801   } else {
1802    // Convert the SHUFFLE_VECTOR mask's input element units to the
1803    // actual bytes.
1804     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1805
1806     SmallVector<SDValue, 16> ResultMask;
1807     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1808       unsigned SrcElt;
1809       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1810         SrcElt = 0;
1811       else
1812         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1813
1814       for (unsigned j = 0; j < BytesPerElement; ++j) {
1815         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1816                                              MVT::i8));
1817       }
1818     }
1819
1820     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1821                                       &ResultMask[0], ResultMask.size());
1822     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1823   }
1824 }
1825
1826 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1827   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1828
1829   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1830     // For a constant, build the appropriate constant vector, which will
1831     // eventually simplify to a vector register load.
1832
1833     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1834     SmallVector<SDValue, 16> ConstVecValues;
1835     MVT VT;
1836     size_t n_copies;
1837
1838     // Create a constant vector:
1839     switch (Op.getValueType().getSimpleVT()) {
1840     default: assert(0 && "Unexpected constant value type in "
1841                          "LowerSCALAR_TO_VECTOR");
1842     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1843     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1844     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1845     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1846     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1847     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1848     }
1849
1850     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1851     for (size_t j = 0; j < n_copies; ++j)
1852       ConstVecValues.push_back(CValue);
1853
1854     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1855                        &ConstVecValues[0], ConstVecValues.size());
1856   } else {
1857     // Otherwise, copy the value from one register to another:
1858     switch (Op0.getValueType().getSimpleVT()) {
1859     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1860     case MVT::i8:
1861     case MVT::i16:
1862     case MVT::i32:
1863     case MVT::i64:
1864     case MVT::f32:
1865     case MVT::f64:
1866       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1867     }
1868   }
1869
1870   return SDValue();
1871 }
1872
1873 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1874   switch (Op.getValueType().getSimpleVT()) {
1875   default:
1876     cerr << "CellSPU: Unknown vector multiplication, got "
1877          << Op.getValueType().getMVTString()
1878          << "\n";
1879     abort();
1880     /*NOTREACHED*/
1881
1882   case MVT::v4i32: {
1883     SDValue rA = Op.getOperand(0);
1884     SDValue rB = Op.getOperand(1);
1885     SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1886     SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1887     SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1888     SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1889
1890     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1891     break;
1892   }
1893
1894   // Multiply two v8i16 vectors (pipeline friendly version):
1895   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1896   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1897   // c) Use SELB to select upper and lower halves from the intermediate results
1898   //
1899   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1900   // dual-issue. This code does manage to do this, even if it's a little on
1901   // the wacky side
1902   case MVT::v8i16: {
1903     MachineFunction &MF = DAG.getMachineFunction();
1904     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1905     SDValue Chain = Op.getOperand(0);
1906     SDValue rA = Op.getOperand(0);
1907     SDValue rB = Op.getOperand(1);
1908     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1909     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1910
1911     SDValue FSMBOp =
1912       DAG.getCopyToReg(Chain, FSMBIreg,
1913                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1914                                    DAG.getConstant(0xcccc, MVT::i16)));
1915
1916     SDValue HHProd =
1917       DAG.getCopyToReg(FSMBOp, HiProdReg,
1918                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1919
1920     SDValue HHProd_v4i32 =
1921       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1922                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1923
1924     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1925                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1926                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1927                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1928                                                HHProd_v4i32,
1929                                                DAG.getConstant(16, MVT::i16))),
1930                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1931   }
1932
1933   // This M00sE is N@stI! (apologies to Monty Python)
1934   //
1935   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1936   // is to break it all apart, sign extend, and reassemble the various
1937   // intermediate products.
1938   case MVT::v16i8: {
1939     SDValue rA = Op.getOperand(0);
1940     SDValue rB = Op.getOperand(1);
1941     SDValue c8 = DAG.getConstant(8, MVT::i32);
1942     SDValue c16 = DAG.getConstant(16, MVT::i32);
1943
1944     SDValue LLProd =
1945       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1946                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1947                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1948
1949     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1950
1951     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1952
1953     SDValue LHProd =
1954       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1955                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1956
1957     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1958                                      DAG.getConstant(0x2222, MVT::i16));
1959
1960     SDValue LoProdParts =
1961       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1962                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
1963                               LLProd, LHProd, FSMBmask));
1964
1965     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1966
1967     SDValue LoProd =
1968       DAG.getNode(ISD::AND, MVT::v4i32,
1969                   LoProdParts,
1970                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1971                               LoProdMask, LoProdMask,
1972                               LoProdMask, LoProdMask));
1973
1974     SDValue rAH =
1975       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1976                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1977
1978     SDValue rBH =
1979       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1981
1982     SDValue HLProd =
1983       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1984                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1985                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1986
1987     SDValue HHProd_1 =
1988       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1989                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1990                               DAG.getNode(SPUISD::VEC_SRA,
1991                                           MVT::v4i32, rAH, c8)),
1992                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1993                               DAG.getNode(SPUISD::VEC_SRA,
1994                                           MVT::v4i32, rBH, c8)));
1995
1996     SDValue HHProd =
1997       DAG.getNode(SPUISD::SELB, MVT::v8i16,
1998                   HLProd,
1999                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2000                   FSMBmask);
2001
2002     SDValue HiProd =
2003       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2004
2005     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2006                        DAG.getNode(ISD::OR, MVT::v4i32,
2007                                    LoProd, HiProd));
2008   }
2009   }
2010
2011   return SDValue();
2012 }
2013
2014 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2015   MachineFunction &MF = DAG.getMachineFunction();
2016   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2017
2018   SDValue A = Op.getOperand(0);
2019   SDValue B = Op.getOperand(1);
2020   MVT VT = Op.getValueType();
2021
2022   unsigned VRegBR, VRegC;
2023
2024   if (VT == MVT::f32) {
2025     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2026     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2027   } else {
2028     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2029     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2030   }
2031   // TODO: make sure we're feeding FPInterp the right arguments
2032   // Right now: fi B, frest(B)
2033
2034   // Computes BRcpl =
2035   // (Floating Interpolate (FP Reciprocal Estimate B))
2036   SDValue BRcpl =
2037       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2038                        DAG.getNode(SPUISD::FPInterp, VT, B,
2039                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2040
2041   // Computes A * BRcpl and stores in a temporary register
2042   SDValue AxBRcpl =
2043       DAG.getCopyToReg(BRcpl, VRegC,
2044                  DAG.getNode(ISD::FMUL, VT, A,
2045                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2046   // What's the Chain variable do? It's magic!
2047   // TODO: set Chain = Op(0).getEntryNode()
2048
2049   return DAG.getNode(ISD::FADD, VT,
2050                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2051                 DAG.getNode(ISD::FMUL, VT,
2052                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2053                         DAG.getNode(ISD::FSUB, VT, A,
2054                             DAG.getNode(ISD::FMUL, VT, B,
2055                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2056 }
2057
2058 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2059   MVT VT = Op.getValueType();
2060   SDValue N = Op.getOperand(0);
2061   SDValue Elt = Op.getOperand(1);
2062   SDValue retval;
2063
2064   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2065     // Constant argument:
2066     int EltNo = (int) C->getZExtValue();
2067
2068     // sanity checks:
2069     if (VT == MVT::i8 && EltNo >= 16)
2070       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2071     else if (VT == MVT::i16 && EltNo >= 8)
2072       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2073     else if (VT == MVT::i32 && EltNo >= 4)
2074       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2075     else if (VT == MVT::i64 && EltNo >= 2)
2076       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2077
2078     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2079       // i32 and i64: Element 0 is the preferred slot
2080       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
2081     }
2082
2083     // Need to generate shuffle mask and extract:
2084     int prefslot_begin = -1, prefslot_end = -1;
2085     int elt_byte = EltNo * VT.getSizeInBits() / 8;
2086
2087     switch (VT.getSimpleVT()) {
2088     default:
2089       assert(false && "Invalid value type!");
2090     case MVT::i8: {
2091       prefslot_begin = prefslot_end = 3;
2092       break;
2093     }
2094     case MVT::i16: {
2095       prefslot_begin = 2; prefslot_end = 3;
2096       break;
2097     }
2098     case MVT::i32:
2099     case MVT::f32: {
2100       prefslot_begin = 0; prefslot_end = 3;
2101       break;
2102     }
2103     case MVT::i64:
2104     case MVT::f64: {
2105       prefslot_begin = 0; prefslot_end = 7;
2106       break;
2107     }
2108     }
2109
2110     assert(prefslot_begin != -1 && prefslot_end != -1 &&
2111            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2112
2113     unsigned int ShufBytes[16];
2114     for (int i = 0; i < 16; ++i) {
2115       // zero fill uppper part of preferred slot, don't care about the
2116       // other slots:
2117       unsigned int mask_val;
2118       if (i <= prefslot_end) {
2119         mask_val =
2120           ((i < prefslot_begin)
2121            ? 0x80
2122            : elt_byte + (i - prefslot_begin));
2123
2124         ShufBytes[i] = mask_val;
2125       } else
2126         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2127     }
2128
2129     SDValue ShufMask[4];
2130     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2131       unsigned bidx = i / 4;
2132       unsigned int bits = ((ShufBytes[bidx] << 24) |
2133                            (ShufBytes[bidx+1] << 16) |
2134                            (ShufBytes[bidx+2] << 8) |
2135                            ShufBytes[bidx+3]);
2136       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2137     }
2138
2139     SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2140                                       &ShufMask[0],
2141                                       sizeof(ShufMask) / sizeof(ShufMask[0]));
2142
2143     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2144                          DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2145                                      N, N, ShufMaskVec));
2146   } else {
2147     // Variable index: Rotate the requested element into slot 0, then replicate
2148     // slot 0 across the vector
2149     MVT VecVT = N.getValueType();
2150     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2151       cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2152       abort();
2153     }
2154
2155     // Make life easier by making sure the index is zero-extended to i32
2156     if (Elt.getValueType() != MVT::i32)
2157       Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2158
2159     // Scale the index to a bit/byte shift quantity
2160     APInt scaleFactor =
2161             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2162     unsigned scaleShift = scaleFactor.logBase2();
2163     SDValue vecShift;
2164
2165     if (scaleShift > 0) {
2166       // Scale the shift factor:
2167       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2168               DAG.getConstant(scaleShift, MVT::i32));
2169     }
2170
2171     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2172
2173     // Replicate the bytes starting at byte 0 across the entire vector (for
2174     // consistency with the notion of a unified register set)
2175     SDValue replicate;
2176
2177     switch (VT.getSimpleVT()) {
2178     default:
2179       cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2180       abort();
2181       /*NOTREACHED*/
2182     case MVT::i8: {
2183       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2184       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2185                               factor, factor);
2186       break;
2187     }
2188     case MVT::i16: {
2189       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2190       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2191                               factor, factor);
2192       break;
2193     }
2194     case MVT::i32:
2195     case MVT::f32: {
2196       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2197       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2198                               factor, factor);
2199       break;
2200     }
2201     case MVT::i64:
2202     case MVT::f64: {
2203       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2204       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2205       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2206                               loFactor, hiFactor);
2207       break;
2208     }
2209     }
2210
2211     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2212                          DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
2213   }
2214
2215   return retval;
2216 }
2217
2218 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2219   SDValue VecOp = Op.getOperand(0);
2220   SDValue ValOp = Op.getOperand(1);
2221   SDValue IdxOp = Op.getOperand(2);
2222   MVT VT = Op.getValueType();
2223
2224   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2225   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2226
2227   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2228   // Use $2 because it's always 16-byte aligned and it's available:
2229   SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2230
2231   SDValue result =
2232     DAG.getNode(SPUISD::SHUFB, VT,
2233                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2234                 VecOp,
2235                 DAG.getNode(SPUISD::SHUFFLE_MASK, VT,
2236                             DAG.getNode(ISD::ADD, PtrVT,
2237                                         PtrBase,
2238                                         DAG.getConstant(CN->getZExtValue(),
2239                                                         PtrVT))));
2240
2241   return result;
2242 }
2243
2244 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2245 {
2246   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2247
2248   assert(Op.getValueType() == MVT::i8);
2249   switch (Opc) {
2250   default:
2251     assert(0 && "Unhandled i8 math operator");
2252     /*NOTREACHED*/
2253     break;
2254   case ISD::SUB: {
2255     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2256     // the result:
2257     SDValue N1 = Op.getOperand(1);
2258     N0 = (N0.getOpcode() != ISD::Constant
2259           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2260           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2261                             MVT::i16));
2262     N1 = (N1.getOpcode() != ISD::Constant
2263           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2264           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2265                             MVT::i16));
2266     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2267                        DAG.getNode(Opc, MVT::i16, N0, N1));
2268   }
2269   case ISD::ROTR:
2270   case ISD::ROTL: {
2271     SDValue N1 = Op.getOperand(1);
2272     unsigned N1Opc;
2273     N0 = (N0.getOpcode() != ISD::Constant
2274           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2275           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2276                             MVT::i16));
2277     N1Opc = N1.getValueType().bitsLT(MVT::i32)
2278             ? ISD::ZERO_EXTEND
2279             : ISD::TRUNCATE;
2280     N1 = (N1.getOpcode() != ISD::Constant
2281           ? DAG.getNode(N1Opc, MVT::i32, N1)
2282           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2283                             MVT::i32));
2284     SDValue ExpandArg =
2285       DAG.getNode(ISD::OR, MVT::i16, N0,
2286                   DAG.getNode(ISD::SHL, MVT::i16,
2287                               N0, DAG.getConstant(8, MVT::i32)));
2288     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2289                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2290   }
2291   case ISD::SRL:
2292   case ISD::SHL: {
2293     SDValue N1 = Op.getOperand(1);
2294     unsigned N1Opc;
2295     N0 = (N0.getOpcode() != ISD::Constant
2296           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2297           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2298                             MVT::i16));
2299     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2300             ? ISD::ZERO_EXTEND
2301             : ISD::TRUNCATE;
2302     N1 = (N1.getOpcode() != ISD::Constant
2303           ? DAG.getNode(N1Opc, MVT::i16, N1)
2304           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2305                             MVT::i16));
2306     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2307                        DAG.getNode(Opc, MVT::i16, N0, N1));
2308   }
2309   case ISD::SRA: {
2310     SDValue N1 = Op.getOperand(1);
2311     unsigned N1Opc;
2312     N0 = (N0.getOpcode() != ISD::Constant
2313           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2314           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2315                             MVT::i16));
2316     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2317             ? ISD::SIGN_EXTEND
2318             : ISD::TRUNCATE;
2319     N1 = (N1.getOpcode() != ISD::Constant
2320           ? DAG.getNode(N1Opc, MVT::i16, N1)
2321           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2322                             MVT::i16));
2323     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2324                        DAG.getNode(Opc, MVT::i16, N0, N1));
2325   }
2326   case ISD::MUL: {
2327     SDValue N1 = Op.getOperand(1);
2328     unsigned N1Opc;
2329     N0 = (N0.getOpcode() != ISD::Constant
2330           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2331           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2332                             MVT::i16));
2333     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2334     N1 = (N1.getOpcode() != ISD::Constant
2335           ? DAG.getNode(N1Opc, MVT::i16, N1)
2336           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2337                             MVT::i16));
2338     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2339                        DAG.getNode(Opc, MVT::i16, N0, N1));
2340     break;
2341   }
2342   }
2343
2344   return SDValue();
2345 }
2346
2347 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2348 {
2349   MVT VT = Op.getValueType();
2350   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2351
2352   SDValue Op0 = Op.getOperand(0);
2353
2354   switch (Opc) {
2355   case ISD::ZERO_EXTEND:
2356   case ISD::SIGN_EXTEND:
2357   case ISD::ANY_EXTEND: {
2358     MVT Op0VT = Op0.getValueType();
2359     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2360
2361     assert(Op0VT == MVT::i32
2362            && "CellSPU: Zero/sign extending something other than i32");
2363
2364     DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
2365
2366     SDValue PromoteScalar =
2367             DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2368
2369     if (Opc != ISD::SIGN_EXTEND) {
2370       // Use a shuffle to zero extend the i32 to i64 directly:
2371       SDValue shufMask =
2372               DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2373                           DAG.getConstant(0x80808080, MVT::i32),
2374                           DAG.getConstant(0x00010203, MVT::i32),
2375                           DAG.getConstant(0x80808080, MVT::i32),
2376                           DAG.getConstant(0x08090a0b, MVT::i32));
2377       SDValue zextShuffle =
2378               DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2379                           PromoteScalar, PromoteScalar, shufMask);
2380
2381       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2382                          DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
2383     } else {
2384       // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
2385       // right and propagate the sign bit) instruction.
2386       SDValue RotQuad =
2387               DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
2388                           PromoteScalar, DAG.getConstant(4, MVT::i32));
2389       SDValue SignQuad =
2390               DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
2391                           PromoteScalar, DAG.getConstant(32, MVT::i32));
2392       SDValue SelMask =
2393               DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
2394                           DAG.getConstant(0xf0f0, MVT::i16));
2395       SDValue CombineQuad =
2396               DAG.getNode(SPUISD::SELB, Op0VecVT,
2397                           SignQuad, RotQuad, SelMask);
2398
2399       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2400                          DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
2401     }
2402   }
2403
2404   case ISD::ADD: {
2405     // Turn operands into vectors to satisfy type checking (shufb works on
2406     // vectors)
2407     SDValue Op0 =
2408       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2409     SDValue Op1 =
2410       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2411     SmallVector<SDValue, 16> ShufBytes;
2412
2413     // Create the shuffle mask for "rotating" the borrow up one register slot
2414     // once the borrow is generated.
2415     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2416     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2417     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2418     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2419
2420     SDValue CarryGen =
2421       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2422     SDValue ShiftedCarry =
2423       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2424                   CarryGen, CarryGen,
2425                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2426                               &ShufBytes[0], ShufBytes.size()));
2427
2428     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2429                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2430                                    Op0, Op1, ShiftedCarry));
2431   }
2432
2433   case ISD::SUB: {
2434     // Turn operands into vectors to satisfy type checking (shufb works on
2435     // vectors)
2436     SDValue Op0 =
2437       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2438     SDValue Op1 =
2439       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2440     SmallVector<SDValue, 16> ShufBytes;
2441
2442     // Create the shuffle mask for "rotating" the borrow up one register slot
2443     // once the borrow is generated.
2444     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2445     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2446     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2447     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2448
2449     SDValue BorrowGen =
2450       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2451     SDValue ShiftedBorrow =
2452       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2453                   BorrowGen, BorrowGen,
2454                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2455                               &ShufBytes[0], ShufBytes.size()));
2456
2457     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2458                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2459                                    Op0, Op1, ShiftedBorrow));
2460   }
2461
2462   case ISD::SHL: {
2463     SDValue ShiftAmt = Op.getOperand(1);
2464     MVT ShiftAmtVT = ShiftAmt.getValueType();
2465     SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2466     SDValue MaskLower =
2467       DAG.getNode(SPUISD::SELB, VecVT,
2468                   Op0Vec,
2469                   DAG.getConstant(0, VecVT),
2470                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2471                               DAG.getConstant(0xff00ULL, MVT::i16)));
2472     SDValue ShiftAmtBytes =
2473       DAG.getNode(ISD::SRL, ShiftAmtVT,
2474                   ShiftAmt,
2475                   DAG.getConstant(3, ShiftAmtVT));
2476     SDValue ShiftAmtBits =
2477       DAG.getNode(ISD::AND, ShiftAmtVT,
2478                   ShiftAmt,
2479                   DAG.getConstant(7, ShiftAmtVT));
2480
2481     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2482                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2483                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2484                                                MaskLower, ShiftAmtBytes),
2485                                    ShiftAmtBits));
2486   }
2487
2488   case ISD::SRL: {
2489     MVT VT = Op.getValueType();
2490     SDValue ShiftAmt = Op.getOperand(1);
2491     MVT ShiftAmtVT = ShiftAmt.getValueType();
2492     SDValue ShiftAmtBytes =
2493       DAG.getNode(ISD::SRL, ShiftAmtVT,
2494                   ShiftAmt,
2495                   DAG.getConstant(3, ShiftAmtVT));
2496     SDValue ShiftAmtBits =
2497       DAG.getNode(ISD::AND, ShiftAmtVT,
2498                   ShiftAmt,
2499                   DAG.getConstant(7, ShiftAmtVT));
2500
2501     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2502                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2503                                    Op0, ShiftAmtBytes),
2504                        ShiftAmtBits);
2505   }
2506
2507   case ISD::SRA: {
2508     // Promote Op0 to vector
2509     SDValue Op0 =
2510       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2511     SDValue ShiftAmt = Op.getOperand(1);
2512     MVT ShiftVT = ShiftAmt.getValueType();
2513
2514     // Negate variable shift amounts
2515     if (!isa<ConstantSDNode>(ShiftAmt)) {
2516       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2517                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2518     }
2519
2520     SDValue UpperHalfSign =
2521       DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
2522                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2523                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2524                                           Op0, DAG.getConstant(31, MVT::i32))));
2525     SDValue UpperHalfSignMask =
2526       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2527     SDValue UpperLowerMask =
2528       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2529                   DAG.getConstant(0xff00, MVT::i16));
2530     SDValue UpperLowerSelect =
2531       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2532                   UpperHalfSignMask, Op0, UpperLowerMask);
2533     SDValue RotateLeftBytes =
2534       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2535                   UpperLowerSelect, ShiftAmt);
2536     SDValue RotateLeftBits =
2537       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2538                   RotateLeftBytes, ShiftAmt);
2539
2540     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2541                        RotateLeftBits);
2542   }
2543   }
2544
2545   return SDValue();
2546 }
2547
2548 //! Lower byte immediate operations for v16i8 vectors:
2549 static SDValue
2550 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2551   SDValue ConstVec;
2552   SDValue Arg;
2553   MVT VT = Op.getValueType();
2554
2555   ConstVec = Op.getOperand(0);
2556   Arg = Op.getOperand(1);
2557   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2558     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2559       ConstVec = ConstVec.getOperand(0);
2560     } else {
2561       ConstVec = Op.getOperand(1);
2562       Arg = Op.getOperand(0);
2563       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2564         ConstVec = ConstVec.getOperand(0);
2565       }
2566     }
2567   }
2568
2569   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2570     uint64_t VectorBits[2];
2571     uint64_t UndefBits[2];
2572     uint64_t SplatBits, SplatUndef;
2573     int SplatSize;
2574
2575     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2576         && isConstantSplat(VectorBits, UndefBits,
2577                            VT.getVectorElementType().getSizeInBits(),
2578                            SplatBits, SplatUndef, SplatSize)) {
2579       SDValue tcVec[16];
2580       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2581       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2582
2583       // Turn the BUILD_VECTOR into a set of target constants:
2584       for (size_t i = 0; i < tcVecSize; ++i)
2585         tcVec[i] = tc;
2586
2587       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2588                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2589     }
2590   }
2591   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2592   // lowered.  Return the operation, rather than a null SDValue.
2593   return Op;
2594 }
2595
2596 //! Lower i32 multiplication
2597 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2598                           unsigned Opc) {
2599   switch (VT.getSimpleVT()) {
2600   default:
2601     cerr << "CellSPU: Unknown LowerMUL value type, got "
2602          << Op.getValueType().getMVTString()
2603          << "\n";
2604     abort();
2605     /*NOTREACHED*/
2606
2607   case MVT::i32: {
2608     SDValue rA = Op.getOperand(0);
2609     SDValue rB = Op.getOperand(1);
2610
2611     return DAG.getNode(ISD::ADD, MVT::i32,
2612                        DAG.getNode(ISD::ADD, MVT::i32,
2613                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2614                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2615                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2616   }
2617   }
2618
2619   return SDValue();
2620 }
2621
2622 //! Custom lowering for CTPOP (count population)
2623 /*!
2624   Custom lowering code that counts the number ones in the input
2625   operand. SPU has such an instruction, but it counts the number of
2626   ones per byte, which then have to be accumulated.
2627 */
2628 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2629   MVT VT = Op.getValueType();
2630   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2631
2632   switch (VT.getSimpleVT()) {
2633   default:
2634     assert(false && "Invalid value type!");
2635   case MVT::i8: {
2636     SDValue N = Op.getOperand(0);
2637     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2638
2639     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2640     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2641
2642     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2643   }
2644
2645   case MVT::i16: {
2646     MachineFunction &MF = DAG.getMachineFunction();
2647     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2648
2649     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2650
2651     SDValue N = Op.getOperand(0);
2652     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2653     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2654     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2655
2656     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2657     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2658
2659     // CNTB_result becomes the chain to which all of the virtual registers
2660     // CNTB_reg, SUM1_reg become associated:
2661     SDValue CNTB_result =
2662       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2663
2664     SDValue CNTB_rescopy =
2665       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2666
2667     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2668
2669     return DAG.getNode(ISD::AND, MVT::i16,
2670                        DAG.getNode(ISD::ADD, MVT::i16,
2671                                    DAG.getNode(ISD::SRL, MVT::i16,
2672                                                Tmp1, Shift1),
2673                                    Tmp1),
2674                        Mask0);
2675   }
2676
2677   case MVT::i32: {
2678     MachineFunction &MF = DAG.getMachineFunction();
2679     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2680
2681     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2682     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2683
2684     SDValue N = Op.getOperand(0);
2685     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2686     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2687     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2688     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2689
2690     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2691     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2692
2693     // CNTB_result becomes the chain to which all of the virtual registers
2694     // CNTB_reg, SUM1_reg become associated:
2695     SDValue CNTB_result =
2696       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2697
2698     SDValue CNTB_rescopy =
2699       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2700
2701     SDValue Comp1 =
2702       DAG.getNode(ISD::SRL, MVT::i32,
2703                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2704
2705     SDValue Sum1 =
2706       DAG.getNode(ISD::ADD, MVT::i32,
2707                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2708
2709     SDValue Sum1_rescopy =
2710       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2711
2712     SDValue Comp2 =
2713       DAG.getNode(ISD::SRL, MVT::i32,
2714                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2715                   Shift2);
2716     SDValue Sum2 =
2717       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2718                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2719
2720     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2721   }
2722
2723   case MVT::i64:
2724     break;
2725   }
2726
2727   return SDValue();
2728 }
2729
2730 //! Lower ISD::SELECT_CC
2731 /*!
2732   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2733   SELB instruction.
2734
2735   \note Need to revisit this in the future: if the code path through the true
2736   and false value computations is longer than the latency of a branch (6
2737   cycles), then it would be more advantageous to branch and insert a new basic
2738   block and branch on the condition. However, this code does not make that
2739   assumption, given the simplisitc uses so far.
2740  */
2741
2742 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2743   MVT VT = Op.getValueType();
2744   SDValue lhs = Op.getOperand(0);
2745   SDValue rhs = Op.getOperand(1);
2746   SDValue trueval = Op.getOperand(2);
2747   SDValue falseval = Op.getOperand(3);
2748   SDValue condition = Op.getOperand(4);
2749
2750   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2751   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2752   // with another "cannot select select_cc" assert:
2753
2754   SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
2755   return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
2756 }
2757
2758 //! Custom (target-specific) lowering entry point
2759 /*!
2760   This is where LLVM's DAG selection process calls to do target-specific
2761   lowering of nodes.
2762  */
2763 SDValue
2764 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2765 {
2766   unsigned Opc = (unsigned) Op.getOpcode();
2767   MVT VT = Op.getValueType();
2768
2769   switch (Opc) {
2770   default: {
2771     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2772     cerr << "Op.getOpcode() = " << Opc << "\n";
2773     cerr << "*Op.getNode():\n";
2774     Op.getNode()->dump();
2775     abort();
2776   }
2777   case ISD::LOAD:
2778   case ISD::SEXTLOAD:
2779   case ISD::ZEXTLOAD:
2780     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2781   case ISD::STORE:
2782     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2783   case ISD::ConstantPool:
2784     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2785   case ISD::GlobalAddress:
2786     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2787   case ISD::JumpTable:
2788     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2789   case ISD::Constant:
2790     return LowerConstant(Op, DAG);
2791   case ISD::ConstantFP:
2792     return LowerConstantFP(Op, DAG);
2793   case ISD::BRCOND:
2794     return LowerBRCOND(Op, DAG);
2795   case ISD::FORMAL_ARGUMENTS:
2796     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2797   case ISD::CALL:
2798     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2799   case ISD::RET:
2800     return LowerRET(Op, DAG, getTargetMachine());
2801
2802
2803   // i8, i64 math ops:
2804   case ISD::ZERO_EXTEND:
2805   case ISD::SIGN_EXTEND:
2806   case ISD::ANY_EXTEND:
2807   case ISD::ADD:
2808   case ISD::SUB:
2809   case ISD::ROTR:
2810   case ISD::ROTL:
2811   case ISD::SRL:
2812   case ISD::SHL:
2813   case ISD::SRA: {
2814     if (VT == MVT::i8)
2815       return LowerI8Math(Op, DAG, Opc);
2816     else if (VT == MVT::i64)
2817       return LowerI64Math(Op, DAG, Opc);
2818     break;
2819   }
2820
2821   // Vector-related lowering.
2822   case ISD::BUILD_VECTOR:
2823     return LowerBUILD_VECTOR(Op, DAG);
2824   case ISD::SCALAR_TO_VECTOR:
2825     return LowerSCALAR_TO_VECTOR(Op, DAG);
2826   case ISD::VECTOR_SHUFFLE:
2827     return LowerVECTOR_SHUFFLE(Op, DAG);
2828   case ISD::EXTRACT_VECTOR_ELT:
2829     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2830   case ISD::INSERT_VECTOR_ELT:
2831     return LowerINSERT_VECTOR_ELT(Op, DAG);
2832
2833   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2834   case ISD::AND:
2835   case ISD::OR:
2836   case ISD::XOR:
2837     return LowerByteImmed(Op, DAG);
2838
2839   // Vector and i8 multiply:
2840   case ISD::MUL:
2841     if (VT.isVector())
2842       return LowerVectorMUL(Op, DAG);
2843     else if (VT == MVT::i8)
2844       return LowerI8Math(Op, DAG, Opc);
2845     else
2846       return LowerMUL(Op, DAG, VT, Opc);
2847
2848   case ISD::FDIV:
2849     if (VT == MVT::f32 || VT == MVT::v4f32)
2850       return LowerFDIVf32(Op, DAG);
2851 #if 0
2852     // This is probably a libcall
2853     else if (Op.getValueType() == MVT::f64)
2854       return LowerFDIVf64(Op, DAG);
2855 #endif
2856     else
2857       assert(0 && "Calling FDIV on unsupported MVT");
2858
2859   case ISD::CTPOP:
2860     return LowerCTPOP(Op, DAG);
2861
2862   case ISD::SELECT_CC:
2863     return LowerSELECT_CC(Op, DAG);
2864   }
2865
2866   return SDValue();
2867 }
2868
2869 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2870                                            SmallVectorImpl<SDValue>&Results,
2871                                            SelectionDAG &DAG)
2872 {
2873 #if 0
2874   unsigned Opc = (unsigned) N->getOpcode();
2875   MVT OpVT = N->getValueType(0);
2876
2877   switch (Opc) {
2878   default: {
2879     cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2880     cerr << "Op.getOpcode() = " << Opc << "\n";
2881     cerr << "*Op.getNode():\n";
2882     N->dump();
2883     abort();
2884     /*NOTREACHED*/
2885   }
2886   }
2887 #endif
2888
2889   /* Otherwise, return unchanged */
2890 }
2891
2892 //===----------------------------------------------------------------------===//
2893 // Target Optimization Hooks
2894 //===----------------------------------------------------------------------===//
2895
2896 SDValue
2897 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2898 {
2899 #if 0
2900   TargetMachine &TM = getTargetMachine();
2901 #endif
2902   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2903   SelectionDAG &DAG = DCI.DAG;
2904   SDValue Op0 = N->getOperand(0);      // everything has at least one operand
2905   SDValue Result;                     // Initially, NULL result
2906
2907   switch (N->getOpcode()) {
2908   default: break;
2909   case ISD::ADD: {
2910     SDValue Op1 = N->getOperand(1);
2911
2912     if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2913       SDValue Op01 = Op0.getOperand(1);
2914       if (Op01.getOpcode() == ISD::Constant
2915           || Op01.getOpcode() == ISD::TargetConstant) {
2916         // (add <const>, (SPUindirect <arg>, <const>)) ->
2917         // (SPUindirect <arg>, <const + const>)
2918         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2919         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2920         SDValue combinedConst =
2921           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2922                           Op0.getValueType());
2923
2924         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2925                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2926         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2927                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2928         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2929                            Op0.getOperand(0), combinedConst);
2930       }
2931     } else if (isa<ConstantSDNode>(Op0)
2932                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2933       SDValue Op11 = Op1.getOperand(1);
2934       if (Op11.getOpcode() == ISD::Constant
2935           || Op11.getOpcode() == ISD::TargetConstant) {
2936         // (add (SPUindirect <arg>, <const>), <const>) ->
2937         // (SPUindirect <arg>, <const + const>)
2938         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2939         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2940         SDValue combinedConst =
2941           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2942                           Op0.getValueType());
2943
2944         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2945                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2946         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2947                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2948
2949         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2950                            Op1.getOperand(0), combinedConst);
2951       }
2952     }
2953     break;
2954   }
2955   case ISD::SIGN_EXTEND:
2956   case ISD::ZERO_EXTEND:
2957   case ISD::ANY_EXTEND: {
2958     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT &&
2959         N->getValueType(0) == Op0.getValueType()) {
2960       // (any_extend (SPUextract_elt0 <arg>)) ->
2961       // (SPUextract_elt0 <arg>)
2962       // Types must match, however...
2963       DEBUG(cerr << "Replace: ");
2964       DEBUG(N->dump(&DAG));
2965       DEBUG(cerr << "\nWith:    ");
2966       DEBUG(Op0.getNode()->dump(&DAG));
2967       DEBUG(cerr << "\n");
2968
2969       return Op0;
2970     }
2971     break;
2972   }
2973   case SPUISD::IndirectAddr: {
2974     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2975       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2976       if (CN->getZExtValue() == 0) {
2977         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2978         // (SPUaform <addr>, 0)
2979
2980         DEBUG(cerr << "Replace: ");
2981         DEBUG(N->dump(&DAG));
2982         DEBUG(cerr << "\nWith:    ");
2983         DEBUG(Op0.getNode()->dump(&DAG));
2984         DEBUG(cerr << "\n");
2985
2986         return Op0;
2987       }
2988     }
2989     break;
2990   }
2991   case SPUISD::SHLQUAD_L_BITS:
2992   case SPUISD::SHLQUAD_L_BYTES:
2993   case SPUISD::VEC_SHL:
2994   case SPUISD::VEC_SRL:
2995   case SPUISD::VEC_SRA:
2996   case SPUISD::ROTQUAD_RZ_BYTES:
2997   case SPUISD::ROTQUAD_RZ_BITS: {
2998     SDValue Op1 = N->getOperand(1);
2999
3000     if (isa<ConstantSDNode>(Op1)) {
3001       // Kill degenerate vector shifts:
3002       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
3003
3004       if (CN->getZExtValue() == 0) {
3005         Result = Op0;
3006       }
3007     }
3008     break;
3009   }
3010   case SPUISD::PROMOTE_SCALAR: {
3011     switch (Op0.getOpcode()) {
3012     default:
3013       break;
3014     case ISD::ANY_EXTEND:
3015     case ISD::ZERO_EXTEND:
3016     case ISD::SIGN_EXTEND: {
3017       // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
3018       // <arg>
3019       // but only if the SPUpromote_scalar and <arg> types match.
3020       SDValue Op00 = Op0.getOperand(0);
3021       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3022         SDValue Op000 = Op00.getOperand(0);
3023         if (Op000.getValueType() == N->getValueType(0)) {
3024           Result = Op000;
3025         }
3026       }
3027       break;
3028     }
3029     case SPUISD::VEC2PREFSLOT: {
3030       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
3031       // <arg>
3032       Result = Op0.getOperand(0);
3033       break;
3034     }
3035     }
3036     break;
3037   }
3038   }
3039   // Otherwise, return unchanged.
3040 #ifdef NDEBUG
3041   if (Result.getNode()) {
3042     DEBUG(cerr << "\nReplace.SPU: ");
3043     DEBUG(N->dump(&DAG));
3044     DEBUG(cerr << "\nWith:        ");
3045     DEBUG(Result.getNode()->dump(&DAG));
3046     DEBUG(cerr << "\n");
3047   }
3048 #endif
3049
3050   return Result;
3051 }
3052
3053 //===----------------------------------------------------------------------===//
3054 // Inline Assembly Support
3055 //===----------------------------------------------------------------------===//
3056
3057 /// getConstraintType - Given a constraint letter, return the type of
3058 /// constraint it is for this target.
3059 SPUTargetLowering::ConstraintType
3060 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3061   if (ConstraintLetter.size() == 1) {
3062     switch (ConstraintLetter[0]) {
3063     default: break;
3064     case 'b':
3065     case 'r':
3066     case 'f':
3067     case 'v':
3068     case 'y':
3069       return C_RegisterClass;
3070     }
3071   }
3072   return TargetLowering::getConstraintType(ConstraintLetter);
3073 }
3074
3075 std::pair<unsigned, const TargetRegisterClass*>
3076 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3077                                                 MVT VT) const
3078 {
3079   if (Constraint.size() == 1) {
3080     // GCC RS6000 Constraint Letters
3081     switch (Constraint[0]) {
3082     case 'b':   // R1-R31
3083     case 'r':   // R0-R31
3084       if (VT == MVT::i64)
3085         return std::make_pair(0U, SPU::R64CRegisterClass);
3086       return std::make_pair(0U, SPU::R32CRegisterClass);
3087     case 'f':
3088       if (VT == MVT::f32)
3089         return std::make_pair(0U, SPU::R32FPRegisterClass);
3090       else if (VT == MVT::f64)
3091         return std::make_pair(0U, SPU::R64FPRegisterClass);
3092       break;
3093     case 'v':
3094       return std::make_pair(0U, SPU::GPRCRegisterClass);
3095     }
3096   }
3097
3098   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3099 }
3100
3101 //! Compute used/known bits for a SPU operand
3102 void
3103 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3104                                                   const APInt &Mask,
3105                                                   APInt &KnownZero,
3106                                                   APInt &KnownOne,
3107                                                   const SelectionDAG &DAG,
3108                                                   unsigned Depth ) const {
3109 #if 0
3110   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3111 #endif
3112
3113   switch (Op.getOpcode()) {
3114   default:
3115     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3116     break;
3117
3118 #if 0
3119   case CALL:
3120   case SHUFB:
3121   case SHUFFLE_MASK:
3122   case CNTB:
3123 #endif
3124
3125   case SPUISD::PROMOTE_SCALAR: {
3126     SDValue Op0 = Op.getOperand(0);
3127     MVT Op0VT = Op0.getValueType();
3128     unsigned Op0VTBits = Op0VT.getSizeInBits();
3129     uint64_t InMask = Op0VT.getIntegerVTBitMask();
3130     KnownZero |= APInt(Op0VTBits, ~InMask, false);
3131     KnownOne |= APInt(Op0VTBits, InMask, false);
3132     break;
3133   }
3134
3135   case SPUISD::LDRESULT:
3136   case SPUISD::VEC2PREFSLOT:
3137   case SPUISD::VEC2PREFSLOT_CHAINED: {
3138     MVT OpVT = Op.getValueType();
3139     unsigned OpVTBits = OpVT.getSizeInBits();
3140     uint64_t InMask = OpVT.getIntegerVTBitMask();
3141     KnownZero |= APInt(OpVTBits, ~InMask, false);
3142     KnownOne |= APInt(OpVTBits, InMask, false);
3143     break;
3144   }
3145
3146 #if 0
3147   case EXTRACT_I1_ZEXT:
3148   case EXTRACT_I1_SEXT:
3149   case EXTRACT_I8_ZEXT:
3150   case EXTRACT_I8_SEXT:
3151   case MPY:
3152   case MPYU:
3153   case MPYH:
3154   case MPYHH:
3155   case SPUISD::SHLQUAD_L_BITS:
3156   case SPUISD::SHLQUAD_L_BYTES:
3157   case SPUISD::VEC_SHL:
3158   case SPUISD::VEC_SRL:
3159   case SPUISD::VEC_SRA:
3160   case SPUISD::VEC_ROTL:
3161   case SPUISD::VEC_ROTR:
3162   case SPUISD::ROTQUAD_RZ_BYTES:
3163   case SPUISD::ROTQUAD_RZ_BITS:
3164   case SPUISD::ROTBYTES_LEFT:
3165   case SPUISD::ROTBYTES_LEFT_CHAINED:
3166   case SPUISD::SELECT_MASK:
3167   case SPUISD::SELB:
3168   case SPUISD::FPInterp:
3169   case SPUISD::FPRecipEst:
3170   case SPUISD::SEXT32TO64:
3171 #endif
3172   }
3173 }
3174
3175 // LowerAsmOperandForConstraint
3176 void
3177 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3178                                                 char ConstraintLetter,
3179                                                 bool hasMemory,
3180                                                 std::vector<SDValue> &Ops,
3181                                                 SelectionDAG &DAG) const {
3182   // Default, for the time being, to the base class handler
3183   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3184                                                Ops, DAG);
3185 }
3186
3187 /// isLegalAddressImmediate - Return true if the integer value can be used
3188 /// as the offset of the target addressing mode.
3189 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3190                                                 const Type *Ty) const {
3191   // SPU's addresses are 256K:
3192   return (V > -(1 << 18) && V < (1 << 18) - 1);
3193 }
3194
3195 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3196   return false;
3197 }
3198
3199 bool
3200 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3201   // The SPU target isn't yet aware of offsets.
3202   return false;
3203 }