1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
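// The "preferred slot" is the byte range within a 128-bit SPU register where
// scalar values of a given type are expected to reside (e.g., bytes 0..3 for a
// 32-bit value, byte 3 for an 8-bit value); prefslot_byte records the first
// byte of that slot and is used below to adjust rotation amounts.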
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form address.
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170 // Custom lower BRCOND for i1, i8 to "promote" the condition to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182 // SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FLOG , MVT::f64, Expand);
193 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
194 setOperationAction(ISD::FLOG10,MVT::f64, Expand);
195 setOperationAction(ISD::FEXP , MVT::f64, Expand);
196 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
197 setOperationAction(ISD::FSIN , MVT::f32, Expand);
198 setOperationAction(ISD::FCOS , MVT::f32, Expand);
199 setOperationAction(ISD::FREM , MVT::f32, Expand);
200 setOperationAction(ISD::FLOG , MVT::f32, Expand);
201 setOperationAction(ISD::FLOG2, MVT::f32, Expand);
202 setOperationAction(ISD::FLOG10,MVT::f32, Expand);
203 setOperationAction(ISD::FEXP , MVT::f32, Expand);
204 setOperationAction(ISD::FEXP2, MVT::f32, Expand);
206 // SPU has no hardware square root instruction; expand FSQRT
207 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
208 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
210 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
211 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
213 // SPU can do rotate right and left, so legalize it... but customize for i8
214 // because instructions don't exist.
216 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
218 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
219 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
220 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
222 setOperationAction(ISD::ROTL, MVT::i32, Legal);
223 setOperationAction(ISD::ROTL, MVT::i16, Legal);
224 setOperationAction(ISD::ROTL, MVT::i8, Custom);
225 // SPU has no native version of shift left/right for i8
226 setOperationAction(ISD::SHL, MVT::i8, Custom);
227 setOperationAction(ISD::SRL, MVT::i8, Custom);
228 setOperationAction(ISD::SRA, MVT::i8, Custom);
229 // And SPU needs custom lowering for shift left/right for i64
230 setOperationAction(ISD::SHL, MVT::i64, Custom);
231 setOperationAction(ISD::SRL, MVT::i64, Custom);
232 setOperationAction(ISD::SRA, MVT::i64, Custom);
234 // Custom lower i8, i32 and i64 multiplications
235 setOperationAction(ISD::MUL, MVT::i8, Custom);
236 setOperationAction(ISD::MUL, MVT::i32, Custom);
237 setOperationAction(ISD::MUL, MVT::i64, Custom);
239 // Need to custom handle (some) common i8, i64 math ops
240 setOperationAction(ISD::ADD, MVT::i64, Custom);
241 setOperationAction(ISD::SUB, MVT::i8, Custom);
242 setOperationAction(ISD::SUB, MVT::i64, Custom);
244 // SPU does not have BSWAP, but it does support CTLZ for i32.
245 // CTPOP has to be custom lowered.
246 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
247 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
249 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
250 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
251 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
252 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
254 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
255 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
257 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
259 // SPU has a version of select that implements (a&~c)|(b&c), just like
260 // select ought to work:
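// For example, selb with a = 0x00FF, b = 0xAB12 and mask c = 0xFF00 computes
// (a & ~c) | (b & c) = 0x00FF | 0xAB00 = 0xABFF: bits are taken from b where
// the mask is 1 and from a where it is 0.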
261 setOperationAction(ISD::SELECT, MVT::i1, Promote);
262 setOperationAction(ISD::SELECT, MVT::i8, Legal);
263 setOperationAction(ISD::SELECT, MVT::i16, Legal);
264 setOperationAction(ISD::SELECT, MVT::i32, Legal);
265 setOperationAction(ISD::SELECT, MVT::i64, Expand);
267 setOperationAction(ISD::SETCC, MVT::i1, Promote);
268 setOperationAction(ISD::SETCC, MVT::i8, Legal);
269 setOperationAction(ISD::SETCC, MVT::i16, Legal);
270 setOperationAction(ISD::SETCC, MVT::i32, Legal);
271 setOperationAction(ISD::SETCC, MVT::i64, Expand);
273 // Zero extension and sign extension for i64 have to be custom lowered.
275 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
276 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
277 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
279 // SPU has a legal FP -> signed INT instruction
280 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
281 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
283 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
285 // FDIV on SPU requires custom lowering
286 setOperationAction(ISD::FDIV, MVT::f32, Custom);
287 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
289 // SPU has [U|S]INT_TO_FP
290 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
291 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
292 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
293 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
294 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
295 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
296 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
297 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
299 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
300 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
301 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
302 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
304 // We cannot sextinreg(i1). Expand to shifts.
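// (Expansion rewrites sext_inreg(x, i1) as a shift pair, e.g. (x << 31) >> 31
// with an arithmetic right shift for an i32 value.)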
305 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
307 // Support label based line numbers.
308 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
309 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
311 // We want to legalize GlobalAddress and ConstantPool nodes into the
312 // appropriate instructions to materialize the address.
313 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
315 MVT VT = (MVT::SimpleValueType)sctype;
317 setOperationAction(ISD::GlobalAddress, VT, Custom);
318 setOperationAction(ISD::ConstantPool, VT, Custom);
319 setOperationAction(ISD::JumpTable, VT, Custom);
322 // RET must be custom lowered, to meet ABI requirements
323 setOperationAction(ISD::RET, MVT::Other, Custom);
325 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
326 setOperationAction(ISD::VASTART , MVT::Other, Custom);
328 // Use the default implementation.
329 setOperationAction(ISD::VAARG , MVT::Other, Expand);
330 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
331 setOperationAction(ISD::VAEND , MVT::Other, Expand);
332 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
333 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
334 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
335 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
337 // Cell SPU has instructions for converting between i64 and fp.
338 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
339 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
341 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
342 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
344 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
345 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
347 // First set operation action for all vector types to expand. Then we
348 // will selectively turn on ones that can be effectively codegen'd.
349 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
350 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
351 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
352 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
353 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
354 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
356 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
357 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
358 MVT VT = (MVT::SimpleValueType)i;
360 // add/sub are legal for all supported vector VT's.
361 setOperationAction(ISD::ADD , VT, Legal);
362 setOperationAction(ISD::SUB , VT, Legal);
363 // mul has to be custom lowered.
364 setOperationAction(ISD::MUL , VT, Custom);
366 setOperationAction(ISD::AND , VT, Legal);
367 setOperationAction(ISD::OR , VT, Legal);
368 setOperationAction(ISD::XOR , VT, Legal);
369 setOperationAction(ISD::LOAD , VT, Legal);
370 setOperationAction(ISD::SELECT, VT, Legal);
371 setOperationAction(ISD::STORE, VT, Legal);
373 // These operations need to be expanded:
374 setOperationAction(ISD::SDIV, VT, Expand);
375 setOperationAction(ISD::SREM, VT, Expand);
376 setOperationAction(ISD::UDIV, VT, Expand);
377 setOperationAction(ISD::UREM, VT, Expand);
378 setOperationAction(ISD::FDIV, VT, Custom);
380 // Custom lower build_vector, constant pool spills, insert and
381 // extract vector elements:
382 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
383 setOperationAction(ISD::ConstantPool, VT, Custom);
384 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
385 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
386 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
387 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
390 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
391 setOperationAction(ISD::AND, MVT::v16i8, Custom);
392 setOperationAction(ISD::OR, MVT::v16i8, Custom);
393 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
394 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
396 setShiftAmountType(MVT::i32);
397 setSetCCResultContents(ZeroOrOneSetCCResult);
399 setStackPointerRegisterToSaveRestore(SPU::R1);
401 // We have target-specific dag combine patterns for the following nodes:
402 setTargetDAGCombine(ISD::ADD);
403 setTargetDAGCombine(ISD::ZERO_EXTEND);
404 setTargetDAGCombine(ISD::SIGN_EXTEND);
405 setTargetDAGCombine(ISD::ANY_EXTEND);
407 computeRegisterProperties();
411 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
413 if (node_names.empty()) {
414 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
415 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
416 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
417 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
418 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
419 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
420 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
421 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
422 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
423 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
424 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
425 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
426 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
427 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
428 = "SPUISD::EXTRACT_ELT0_CHAINED";
429 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
430 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
431 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
432 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
433 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
434 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
435 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
436 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
437 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
438 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
439 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
440 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
441 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
442 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
443 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
444 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
445 "SPUISD::ROTQUAD_RZ_BYTES";
446 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
447 "SPUISD::ROTQUAD_RZ_BITS";
448 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
449 "SPUISD::ROTBYTES_RIGHT_S";
450 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
451 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
452 "SPUISD::ROTBYTES_LEFT_CHAINED";
453 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
454 "SPUISD::ROTBYTES_LEFT_BITS";
455 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
456 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
457 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
458 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
459 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
460 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
461 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
462 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
463 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
466 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
468 return ((i != node_names.end()) ? i->second : 0);
471 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
472 MVT VT = Op.getValueType();
479 //===----------------------------------------------------------------------===//
480 // Calling convention code:
481 //===----------------------------------------------------------------------===//
483 #include "SPUGenCallingConv.inc"
485 //===----------------------------------------------------------------------===//
486 // LowerOperation implementation
487 //===----------------------------------------------------------------------===//
489 /// Aligned load common code for CellSPU
491 \param[in] Op The SelectionDAG load or store operand
492 \param[in] DAG The selection DAG
493 \param[in] ST CellSPU subtarget information structure
494 \param[in,out] alignment Caller initializes this to the load or store node's
495 value from getAlignment(), may be updated while generating the aligned load
496 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
497 offset (divisible by 16, modulo 16 == 0)
498 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
499 offset of the preferred slot (modulo 16 != 0)
500 \param[in,out] VT Caller initializes this value type to the load or store
501 node's loaded or stored value type; may be updated if an i1-extended load or store is encountered.
503 \param[out] was16aligned true if the base pointer had 16-byte alignment,
504 otherwise false. Can help to determine if the chunk needs to be rotated.
506 Both load and store lowering load a block of data aligned on a 16-byte
507 boundary. This is the common aligned load code shared between both.
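For example (illustrative numbers): an i32 load from base + 6 sets alignOffs
to 6 and prefSlotOffs to 6 - 0 = 6; the 16-byte chunk at base (alignOffs & ~0xf
== 0) is loaded, and the caller later rotates it so that bytes 6..9 land in the
i32 preferred slot (bytes 0..3).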
510 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
512 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
513 MVT &VT, bool &was16aligned)
515 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
516 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
517 SDValue basePtr = LSN->getBasePtr();
518 SDValue chain = LSN->getChain();
520 if (basePtr.getOpcode() == ISD::ADD) {
521 SDValue Op1 = basePtr.getNode()->getOperand(1);
523 if (Op1.getOpcode() == ISD::Constant
524 || Op1.getOpcode() == ISD::TargetConstant) {
525 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
527 alignOffs = (int) CN->getZExtValue();
528 prefSlotOffs = (int) (alignOffs & 0xf);
530 // Adjust the rotation amount to ensure that the final result ends up in
531 // the preferred slot:
532 prefSlotOffs -= vtm->prefslot_byte;
533 basePtr = basePtr.getOperand(0);
535 // Loading from memory, can we adjust alignment?
536 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
537 SDValue APtr = basePtr.getOperand(0);
538 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
539 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
540 alignment = GSDN->getGlobal()->getAlignment();
545 prefSlotOffs = -vtm->prefslot_byte;
547 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
548 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
549 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
550 prefSlotOffs = (int) (alignOffs & 0xf);
551 prefSlotOffs -= vtm->prefslot_byte;
552 basePtr = DAG.getRegister(SPU::R1, VT);
555 prefSlotOffs = -vtm->prefslot_byte;
558 if (alignment == 16) {
559 // Realign the base pointer as a D-Form address:
560 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
561 basePtr = DAG.getNode(ISD::ADD, PtrVT,
563 DAG.getConstant((alignOffs & ~0xf), PtrVT));
566 // Emit the vector load:
568 return DAG.getLoad(MVT::v16i8, chain, basePtr,
569 LSN->getSrcValue(), LSN->getSrcValueOffset(),
570 LSN->isVolatile(), 16);
573 // Unaligned load or we're using the "large memory" model, which means that
574 // we have to be very pessimistic:
575 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
576 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
577 DAG.getConstant(0, PtrVT));
581 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
582 DAG.getConstant((alignOffs & ~0xf), PtrVT));
583 was16aligned = false;
584 return DAG.getLoad(MVT::v16i8, chain, basePtr,
585 LSN->getSrcValue(), LSN->getSrcValueOffset(),
586 LSN->isVolatile(), 16);
589 /// Custom lower loads for CellSPU
591 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
592 within a 16-byte block, we have to rotate to extract the requested element.
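When the chunk could not be loaded through a 16-byte aligned address, the
rotation amount is derived from the base pointer itself (basePtr + rotamt)
rather than from a constant; see the !was16aligned path below.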
595 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
596 LoadSDNode *LN = cast<LoadSDNode>(Op);
597 SDValue the_chain = LN->getChain();
598 MVT VT = LN->getMemoryVT();
599 MVT OpVT = Op.getNode()->getValueType(0);
600 ISD::LoadExtType ExtType = LN->getExtensionType();
601 unsigned alignment = LN->getAlignment();
604 switch (LN->getAddressingMode()) {
605 case ISD::UNINDEXED: {
609 AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
611 if (result.getNode() == 0)
614 the_chain = result.getValue(1);
615 // Rotate the chunk if necessary
618 if (rotamt != 0 || !was16aligned) {
619 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
624 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
626 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
627 LoadSDNode *LN1 = cast<LoadSDNode>(result);
628 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
629 DAG.getConstant(rotamt, PtrVT));
632 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
633 the_chain = result.getValue(1);
636 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
638 MVT vecVT = MVT::v16i8;
640 // Convert the loaded v16i8 vector to the appropriate vector type
641 // specified by the operand:
644 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
646 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
649 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
650 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
651 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
652 the_chain = result.getValue(1);
654 // Handle the sign and zero-extending loads for i1 and i8:
657 if (ExtType == ISD::SEXTLOAD) {
658 NewOpC = (OpVT == MVT::i1
659 ? SPUISD::EXTRACT_I1_SEXT
660 : SPUISD::EXTRACT_I8_SEXT);
662 assert(ExtType == ISD::ZEXTLOAD);
663 NewOpC = (OpVT == MVT::i1
664 ? SPUISD::EXTRACT_I1_ZEXT
665 : SPUISD::EXTRACT_I8_ZEXT);
668 result = DAG.getNode(NewOpC, OpVT, result);
671 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
672 SDValue retops[2] = {
677 result = DAG.getNode(SPUISD::LDRESULT, retvts,
678 retops, sizeof(retops) / sizeof(retops[0]));
685 case ISD::LAST_INDEXED_MODE:
686 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
688 cerr << (unsigned) LN->getAddressingMode() << "\n";
696 /// Custom lower stores for CellSPU
698 All CellSPU stores are aligned to 16-byte boundaries, so for elements
699 within a 16-byte block, we have to generate a shuffle to insert the
700 requested element into its place, then store the resulting block.
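In outline this is a read-modify-write sequence: the enclosing 16-byte chunk is
loaded via AlignedLoad, SPUISD::INSERT_MASK (which selects to one of the
cbd/chd/cwd/cdd instructions) builds the insertion control word, SHUFB merges
the scalar into the chunk, and the updated chunk is stored back.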
703 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
704 StoreSDNode *SN = cast<StoreSDNode>(Op);
705 SDValue Value = SN->getValue();
706 MVT VT = Value.getValueType();
707 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
708 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
709 unsigned alignment = SN->getAlignment();
711 switch (SN->getAddressingMode()) {
712 case ISD::UNINDEXED: {
713 int chunk_offset, slot_offset;
716 // The vector type we really want to load from the 16-byte chunk, except
717 // in the case of MVT::i1, which has to be v16i8.
718 MVT vecVT, stVecVT = MVT::v16i8;
721 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
722 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
724 SDValue alignLoadVec =
725 AlignedLoad(Op, DAG, ST, SN, alignment,
726 chunk_offset, slot_offset, VT, was16aligned);
728 if (alignLoadVec.getNode() == 0)
731 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
732 SDValue basePtr = LN->getBasePtr();
733 SDValue the_chain = alignLoadVec.getValue(1);
734 SDValue theValue = SN->getValue();
738 && (theValue.getOpcode() == ISD::AssertZext
739 || theValue.getOpcode() == ISD::AssertSext)) {
740 // Drill down and get the value for zero- and sign-extended quantities.
742 theValue = theValue.getOperand(0);
747 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
748 SDValue insertEltPtr;
751 // If the base pointer is already a D-form address, then just create
752 // a new D-form address with a slot offset and the original base pointer.
753 // Otherwise generate a D-form address with the slot offset relative
754 // to the stack pointer, which is always aligned.
755 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
756 DEBUG(basePtr.getNode()->dump(&DAG));
759 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
760 (basePtr.getOpcode() == ISD::ADD
761 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
762 insertEltPtr = basePtr;
764 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
767 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
768 result = DAG.getNode(SPUISD::SHUFB, vecVT,
769 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
771 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
773 result = DAG.getStore(the_chain, result, basePtr,
774 LN->getSrcValue(), LN->getSrcValueOffset(),
775 LN->isVolatile(), LN->getAlignment());
784 case ISD::LAST_INDEXED_MODE:
785 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
787 cerr << (unsigned) SN->getAddressingMode() << "\n";
795 /// Generate the address of a constant pool entry.
797 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
798 MVT PtrVT = Op.getValueType();
799 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
800 Constant *C = CP->getConstVal();
801 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
802 SDValue Zero = DAG.getConstant(0, PtrVT);
803 const TargetMachine &TM = DAG.getTarget();
805 if (TM.getRelocationModel() == Reloc::Static) {
806 if (!ST->usingLargeMem()) {
807 // Just return the SDValue with the constant pool address in it.
808 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
810 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
811 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
812 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
817 "LowerConstantPool: Relocation model other than static"
823 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
824 MVT PtrVT = Op.getValueType();
825 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
826 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
827 SDValue Zero = DAG.getConstant(0, PtrVT);
828 const TargetMachine &TM = DAG.getTarget();
830 if (TM.getRelocationModel() == Reloc::Static) {
831 if (!ST->usingLargeMem()) {
832 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
834 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
835 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
836 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
841 "LowerJumpTable: Relocation model other than static not supported.");
846 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
847 MVT PtrVT = Op.getValueType();
848 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
849 GlobalValue *GV = GSDN->getGlobal();
850 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
851 const TargetMachine &TM = DAG.getTarget();
852 SDValue Zero = DAG.getConstant(0, PtrVT);
854 if (TM.getRelocationModel() == Reloc::Static) {
855 if (!ST->usingLargeMem()) {
856 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
858 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
859 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
860 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
863 cerr << "LowerGlobalAddress: Relocation model other than static not "
872 //! Custom lower i64 integer constants
874 This code inserts all of the necessary juggling that needs to occur to load
875 a 64-bit constant into a register.
878 LowerConstant(SDValue Op, SelectionDAG &DAG) {
879 MVT VT = Op.getValueType();
880 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
882 if (VT == MVT::i64) {
883 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
884 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
885 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
887 cerr << "LowerConstant: unhandled constant type "
897 //! Custom lower double precision floating point constants
899 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
900 MVT VT = Op.getValueType();
901 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
904 "LowerConstantFP: Node is not ConstantFPSDNode");
906 if (VT == MVT::f64) {
907 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
908 return DAG.getNode(ISD::BIT_CONVERT, VT,
909 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
915 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
917 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
919 SDValue Cond = Op.getOperand(1);
920 MVT CondVT = Cond.getValueType();
923 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
924 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
925 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
927 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
930 return SDValue(); // Unchanged
934 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
936 MachineFunction &MF = DAG.getMachineFunction();
937 MachineFrameInfo *MFI = MF.getFrameInfo();
938 MachineRegisterInfo &RegInfo = MF.getRegInfo();
939 SmallVector<SDValue, 8> ArgValues;
940 SDValue Root = Op.getOperand(0);
941 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
943 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
944 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
946 unsigned ArgOffset = SPUFrameInfo::minStackSize();
947 unsigned ArgRegIdx = 0;
948 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
950 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
952 // Add DAG nodes to load the arguments or copy them out of registers.
953 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
954 ArgNo != e; ++ArgNo) {
956 bool needsLoad = false;
957 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
958 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
960 switch (ObjectVT.getSimpleVT()) {
962 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
963 << ObjectVT.getMVTString()
968 if (!isVarArg && ArgRegIdx < NumArgRegs) {
969 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
978 if (!isVarArg && ArgRegIdx < NumArgRegs) {
979 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
980 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
981 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
1008 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1009 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
1010 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1011 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
1018 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1019 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1020 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1021 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1033 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1034 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1035 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1036 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1044 // We need to load the argument to a virtual register if we determined above
1045 // that we ran out of physical registers of the appropriate type
1047 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1048 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1049 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1050 ArgOffset += StackSlotSize;
1053 ArgValues.push_back(ArgVal);
1056 // If the function takes a variable number of arguments, make a frame index for
1057 // the start of the first vararg value... for expansion of llvm.va_start.
1059 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1061 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1062 // If this function is vararg, store any remaining integer argument regs to
1063 // their spots on the stack so that they may be loaded by dereferencing the
1064 // result of va_next.
1065 SmallVector<SDValue, 8> MemOps;
1066 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1067 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1068 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1069 SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1070 SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1071 MemOps.push_back(Store);
1072 // Increment the address by four for the next argument to store
1073 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1074 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1076 if (!MemOps.empty())
1077 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
1080 ArgValues.push_back(Root);
1082 // Return the new list of results.
1083 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1087 /// isLSAAddress - Return the immediate to use if the specified
1088 /// value is representable as an LSA address.
1089 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1090 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1093 int Addr = C->getZExtValue();
1094 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1095 (Addr << 14 >> 14) != Addr)
1096 return 0; // Top 14 bits have to be sext of immediate.
1098 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
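// For example, 0x00010000 is word aligned and fits in an 18-bit signed
// immediate (its top 14 bits are sign extension), so it is returned as the
// immediate 0x4000, i.e. the address divided by 4.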
1103 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1104 SDValue Chain = Op.getOperand(0);
1106 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
1107 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue() != 0;
1109 SDValue Callee = Op.getOperand(4);
1110 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1111 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1112 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1113 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1115 // Handy pointer type
1116 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1118 // Accumulate how many bytes are to be pushed on the stack, including the
1119 // linkage area and parameter passing area. According to the SPU ABI,
1120 // we minimally need space for [LR] and [SP]
1121 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1123 // Set up a copy of the stack pointer for use loading and storing any
1124 // arguments that may not fit in the registers available for argument
1126 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1128 // Figure out which arguments are going to go in registers, and which in
1130 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1131 unsigned ArgRegIdx = 0;
1133 // Keep track of registers passing arguments
1134 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1135 // And the arguments passed on the stack
1136 SmallVector<SDValue, 8> MemOpChains;
1138 for (unsigned i = 0; i != NumOps; ++i) {
1139 SDValue Arg = Op.getOperand(5+2*i);
1141 // PtrOff will be used to store the current argument to the stack if a
1142 // register cannot be found for it.
1143 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1144 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1146 switch (Arg.getValueType().getSimpleVT()) {
1147 default: assert(0 && "Unexpected ValueType for argument!");
1151 if (ArgRegIdx != NumArgRegs) {
1152 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1154 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1155 ArgOffset += StackSlotSize;
1160 if (ArgRegIdx != NumArgRegs) {
1161 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1163 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1164 ArgOffset += StackSlotSize;
1171 if (ArgRegIdx != NumArgRegs) {
1172 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1174 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1175 ArgOffset += StackSlotSize;
1181 // Update number of stack bytes actually used, insert a call sequence start
1182 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1183 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1185 if (!MemOpChains.empty()) {
1186 // Adjust the stack pointer for the stack arguments.
1187 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1188 &MemOpChains[0], MemOpChains.size());
1191 // Build a sequence of copy-to-reg nodes chained together with token chain
1192 // and flag operands which copy the outgoing args into the appropriate regs.
1194 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1195 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1197 InFlag = Chain.getValue(1);
1200 SmallVector<SDValue, 8> Ops;
1201 unsigned CallOpc = SPUISD::CALL;
1203 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1204 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1205 // node so that legalize doesn't hack it.
1206 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1207 GlobalValue *GV = G->getGlobal();
1208 MVT CalleeVT = Callee.getValueType();
1209 SDValue Zero = DAG.getConstant(0, PtrVT);
1210 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1212 if (!ST->usingLargeMem()) {
1213 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1214 // style calls, otherwise, external symbols are BRASL calls. This assumes
1215 // that declared/defined symbols are in the same compilation unit and can
1216 // be reached through PC-relative jumps.
1219 // This may be an unsafe assumption for JIT and really large compilation units.
1221 if (GV->isDeclaration()) {
1222 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1224 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1227 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1229 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1231 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1232 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1233 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1234 // If this is an absolute destination address that appears to be a legal
1235 // local store address, use the munged value.
1236 Callee = SDValue(Dest, 0);
1239 Ops.push_back(Chain);
1240 Ops.push_back(Callee);
1242 // Add argument registers to the end of the list so that they are known live into the call.
1244 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1245 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1246 RegsToPass[i].second.getValueType()));
1248 if (InFlag.getNode())
1249 Ops.push_back(InFlag);
1250 // Returns a chain and a flag for retval copy to use.
1251 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1252 &Ops[0], Ops.size());
1253 InFlag = Chain.getValue(1);
1255 Chain = DAG.getCALLSEQ_END(Chain,
1256 DAG.getConstant(NumStackBytes, PtrVT),
1257 DAG.getConstant(0, PtrVT),
1259 if (Op.getNode()->getValueType(0) != MVT::Other)
1260 InFlag = Chain.getValue(1);
1262 SDValue ResultVals[3];
1263 unsigned NumResults = 0;
1265 // If the call has results, copy the values out of the ret val registers.
1266 switch (Op.getNode()->getValueType(0).getSimpleVT()) {
1267 default: assert(0 && "Unexpected ret value!");
1268 case MVT::Other: break;
1270 if (Op.getNode()->getValueType(1) == MVT::i32) {
1271 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1272 ResultVals[0] = Chain.getValue(0);
1273 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1274 Chain.getValue(2)).getValue(1);
1275 ResultVals[1] = Chain.getValue(0);
1278 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1279 ResultVals[0] = Chain.getValue(0);
1284 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1285 ResultVals[0] = Chain.getValue(0);
1290 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.getNode()->getValueType(0),
1291 InFlag).getValue(1);
1292 ResultVals[0] = Chain.getValue(0);
1300 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.getNode()->getValueType(0),
1301 InFlag).getValue(1);
1302 ResultVals[0] = Chain.getValue(0);
1307 // If the function returns void, just return the chain.
1308 if (NumResults == 0)
1311 // Otherwise, merge everything together with a MERGE_VALUES node.
1312 ResultVals[NumResults++] = Chain;
1313 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1314 return Res.getValue(Op.getResNo());
1318 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1319 SmallVector<CCValAssign, 16> RVLocs;
1320 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1321 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1322 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1323 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1325 // If this is the first return lowered for this function, add the regs to the
1326 // liveout set for the function.
1327 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1328 for (unsigned i = 0; i != RVLocs.size(); ++i)
1329 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1332 SDValue Chain = Op.getOperand(0);
1335 // Copy the result values into the output registers.
1336 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1337 CCValAssign &VA = RVLocs[i];
1338 assert(VA.isRegLoc() && "Can only return in registers!");
1339 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1340 Flag = Chain.getValue(1);
1344 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1346 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1350 //===----------------------------------------------------------------------===//
1351 // Vector related lowering:
1352 //===----------------------------------------------------------------------===//
1354 static ConstantSDNode *
1355 getVecImm(SDNode *N) {
1356 SDValue OpVal(0, 0);
1358 // Check to see if this buildvec has a single non-undef value in its elements.
1359 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1360 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1361 if (OpVal.getNode() == 0)
1362 OpVal = N->getOperand(i);
1363 else if (OpVal != N->getOperand(i))
1367 if (OpVal.getNode() != 0) {
1368 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1373 return 0; // All UNDEF: use an implicit def; not a Constant node
1376 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1377 /// and the value fits into an unsigned 18-bit constant, and if so, return the constant.
1379 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1381 if (ConstantSDNode *CN = getVecImm(N)) {
1382 uint64_t Value = CN->getZExtValue();
1383 if (ValueType == MVT::i64) {
1384 uint64_t UValue = CN->getZExtValue();
1385 uint32_t upper = uint32_t(UValue >> 32);
1386 uint32_t lower = uint32_t(UValue);
1389 Value = Value >> 32;
1391 if (Value <= 0x3ffff)
1392 return DAG.getConstant(Value, ValueType);
1398 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1399 /// and the value fits into a signed 16-bit constant, and if so, return the
1401 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1403 if (ConstantSDNode *CN = getVecImm(N)) {
1404 int64_t Value = CN->getSignExtended();
1405 if (ValueType == MVT::i64) {
1406 uint64_t UValue = CN->getZExtValue();
1407 uint32_t upper = uint32_t(UValue >> 32);
1408 uint32_t lower = uint32_t(UValue);
1411 Value = Value >> 32;
1413 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1414 return DAG.getConstant(Value, ValueType);
1421 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1422 /// and the value fits into a signed 10-bit constant, and if so, return the
1424 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1426 if (ConstantSDNode *CN = getVecImm(N)) {
1427 int64_t Value = CN->getSignExtended();
1428 if (ValueType == MVT::i64) {
1429 uint64_t UValue = CN->getZExtValue();
1430 uint32_t upper = uint32_t(UValue >> 32);
1431 uint32_t lower = uint32_t(UValue);
1434 Value = Value >> 32;
1436 if (isS10Constant(Value))
1437 return DAG.getConstant(Value, ValueType);
1443 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1444 /// and the value fits into a signed 8-bit constant, and if so, return the
1447 /// @note: The incoming vector is v16i8 because that's the only way we can load
1448 /// constant vectors. Thus, we test to see if the upper and lower bytes are the same value.
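/// For example, an i16 splat value of 0x4A4A satisfies ((short) Value >> 8) ==
/// ((short) Value & 0xff) and is returned as the 8-bit immediate 0x4A.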
1450 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1452 if (ConstantSDNode *CN = getVecImm(N)) {
1453 int Value = (int) CN->getZExtValue();
1454 if (ValueType == MVT::i16
1455 && Value <= 0xffff /* truncated from uint64_t */
1456 && ((short) Value >> 8) == ((short) Value & 0xff))
1457 return DAG.getConstant(Value & 0xff, ValueType);
1458 else if (ValueType == MVT::i8
1459 && (Value & 0xff) == Value)
1460 return DAG.getConstant(Value, ValueType);
1466 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1467 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
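/// For example, a v4i32 splat of 0x12340000 satisfies (Value & 0xffff0000) ==
/// Value and yields the immediate 0x1234, suitable for an ilhu (immediate load
/// halfword upper) instruction.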
1469 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1471 if (ConstantSDNode *CN = getVecImm(N)) {
1472 uint64_t Value = CN->getZExtValue();
1473 if ((ValueType == MVT::i32
1474 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1475 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1476 return DAG.getConstant(Value >> 16, ValueType);
1482 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1483 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1484 if (ConstantSDNode *CN = getVecImm(N)) {
1485 return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
1491 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1492 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1493 if (ConstantSDNode *CN = getVecImm(N)) {
1494 return DAG.getConstant(CN->getZExtValue(), MVT::i64);
1500 // If this is a vector of constants or undefs, get the bits. A bit in
1501 // UndefBits is set if the corresponding element of the vector is an
1502 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1503 // zero. Return true if this is not an array of constants, false if it is.
1505 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1506 uint64_t UndefBits[2]) {
1507 // Start with zero'd results.
1508 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1510 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1511 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1512 SDValue OpVal = BV->getOperand(i);
1514 unsigned PartNo = i >= e/2; // In the upper half (upper 64 bits)?
1515 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1517 uint64_t EltBits = 0;
1518 if (OpVal.getOpcode() == ISD::UNDEF) {
1519 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1520 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1522 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1523 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1524 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1525 const APFloat &apf = CN->getValueAPF();
1526 EltBits = (CN->getValueType(0) == MVT::f32
1527 ? FloatToBits(apf.convertToFloat())
1528 : DoubleToBits(apf.convertToDouble()));
1530 // Nonconstant element.
1534 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1537 //printf("%llx %llx %llx %llx\n",
1538 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1542 /// If this is a splat (repetition) of a value across the whole vector, return
1543 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1544 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1545 /// SplatSize = 1 byte.
1546 static bool isConstantSplat(const uint64_t Bits128[2],
1547 const uint64_t Undef128[2],
1549 uint64_t &SplatBits, uint64_t &SplatUndef,
1551 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1552 // the same as the lower 64-bits, ignoring undefs.
1553 uint64_t Bits64 = Bits128[0] | Bits128[1];
1554 uint64_t Undef64 = Undef128[0] & Undef128[1];
1555 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1556 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1557 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1558 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1560 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1561 if (MinSplatBits < 64) {
1563 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1565 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1566 if (MinSplatBits < 32) {
1568 // If the top 16 bits match the lower 16 bits (ignoring undefs), the splat
1569 // may be narrower still; otherwise we have an i32 splat.
1570 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1571 if (MinSplatBits < 16) {
1572 // If the top 8 bits match the lower 8 bits (ignoring undefs), we have
1573 // an i8 splat; otherwise it is an i16 splat.
1574 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1575 == ((Bits16 >> 8) & ~Undef16)) {
1576 // We have an 8-bit splat.
1577 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1578 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1584 SplatUndef = Undef16;
1591 SplatUndef = Undef32;
1597 SplatBits = Bits128[0];
1598 SplatUndef = Undef128[0];
1604 return false; // Can't be a splat if two pieces don't match.
1607 // If this is a case we can't handle, return null and let the default
1608 // expansion code take care of it. If we CAN select this case, and if it
1609 // selects to a single instruction, return Op. Otherwise, if we can codegen
1610 // this case more efficiently than a constant pool load, lower it to the
1611 // sequence of ops that should be used.
1612 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1613 MVT VT = Op.getValueType();
1614 // If this is a vector of constants or undefs, get the bits. A bit in
1615 // UndefBits is set if the corresponding element of the vector is an
1616 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1618 uint64_t VectorBits[2];
1619 uint64_t UndefBits[2];
1620 uint64_t SplatBits, SplatUndef;
1622 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1623 || !isConstantSplat(VectorBits, UndefBits,
1624 VT.getVectorElementType().getSizeInBits(),
1625 SplatBits, SplatUndef, SplatSize))
1626 return SDValue(); // Not a constant vector, not a splat.
1628 switch (VT.getSimpleVT()) {
1631 uint32_t Value32 = SplatBits;
1632 assert(SplatSize == 4
1633 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1634 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1635 SDValue T = DAG.getConstant(Value32, MVT::i32);
1636 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1637 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1641 uint64_t f64val = SplatBits;
1642 assert(SplatSize == 8
1643 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1644 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1645 SDValue T = DAG.getConstant(f64val, MVT::i64);
1646 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1647 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1651 // 8-bit constants have to be expanded to 16-bits
1652 unsigned short Value16 = SplatBits | (SplatBits << 8);
1654 for (int i = 0; i < 8; ++i)
1655 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1656 return DAG.getNode(ISD::BIT_CONVERT, VT,
1657 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1660 unsigned short Value16;
1662 Value16 = (unsigned short) (SplatBits & 0xffff);
1664 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1665 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1667 for (int i = 0; i < 8; ++i) Ops[i] = T;
1668 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1671 unsigned int Value = SplatBits;
1672 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1673 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1676 uint64_t val = SplatBits;
1677 uint32_t upper = uint32_t(val >> 32);
1678 uint32_t lower = uint32_t(val);
1680 if (upper == lower) {
1681 // Magic constant that can be matched by IL, ILA, et al.
1682 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1683 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1687 SmallVector<SDValue, 16> ShufBytes;
1689 bool upper_special, lower_special;
1691 // NOTE: This code creates common-case shuffle masks that can be easily
1692 // detected as common expressions. It is not attempting to create highly
1693 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
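// (In a SHUFB control word, the byte values 0x80, 0xc0 and 0xe0 produce the
// constants 0x00, 0xff and 0x80 respectively, which is why those three
// patterns are cheap to generate.)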
1695 // Detect if the upper or lower half is a special shuffle mask pattern:
1696 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1697 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1699 // Create lower vector if not a special pattern
1700 if (!lower_special) {
1701 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1702 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1703 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1704 LO32C, LO32C, LO32C, LO32C));
1707 // Create upper vector if not a special pattern
1708 if (!upper_special) {
1709 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1710 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1711 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1712 HI32C, HI32C, HI32C, HI32C));
1715 // If either upper or lower are special, then the two input operands are
1716 // the same (basically, one of them is a "don't care")
1721 if (lower_special && upper_special) {
1722 // Unhappy situation... both upper and lower are special, so punt with
1723 // a zero vector for both shuffle inputs:
1724 SDValue Zero = DAG.getConstant(0, MVT::i32);
1725 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1729 for (int i = 0; i < 4; ++i) {
1731 for (int j = 0; j < 4; ++j) {
1733 bool process_upper, process_lower;
1735 process_upper = (upper_special && (i & 1) == 0);
1736 process_lower = (lower_special && (i & 1) == 1);
1738 if (process_upper || process_lower) {
1739 if ((process_upper && upper == 0)
1740 || (process_lower && lower == 0))
1742 else if ((process_upper && upper == 0xffffffff)
1743 || (process_lower && lower == 0xffffffff))
1745 else if ((process_upper && upper == 0x80000000)
1746 || (process_lower && lower == 0x80000000))
1747 val |= (j == 0 ? 0xe0 : 0x80);
1749 val |= i * 4 + j + ((i & 1) * 16);
1752 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1755 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1756 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1757 &ShufBytes[0], ShufBytes.size()));
1765 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1766 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1767 /// permutation vector, V3, is monotonically increasing with one "exception"
1768 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1769 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1770 /// In either case, the net result is going to eventually invoke SHUFB to
1771 /// permute/shuffle the bytes from V1 and V2.
1773 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1774 /// generate the control word for byte/halfword/word insertion. This takes
1775 /// care of a single element move from V2 into V1.
1777 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1778 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1779 SDValue V1 = Op.getOperand(0);
1780 SDValue V2 = Op.getOperand(1);
1781 SDValue PermMask = Op.getOperand(2);
1783 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1785 // If we have a single element being moved from V2 into V1, this can be handled
1786 // using the C*[DX] compute mask instructions, but the vector elements have
1787 // to be monotonically increasing with one exception element.
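// For example, a v4i32 mask of <0, 1, 6, 3> is monotonic with a single
// element (mask index 6, i.e. element 2 of V2) drawn from V2 and can use the
// compute-mask path; a mask drawing two elements from V2, such as
// <0, 5, 6, 3>, falls through to the generic SHUFB lowering below.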
1788 MVT EltVT = V1.getValueType().getVectorElementType();
1789 unsigned EltsFromV2 = 0;
1791 unsigned V2EltIdx0 = 0;
1792 unsigned CurrElt = 0;
1793 bool monotonic = true;
1794 if (EltVT == MVT::i8)
1796 else if (EltVT == MVT::i16)
1798 else if (EltVT == MVT::i32)
1801 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1803 for (unsigned i = 0, e = PermMask.getNumOperands();
1804 EltsFromV2 <= 1 && monotonic && i != e;
1807 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1810 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1812 if (SrcElt >= V2EltIdx0) {
1814 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1815 } else if (CurrElt != SrcElt) {
1822 if (EltsFromV2 == 1 && monotonic) {
1823 // Compute mask and shuffle
1824 MachineFunction &MF = DAG.getMachineFunction();
1825 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1826 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1827 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1828 // Initialize temporary register to 0
1829 SDValue InitTempReg =
1830 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1831 // Copy register's contents as index in INSERT_MASK:
1832 SDValue ShufMaskOp =
1833 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1834 DAG.getTargetConstant(V2Elt, MVT::i32),
1835 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1836 // Use shuffle mask in SHUFB synthetic instruction:
1837 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1839 // Convert the SHUFFLE_VECTOR mask's input element units to the
1840 // actual bytes.
1841 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1843 SmallVector<SDValue, 16> ResultMask;
1844 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1846 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1849 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1851 for (unsigned j = 0; j < BytesPerElement; ++j) {
1852 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1857 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1858 &ResultMask[0], ResultMask.size());
1859 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1863 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1864 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1866 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1867 // For a constant, build the appropriate constant vector, which will
1868 // eventually simplify to a vector register load.
1870 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1871 SmallVector<SDValue, 16> ConstVecValues;
1875 // Create a constant vector:
1876 switch (Op.getValueType().getSimpleVT()) {
1877 default: assert(0 && "Unexpected constant value type in "
1878 "LowerSCALAR_TO_VECTOR");
1879 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1880 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1881 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1882 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1883 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1884 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1887 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1888 for (size_t j = 0; j < n_copies; ++j)
1889 ConstVecValues.push_back(CValue);
1891 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1892 &ConstVecValues[0], ConstVecValues.size());
1894 // Otherwise, copy the value from one register to another:
1895 switch (Op0.getValueType().getSimpleVT()) {
1896 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1903 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1910 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1911 switch (Op.getValueType().getSimpleVT()) {
1913 cerr << "CellSPU: Unknown vector multiplication, got "
1914 << Op.getValueType().getMVTString()
1920 SDValue rA = Op.getOperand(0);
1921 SDValue rB = Op.getOperand(1);
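// SPU only has 16 x 16 -> 32 bit multiplies, so (roughly) each 32-bit product
// is assembled as lo(a)*lo(b) + ((hi(a)*lo(b)) << 16) + ((hi(b)*lo(a)) << 16)
// modulo 2^32: MPYU forms the first term and the two MPYH nodes supply the
// two shifted cross terms.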
1922 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1923 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1924 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1925 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1927 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1931 // Multiply two v8i16 vectors (pipeline friendly version):
1932 // a) multiply lower halves, mask off the upper 16 bits of each 32-bit product
1933 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1934 // c) Use SELB to select upper and lower halves from the intermediate results
1936 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1937 // dual-issue. This code does manage to do this, even if it's a little on
1938 // the wacky side.
1940 MachineFunction &MF = DAG.getMachineFunction();
1941 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1942 SDValue Chain = Op.getOperand(0);
1943 SDValue rA = Op.getOperand(0);
1944 SDValue rB = Op.getOperand(1);
1945 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1946 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1949 DAG.getCopyToReg(Chain, FSMBIreg,
1950 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1951 DAG.getConstant(0xcccc, MVT::i16)));
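// (SELECT_MASK expands each bit of the 16-bit immediate into a 0x00/0xFF
// byte; 0xcccc therefore marks the two upper bytes of every 32-bit word,
// which is where the shifted MPYHH products are merged in by SELB below.)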
1954 DAG.getCopyToReg(FSMBOp, HiProdReg,
1955 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1957 SDValue HHProd_v4i32 =
1958 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1959 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1961 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1962 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1963 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1964 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1966 DAG.getConstant(16, MVT::i16))),
1967 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1970 // This M00sE is N@stI! (apologies to Monty Python)
1972 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1973 // is to break it all apart, sign extend, and reassemble the various
1974 // intermediate products.
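// Very roughly: each byte lane is multiplied as a 16-bit quantity (after the
// VEC_SRA sign extensions), shifted back into its byte position, and the
// lanes are recombined with SELECT_MASK/SELB byte masks; the low and high
// halves of each word are finally OR'd together at the end.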
1976 SDValue rA = Op.getOperand(0);
1977 SDValue rB = Op.getOperand(1);
1978 SDValue c8 = DAG.getConstant(8, MVT::i32);
1979 SDValue c16 = DAG.getConstant(16, MVT::i32);
1982 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1986 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1988 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1991 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1992 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1994 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1995 DAG.getConstant(0x2222, MVT::i16));
1997 SDValue LoProdParts =
1998 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1999 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2000 LLProd, LHProd, FSMBmask));
2002 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
2005 DAG.getNode(ISD::AND, MVT::v4i32,
2007 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2008 LoProdMask, LoProdMask,
2009 LoProdMask, LoProdMask));
2012 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2013 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2016 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2017 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2020 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2021 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2022 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2025 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2026 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2027 DAG.getNode(SPUISD::VEC_SRA,
2028 MVT::v4i32, rAH, c8)),
2029 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2030 DAG.getNode(SPUISD::VEC_SRA,
2031 MVT::v4i32, rBH, c8)));
2034 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2036 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2040 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2042 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2043 DAG.getNode(ISD::OR, MVT::v4i32,
2051 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2052 MachineFunction &MF = DAG.getMachineFunction();
2053 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2055 SDValue A = Op.getOperand(0);
2056 SDValue B = Op.getOperand(1);
2057 MVT VT = Op.getValueType();
2059 unsigned VRegBR, VRegC;
2061 if (VT == MVT::f32) {
2062 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2063 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2065 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2066 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2068 // TODO: make sure we're feeding FPInterp the right arguments
2069 // Right now: fi B, frest(B)
2072 // (Floating Interpolate (FP Reciprocal Estimate B))
2074 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2075 DAG.getNode(SPUISD::FPInterp, VT, B,
2076 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
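// This is a reciprocal-estimate division with one refinement step: BRcpl is
// roughly 1/B (FPRecipEst refined by FPInterp), and the final value is
// computed as A*BRcpl + BRcpl*(A - B*(A*BRcpl)), which cancels most of the
// estimate's remaining error.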
2078 // Computes A * BRcpl and stores it in a temporary register
2080 DAG.getCopyToReg(BRcpl, VRegC,
2081 DAG.getNode(ISD::FMUL, VT, A,
2082 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2083 // What's the Chain variable do? It's magic!
2084 // TODO: set Chain = Op(0).getEntryNode()
2086 return DAG.getNode(ISD::FADD, VT,
2087 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2088 DAG.getNode(ISD::FMUL, VT,
2089 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2090 DAG.getNode(ISD::FSUB, VT, A,
2091 DAG.getNode(ISD::FMUL, VT, B,
2092 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2095 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2096 MVT VT = Op.getValueType();
2097 SDValue N = Op.getOperand(0);
2098 SDValue Elt = Op.getOperand(1);
2099 SDValue ShufMask[16];
2100 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2102 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2104 int EltNo = (int) C->getZExtValue();
2107 if (VT == MVT::i8 && EltNo >= 16)
2108 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2109 else if (VT == MVT::i16 && EltNo >= 8)
2110 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2111 else if (VT == MVT::i32 && EltNo >= 4)
2112 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2113 else if (VT == MVT::i64 && EltNo >= 2)
2114 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2116 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2117 // i32 and i64: Element 0 is the preferred slot
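// (The "preferred slot" is the leftmost word of the 128-bit register, where
// the SPU keeps scalar values; element 0 of an i32/i64 vector already sits
// there, so no shuffle is required.)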
2118 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2121 // Need to generate shuffle mask and extract:
2122 int prefslot_begin = -1, prefslot_end = -1;
2123 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2125 switch (VT.getSimpleVT()) {
2127 assert(false && "Invalid value type!");
2129 prefslot_begin = prefslot_end = 3;
2133 prefslot_begin = 2; prefslot_end = 3;
2137 prefslot_begin = 0; prefslot_end = 3;
2141 prefslot_begin = 0; prefslot_end = 7;
2146 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2147 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2149 for (int i = 0; i < 16; ++i) {
2150 // zero fill upper part of preferred slot, don't care about the
2151 // rest.
2152 unsigned int mask_val;
2154 if (i <= prefslot_end) {
2156 ((i < prefslot_begin)
2158 : elt_byte + (i - prefslot_begin));
2160 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2162 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2165 SDValue ShufMaskVec =
2166 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2168 sizeof(ShufMask) / sizeof(ShufMask[0]));
2170 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2171 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2172 N, N, ShufMaskVec));
2176 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2177 SDValue VecOp = Op.getOperand(0);
2178 SDValue ValOp = Op.getOperand(1);
2179 SDValue IdxOp = Op.getOperand(2);
2180 MVT VT = Op.getValueType();
2182 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2183 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2185 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2186 // Use $2 because it's always 16-byte aligned and it's available:
2187 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
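// The C*D-form instructions that INSERT_MASK selects to only look at the low
// four bits of their address operand when choosing the target slot, so adding
// the element offset to the always 16-byte-aligned $2 effectively picks the
// slot without touching memory.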
2190 DAG.getNode(SPUISD::SHUFB, VT,
2191 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2193 DAG.getNode(SPUISD::INSERT_MASK, VT,
2194 DAG.getNode(ISD::ADD, PtrVT,
2196 DAG.getConstant(CN->getZExtValue(),
2202 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2204 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2206 assert(Op.getValueType() == MVT::i8);
2209 assert(0 && "Unhandled i8 math operator");
2213 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2214 // the result.
2215 SDValue N1 = Op.getOperand(1);
2216 N0 = (N0.getOpcode() != ISD::Constant
2217 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2218 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2220 N1 = (N1.getOpcode() != ISD::Constant
2221 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2222 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2224 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2225 DAG.getNode(Opc, MVT::i16, N0, N1));
2229 SDValue N1 = Op.getOperand(1);
2231 N0 = (N0.getOpcode() != ISD::Constant
2232 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2233 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2235 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2238 N1 = (N1.getOpcode() != ISD::Constant
2239 ? DAG.getNode(N1Opc, MVT::i16, N1)
2240 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2243 DAG.getNode(ISD::OR, MVT::i16, N0,
2244 DAG.getNode(ISD::SHL, MVT::i16,
2245 N0, DAG.getConstant(8, MVT::i16)));
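// (Replicating the byte into both halves of the i16, i.e. (N0 << 8) | N0,
// lets a 16-bit rotate stand in for an 8-bit rotate once the result is
// truncated back to i8.)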
2246 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2247 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2251 SDValue N1 = Op.getOperand(1);
2253 N0 = (N0.getOpcode() != ISD::Constant
2254 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2255 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2257 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2260 N1 = (N1.getOpcode() != ISD::Constant
2261 ? DAG.getNode(N1Opc, MVT::i16, N1)
2262 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2264 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2265 DAG.getNode(Opc, MVT::i16, N0, N1));
2268 SDValue N1 = Op.getOperand(1);
2270 N0 = (N0.getOpcode() != ISD::Constant
2271 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2272 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2274 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2277 N1 = (N1.getOpcode() != ISD::Constant
2278 ? DAG.getNode(N1Opc, MVT::i16, N1)
2279 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2281 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2282 DAG.getNode(Opc, MVT::i16, N0, N1));
2285 SDValue N1 = Op.getOperand(1);
2287 N0 = (N0.getOpcode() != ISD::Constant
2288 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2289 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2291 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2292 N1 = (N1.getOpcode() != ISD::Constant
2293 ? DAG.getNode(N1Opc, MVT::i16, N1)
2294 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2296 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2297 DAG.getNode(Opc, MVT::i16, N0, N1));
2305 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2307 MVT VT = Op.getValueType();
2308 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2310 SDValue Op0 = Op.getOperand(0);
2313 case ISD::ZERO_EXTEND:
2314 case ISD::SIGN_EXTEND:
2315 case ISD::ANY_EXTEND: {
2316 MVT Op0VT = Op0.getValueType();
2317 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2319 assert(Op0VT == MVT::i32
2320 && "CellSPU: Zero/sign extending something other than i32");
2321 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2323 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2324 ? SPUISD::ROTBYTES_RIGHT_S
2325 : SPUISD::ROTQUAD_RZ_BYTES);
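// Sketch of what follows: the i32 is promoted into a vector register's
// preferred slot, the quadword is rotated right by 4 bytes with sign or zero
// fill (depending on NewOpc), and the correctly extended i64 is then read
// back out of the preferred slot with EXTRACT_ELT0.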
2326 SDValue PromoteScalar =
2327 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2329 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2330 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2331 DAG.getNode(NewOpc, Op0VecVT,
2333 DAG.getConstant(4, MVT::i32))));
2337 // Turn operands into vectors to satisfy type checking (shufb works on
2338 // vectors)
2340 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2342 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2343 SmallVector<SDValue, 16> ShufBytes;
2345 // Create the shuffle mask for "rotating" the carry up one register slot
2346 // once the carry is generated.
2347 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2348 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2349 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2350 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2353 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2354 SDValue ShiftedCarry =
2355 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2357 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2358 &ShufBytes[0], ShufBytes.size()));
2360 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2361 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2362 Op0, Op1, ShiftedCarry));
2366 // Turn operands into vectors to satisfy type checking (shufb works on
2367 // vectors)
2369 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2371 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2372 SmallVector<SDValue, 16> ShufBytes;
2374 // Create the shuffle mask for "rotating" the borrow up one register slot
2375 // once the borrow is generated.
2376 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2377 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2378 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2379 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2382 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2383 SDValue ShiftedBorrow =
2384 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2385 BorrowGen, BorrowGen,
2386 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2387 &ShufBytes[0], ShufBytes.size()));
2389 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2390 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2391 Op0, Op1, ShiftedBorrow));
2395 SDValue ShiftAmt = Op.getOperand(1);
2396 MVT ShiftAmtVT = ShiftAmt.getValueType();
2397 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2399 DAG.getNode(SPUISD::SELB, VecVT,
2401 DAG.getConstant(0, VecVT),
2402 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2403 DAG.getConstant(0xff00ULL, MVT::i16)));
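// SPU shifts quadwords in two steps, so the shift amount is split into a
// byte count (amt >> 3) for SHLQUAD_L_BYTES and a residual bit count
// (amt & 7) for SHLQUAD_L_BITS; the SELECT_MASK/SELB above first masks one
// half of the quadword to zero so stray bytes are not shifted into the
// result.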
2404 SDValue ShiftAmtBytes =
2405 DAG.getNode(ISD::SRL, ShiftAmtVT,
2407 DAG.getConstant(3, ShiftAmtVT));
2408 SDValue ShiftAmtBits =
2409 DAG.getNode(ISD::AND, ShiftAmtVT,
2411 DAG.getConstant(7, ShiftAmtVT));
2413 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2414 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2415 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2416 MaskLower, ShiftAmtBytes),
2421 MVT VT = Op.getValueType();
2422 SDValue ShiftAmt = Op.getOperand(1);
2423 MVT ShiftAmtVT = ShiftAmt.getValueType();
2424 SDValue ShiftAmtBytes =
2425 DAG.getNode(ISD::SRL, ShiftAmtVT,
2427 DAG.getConstant(3, ShiftAmtVT));
2428 SDValue ShiftAmtBits =
2429 DAG.getNode(ISD::AND, ShiftAmtVT,
2431 DAG.getConstant(7, ShiftAmtVT));
2433 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2434 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2435 Op0, ShiftAmtBytes),
2440 // Promote Op0 to vector
2442 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2443 SDValue ShiftAmt = Op.getOperand(1);
2444 MVT ShiftVT = ShiftAmt.getValueType();
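// Roughly: one half of the quadword is pre-filled with sign bits (derived
// via VEC_SRA by 31 feeding SELECT_MASK/SELB), and the arithmetic right
// shift is then carried out as a left rotate by the negated amount, so that
// sign bytes are what rotate in from the other end.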
2446 // Negate variable shift amounts
2447 if (!isa<ConstantSDNode>(ShiftAmt)) {
2448 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2449 DAG.getConstant(0, ShiftVT), ShiftAmt);
2452 SDValue UpperHalfSign =
2453 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2454 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2455 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2456 Op0, DAG.getConstant(31, MVT::i32))));
2457 SDValue UpperHalfSignMask =
2458 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2459 SDValue UpperLowerMask =
2460 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2461 DAG.getConstant(0xff00, MVT::i16));
2462 SDValue UpperLowerSelect =
2463 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2464 UpperHalfSignMask, Op0, UpperLowerMask);
2465 SDValue RotateLeftBytes =
2466 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2467 UpperLowerSelect, ShiftAmt);
2468 SDValue RotateLeftBits =
2469 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2470 RotateLeftBytes, ShiftAmt);
2472 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2480 //! Lower byte immediate operations for v16i8 vectors:
2482 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2485 MVT VT = Op.getValueType();
2487 ConstVec = Op.getOperand(0);
2488 Arg = Op.getOperand(1);
2489 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2490 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2491 ConstVec = ConstVec.getOperand(0);
2493 ConstVec = Op.getOperand(1);
2494 Arg = Op.getOperand(0);
2495 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2496 ConstVec = ConstVec.getOperand(0);
2501 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2502 uint64_t VectorBits[2];
2503 uint64_t UndefBits[2];
2504 uint64_t SplatBits, SplatUndef;
2507 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2508 && isConstantSplat(VectorBits, UndefBits,
2509 VT.getVectorElementType().getSizeInBits(),
2510 SplatBits, SplatUndef, SplatSize)) {
2512 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2513 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2515 // Turn the BUILD_VECTOR into a set of target constants:
2516 for (size_t i = 0; i < tcVecSize; ++i)
2519 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2520 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2523 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2524 // lowered. Return the operation rather than a null SDValue.
2528 //! Lower i32 multiplication
2529 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2531 switch (VT.getSimpleVT()) {
2533 cerr << "CellSPU: Unknown LowerMUL value type, got "
2534 << Op.getValueType().getMVTString()
2540 SDValue rA = Op.getOperand(0);
2541 SDValue rB = Op.getOperand(1);
2543 return DAG.getNode(ISD::ADD, MVT::i32,
2544 DAG.getNode(ISD::ADD, MVT::i32,
2545 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2546 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2547 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2554 //! Custom lowering for CTPOP (count population)
2556 Custom lowering code that counts the number of ones in the input
2557 operand. SPU has such an instruction, but it counts the number of
2558 ones per byte, which then have to be accumulated.
2560 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2561 MVT VT = Op.getValueType();
2562 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
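// CNTB produces a per-byte population count; the cases below only differ in
// how many byte counts get folded together: i8 uses the count directly, i16
// adds its two byte counts (note an all-ones i16 yields a count of 16, which
// the 0x0f mask below appears unable to represent), and i32 folds four byte
// counts with two shift-and-add rounds before masking with 0xff.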
2564 switch (VT.getSimpleVT()) {
2566 assert(false && "Invalid value type!");
2568 SDValue N = Op.getOperand(0);
2569 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2571 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2572 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2574 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2578 MachineFunction &MF = DAG.getMachineFunction();
2579 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2581 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2583 SDValue N = Op.getOperand(0);
2584 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2585 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2586 SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2588 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2589 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2591 // CNTB_result becomes the chain to which the virtual register
2592 // CNTB_reg becomes associated:
2593 SDValue CNTB_result =
2594 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2596 SDValue CNTB_rescopy =
2597 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2599 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2601 return DAG.getNode(ISD::AND, MVT::i16,
2602 DAG.getNode(ISD::ADD, MVT::i16,
2603 DAG.getNode(ISD::SRL, MVT::i16,
2610 MachineFunction &MF = DAG.getMachineFunction();
2611 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2613 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2614 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2616 SDValue N = Op.getOperand(0);
2617 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2618 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2619 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2620 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2622 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2623 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2625 // CNTB_result becomes the chain to which all of the virtual registers
2626 // CNTB_reg, SUM1_reg become associated:
2627 SDValue CNTB_result =
2628 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2630 SDValue CNTB_rescopy =
2631 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2634 DAG.getNode(ISD::SRL, MVT::i32,
2635 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2638 DAG.getNode(ISD::ADD, MVT::i32,
2639 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2641 SDValue Sum1_rescopy =
2642 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2645 DAG.getNode(ISD::SRL, MVT::i32,
2646 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2649 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2650 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2652 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2662 /// LowerOperation - Provide custom lowering hooks for some operations.
2665 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2667 unsigned Opc = (unsigned) Op.getOpcode();
2668 MVT VT = Op.getValueType();
2672 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2673 cerr << "Op.getOpcode() = " << Opc << "\n";
2674 cerr << "*Op.getNode():\n";
2675 Op.getNode()->dump();
2681 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2683 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2684 case ISD::ConstantPool:
2685 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2686 case ISD::GlobalAddress:
2687 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2688 case ISD::JumpTable:
2689 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2691 return LowerConstant(Op, DAG);
2692 case ISD::ConstantFP:
2693 return LowerConstantFP(Op, DAG);
2695 return LowerBRCOND(Op, DAG);
2696 case ISD::FORMAL_ARGUMENTS:
2697 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2699 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2701 return LowerRET(Op, DAG, getTargetMachine());
2704 // i8, i64 math ops:
2705 case ISD::ZERO_EXTEND:
2706 case ISD::SIGN_EXTEND:
2707 case ISD::ANY_EXTEND:
2716 return LowerI8Math(Op, DAG, Opc);
2717 else if (VT == MVT::i64)
2718 return LowerI64Math(Op, DAG, Opc);
2722 // Vector-related lowering.
2723 case ISD::BUILD_VECTOR:
2724 return LowerBUILD_VECTOR(Op, DAG);
2725 case ISD::SCALAR_TO_VECTOR:
2726 return LowerSCALAR_TO_VECTOR(Op, DAG);
2727 case ISD::VECTOR_SHUFFLE:
2728 return LowerVECTOR_SHUFFLE(Op, DAG);
2729 case ISD::EXTRACT_VECTOR_ELT:
2730 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2731 case ISD::INSERT_VECTOR_ELT:
2732 return LowerINSERT_VECTOR_ELT(Op, DAG);
2734 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2738 return LowerByteImmed(Op, DAG);
2740 // Vector and i8 multiply:
2743 return LowerVectorMUL(Op, DAG);
2744 else if (VT == MVT::i8)
2745 return LowerI8Math(Op, DAG, Opc);
2747 return LowerMUL(Op, DAG, VT, Opc);
2750 if (VT == MVT::f32 || VT == MVT::v4f32)
2751 return LowerFDIVf32(Op, DAG);
2752 // else if (Op.getValueType() == MVT::f64)
2753 // return LowerFDIVf64(Op, DAG);
2755 assert(0 && "Calling FDIV on unsupported MVT");
2758 return LowerCTPOP(Op, DAG);
2764 //===----------------------------------------------------------------------===//
2765 // Target Optimization Hooks
2766 //===----------------------------------------------------------------------===//
2769 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2772 TargetMachine &TM = getTargetMachine();
2774 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2775 SelectionDAG &DAG = DCI.DAG;
2776 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2777 SDValue Result; // Initially, NULL result
2779 switch (N->getOpcode()) {
2782 SDValue Op1 = N->getOperand(1);
2784 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2785 SDValue Op01 = Op0.getOperand(1);
2786 if (Op01.getOpcode() == ISD::Constant
2787 || Op01.getOpcode() == ISD::TargetConstant) {
2788 // (add <const>, (SPUindirect <arg>, <const>)) ->
2789 // (SPUindirect <arg>, <const + const>)
2790 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2791 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2792 SDValue combinedConst =
2793 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2794 Op0.getValueType());
2796 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2797 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2798 DEBUG(cerr << "With: (SPUindirect <arg>, "
2799 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2800 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2801 Op0.getOperand(0), combinedConst);
2803 } else if (isa<ConstantSDNode>(Op0)
2804 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2805 SDValue Op11 = Op1.getOperand(1);
2806 if (Op11.getOpcode() == ISD::Constant
2807 || Op11.getOpcode() == ISD::TargetConstant) {
2808 // (add (SPUindirect <arg>, <const>), <const>) ->
2809 // (SPUindirect <arg>, <const + const>)
2810 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2811 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2812 SDValue combinedConst =
2813 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2814 Op0.getValueType());
2816 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2817 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2818 DEBUG(cerr << "With: (SPUindirect <arg>, "
2819 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2821 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2822 Op1.getOperand(0), combinedConst);
2827 case ISD::SIGN_EXTEND:
2828 case ISD::ZERO_EXTEND:
2829 case ISD::ANY_EXTEND: {
2830 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2831 N->getValueType(0) == Op0.getValueType()) {
2832 // (any_extend (SPUextract_elt0 <arg>)) ->
2833 // (SPUextract_elt0 <arg>)
2834 // Types must match, however...
2835 DEBUG(cerr << "Replace: ");
2836 DEBUG(N->dump(&DAG));
2837 DEBUG(cerr << "\nWith: ");
2838 DEBUG(Op0.getNode()->dump(&DAG));
2839 DEBUG(cerr << "\n");
2845 case SPUISD::IndirectAddr: {
2846 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2847 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2848 if (CN->getZExtValue() == 0) {
2849 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2850 // (SPUaform <addr>, 0)
2852 DEBUG(cerr << "Replace: ");
2853 DEBUG(N->dump(&DAG));
2854 DEBUG(cerr << "\nWith: ");
2855 DEBUG(Op0.getNode()->dump(&DAG));
2856 DEBUG(cerr << "\n");
2863 case SPUISD::SHLQUAD_L_BITS:
2864 case SPUISD::SHLQUAD_L_BYTES:
2865 case SPUISD::VEC_SHL:
2866 case SPUISD::VEC_SRL:
2867 case SPUISD::VEC_SRA:
2868 case SPUISD::ROTQUAD_RZ_BYTES:
2869 case SPUISD::ROTQUAD_RZ_BITS: {
2870 SDValue Op1 = N->getOperand(1);
2872 if (isa<ConstantSDNode>(Op1)) {
2873 // Kill degenerate vector shifts:
2874 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2876 if (CN->getZExtValue() == 0) {
2882 case SPUISD::PROMOTE_SCALAR: {
2883 switch (Op0.getOpcode()) {
2886 case ISD::ANY_EXTEND:
2887 case ISD::ZERO_EXTEND:
2888 case ISD::SIGN_EXTEND: {
2889 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2890 // <arg>
2891 // but only if the SPUpromote_scalar and <arg> types match.
2892 SDValue Op00 = Op0.getOperand(0);
2893 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2894 SDValue Op000 = Op00.getOperand(0);
2895 if (Op000.getValueType() == N->getValueType(0)) {
2901 case SPUISD::EXTRACT_ELT0: {
2902 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2903 // <arg>
2904 Result = Op0.getOperand(0);
2911 // Otherwise, return unchanged.
2913 if (Result.getNode()) {
2914 DEBUG(cerr << "\nReplace.SPU: ");
2915 DEBUG(N->dump(&DAG));
2916 DEBUG(cerr << "\nWith: ");
2917 DEBUG(Result.getNode()->dump(&DAG));
2918 DEBUG(cerr << "\n");
2925 //===----------------------------------------------------------------------===//
2926 // Inline Assembly Support
2927 //===----------------------------------------------------------------------===//
2929 /// getConstraintType - Given a constraint letter, return the type of
2930 /// constraint it is for this target.
2931 SPUTargetLowering::ConstraintType
2932 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2933 if (ConstraintLetter.size() == 1) {
2934 switch (ConstraintLetter[0]) {
2941 return C_RegisterClass;
2944 return TargetLowering::getConstraintType(ConstraintLetter);
2947 std::pair<unsigned, const TargetRegisterClass*>
2948 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2951 if (Constraint.size() == 1) {
2952 // GCC RS6000 Constraint Letters
2953 switch (Constraint[0]) {
2957 return std::make_pair(0U, SPU::R64CRegisterClass);
2958 return std::make_pair(0U, SPU::R32CRegisterClass);
2961 return std::make_pair(0U, SPU::R32FPRegisterClass);
2962 else if (VT == MVT::f64)
2963 return std::make_pair(0U, SPU::R64FPRegisterClass);
2966 return std::make_pair(0U, SPU::GPRCRegisterClass);
2970 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2973 //! Compute used/known bits for a SPU operand
2975 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2979 const SelectionDAG &DAG,
2980 unsigned Depth ) const {
2982 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2985 switch (Op.getOpcode()) {
2987 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2997 case SPUISD::PROMOTE_SCALAR: {
2998 SDValue Op0 = Op.getOperand(0);
2999 MVT Op0VT = Op0.getValueType();
3000 unsigned Op0VTBits = Op0VT.getSizeInBits();
3001 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3002 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3003 KnownOne |= APInt(Op0VTBits, InMask, false);
3007 case SPUISD::LDRESULT:
3008 case SPUISD::EXTRACT_ELT0:
3009 case SPUISD::EXTRACT_ELT0_CHAINED: {
3010 MVT OpVT = Op.getValueType();
3011 unsigned OpVTBits = OpVT.getSizeInBits();
3012 uint64_t InMask = OpVT.getIntegerVTBitMask();
3013 KnownZero |= APInt(OpVTBits, ~InMask, false);
3014 KnownOne |= APInt(OpVTBits, InMask, false);
3019 case EXTRACT_I1_ZEXT:
3020 case EXTRACT_I1_SEXT:
3021 case EXTRACT_I8_ZEXT:
3022 case EXTRACT_I8_SEXT:
3027 case SPUISD::SHLQUAD_L_BITS:
3028 case SPUISD::SHLQUAD_L_BYTES:
3029 case SPUISD::VEC_SHL:
3030 case SPUISD::VEC_SRL:
3031 case SPUISD::VEC_SRA:
3032 case SPUISD::VEC_ROTL:
3033 case SPUISD::VEC_ROTR:
3034 case SPUISD::ROTQUAD_RZ_BYTES:
3035 case SPUISD::ROTQUAD_RZ_BITS:
3036 case SPUISD::ROTBYTES_RIGHT_S:
3037 case SPUISD::ROTBYTES_LEFT:
3038 case SPUISD::ROTBYTES_LEFT_CHAINED:
3039 case SPUISD::SELECT_MASK:
3041 case SPUISD::FPInterp:
3042 case SPUISD::FPRecipEst:
3043 case SPUISD::SEXT32TO64:
3048 // LowerAsmOperandForConstraint
3050 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3051 char ConstraintLetter,
3052 std::vector<SDValue> &Ops,
3053 SelectionDAG &DAG) const {
3054 // Default, for the time being, to the base class handler
3055 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3058 /// isLegalAddressImmediate - Return true if the integer value can be used
3059 /// as the offset of the target addressing mode.
3060 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3061 const Type *Ty) const {
3062 // SPU's local store is 256K, so address offsets fit within 18 bits:
3063 return (V > -(1 << 18) && V < (1 << 18) - 1);
3066 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {