lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/VectorExtras.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT        valtype;
  43     const int                   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << VT.getMVTString()
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDValue &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! Predicate that returns true if the operand is an indirect target
 105   bool isIndirectOperand(const SDValue &Op)
 106   {
 107     const unsigned Opc = Op.getOpcode();
 108     return (Opc == ISD::Register
 109             || Opc == SPUISD::LDRESULT);
 110   }
 111 }
 112
 113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 114   : TargetLowering(TM),
 115     SPUTM(TM)
 116 {
       // Configures lowering for the Cell SPU: registers the scalar and vector
       // register classes and records, per (operation, value type) pair,
       // whether the operation is Legal, Promote, Expand or Custom. A few
       // pairs are set more than once below, so the statements are kept in
       // their original order.

 117   // Flag division by a power of two as cheap.
 118   setPow2DivIsCheap();
 119
 120   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 121   setUseUnderscoreSetJmp(true);
 122   setUseUnderscoreLongJmp(true);
 123
 124   // Set up the SPU's register classes:
 125   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 126   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 127   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 128   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 129   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 130   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 131   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 132
 133   // SPU has no sign or zero extended loads for i1, i8, i16:
       // i1 extending loads are promoted; truncating stores to i1 are custom.
 134   setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
 135   setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 136   setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 137   setTruncStoreAction(MVT::i8, MVT::i1, Custom);
 138   setTruncStoreAction(MVT::i16, MVT::i1, Custom);
 139   setTruncStoreAction(MVT::i32, MVT::i1, Custom);
 140   setTruncStoreAction(MVT::i64, MVT::i1, Custom);
 141   setTruncStoreAction(MVT::i128, MVT::i1, Custom);
 142
       // i8 extending loads and truncating stores to i8 are custom lowered.
 143   setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
 144   setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
 145   setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 146   setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
 147   setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
 148   setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
 149   setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
 150   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
 151
       // i16 extending loads are custom lowered (no truncating-store action
       // is set for i16 here).
 152   setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
 153   setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
 154   setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 155
 156   // SPU constants: i64 and f64 are custom lowered, f32 constants are legal:
 157   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 158   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 159   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 160
 161   // SPU's loads and stores have to be custom lowered (every simple value
       // type from i1 up to, but not including, f128):
 162   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 163        ++sctype) {
 164     MVT VT = (MVT::SimpleValueType)sctype;
 165
 166     setOperationAction(ISD::LOAD, VT, Custom);
 167     setOperationAction(ISD::STORE, VT, Custom);
 168   }
 169
 170   // Custom lower BRCOND for i1, i8 to "promote" the result to
 171   // i32 and i16, respectively.
 172   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 173
 174   // Expand the jumptable branches
 175   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 176   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 177   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 178
 179   // SPU has no intrinsics for these particular operations:
 180   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 181
 182   // Expand SREM/UREM for i32 and i64
 183   setOperationAction(ISD::SREM, MVT::i32, Expand);
 184   setOperationAction(ISD::UREM, MVT::i32, Expand);
 185   setOperationAction(ISD::SREM, MVT::i64, Expand);
 186   setOperationAction(ISD::UREM, MVT::i64, Expand);
 187
 188   // Expand sin/cos/rem/log/log2/log10/exp/exp2 for f64 and f32
 189   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 190   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 191   setOperationAction(ISD::FREM , MVT::f64, Expand);
 192   setOperationAction(ISD::FLOG , MVT::f64, Expand);
 193   setOperationAction(ISD::FLOG2, MVT::f64, Expand);
 194   setOperationAction(ISD::FLOG10,MVT::f64, Expand);
 195   setOperationAction(ISD::FEXP , MVT::f64, Expand);
 196   setOperationAction(ISD::FEXP2, MVT::f64, Expand);
 197   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 198   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 199   setOperationAction(ISD::FREM , MVT::f32, Expand);
 200   setOperationAction(ISD::FLOG , MVT::f32, Expand);
 201   setOperationAction(ISD::FLOG2, MVT::f32, Expand);
 202   setOperationAction(ISD::FLOG10,MVT::f32, Expand);
 203   setOperationAction(ISD::FEXP , MVT::f32, Expand);
 204   setOperationAction(ISD::FEXP2, MVT::f32, Expand);
 205
 206   // FSQRT is expanded unconditionally for f64 and f32
 207   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 208   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 209
 210   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 211   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 212
 213   // SPU can do rotate right and left, so legalize it... but customize for i8
 214   // because instructions don't exist.
 215
 216   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 217   //        .td files.
 218   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 219   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 220   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 221
 222   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 223   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 224   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 225   // SPU has no native version of shift left/right for i8
 226   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 227   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 228   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 229   // And SPU needs custom lowering for shift left/right for i64
 230   setOperationAction(ISD::SHL,  MVT::i64,    Custom);
 231   setOperationAction(ISD::SRL,  MVT::i64,    Custom);
 232   setOperationAction(ISD::SRA,  MVT::i64,    Custom);
 233
 234   // Custom lower i8, i32 and i64 multiplications
 235   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 236   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 237   setOperationAction(ISD::MUL,  MVT::i64,    Custom);
 238
 239   // Need to custom handle (some) common i8, i64 math ops
 240   setOperationAction(ISD::ADD,  MVT::i64,    Custom);
 241   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 242   setOperationAction(ISD::SUB,  MVT::i64,    Custom);
 243
 244   // SPU does not have BSWAP. CTLZ is legal for i32.
 245   // CTPOP has to be custom lowered.
 246   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 247   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 248
 249   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 250   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 251   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 252   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 253
 254   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 255   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 256
 257   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 258
 259   // SPU has a version of select that implements (a&~c)|(b&c), just like
 260   // select ought to work:
 261   setOperationAction(ISD::SELECT, MVT::i1,   Promote);
 262   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 263   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 264   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 265   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 266
 267   setOperationAction(ISD::SETCC, MVT::i1,    Promote);
 268   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 269   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 270   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 271   setOperationAction(ISD::SETCC, MVT::i64,   Expand);
 272
 273   // Zero extension and sign extension for i64 have to be
 274   // custom legalized
 275   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 276   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 277   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
 278
 279   // SPU has a legal FP -> signed INT instruction
       // NOTE(review): FP_TO_UINT/i32 is set to Legal here and to Promote
       // further down in this constructor; presumably the later call is the
       // one that takes effect -- confirm which action is intended.
 280   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 281   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 282   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 283   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 284
 285   // FDIV on SPU requires custom lowering
 286   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 287   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 288
 289   // SPU has [U|S]INT_TO_FP
 290   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 291   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 292   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 293   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 294   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 295   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 296   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 297   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 298
 299   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 300   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 301   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 302   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 303
 304   // We cannot sextinreg(i1).  Expand to shifts.
 305   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 306
 307   // Support label based line numbers.
 308   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 309   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 310
 311   // We want to legalize GlobalAddress and ConstantPool nodes into the
 312   // appropriate instructions to materialize the address.
       // (JumpTable is handled the same way; the loop covers every simple
       // value type from i1 up to, but not including, f128.)
 313   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 314        ++sctype) {
 315     MVT VT = (MVT::SimpleValueType)sctype;
 316
 317     setOperationAction(ISD::GlobalAddress, VT, Custom);
 318     setOperationAction(ISD::ConstantPool,  VT, Custom);
 319     setOperationAction(ISD::JumpTable,     VT, Custom);
 320   }
 321
 322   // RET must be custom lowered, to meet ABI requirements
 323   setOperationAction(ISD::RET,           MVT::Other, Custom);
 324
 325   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 326   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 327
 328   // Use the default implementation.
 329   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 330   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 331   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 332   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 333   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 334   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 335   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 336
 337   // Conversions between i64 and fp are custom lowered.
       // NOTE(review): both settings below repeat, with the same action,
       // calls already made earlier in this constructor.
 338   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 339   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 340
 341   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
       // NOTE(review): this differs from the Legal action given to
       // FP_TO_UINT/i32 earlier in this constructor -- confirm intent.
 342   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 343
 344   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 345   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 346
 347   // Register the vector register classes (all six types share VECREG),
 348   // then set the operation actions for every vector value type.
 349   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 350   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 351   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 352   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 353   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 354   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 355
 356   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 357        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 358     MVT VT = (MVT::SimpleValueType)i;
 359
 360     // add/sub are legal for all supported vector VT's.
 361     setOperationAction(ISD::ADD , VT, Legal);
 362     setOperationAction(ISD::SUB , VT, Legal);
 363     // mul has to be custom lowered.
 364     setOperationAction(ISD::MUL , VT, Custom);
 365
 366     setOperationAction(ISD::AND   , VT, Legal);
 367     setOperationAction(ISD::OR    , VT, Legal);
 368     setOperationAction(ISD::XOR   , VT, Legal);
 369     setOperationAction(ISD::LOAD  , VT, Legal);
 370     setOperationAction(ISD::SELECT, VT, Legal);
 371     setOperationAction(ISD::STORE,  VT, Legal);
 372
 373     // These operations need to be expanded:
 374     setOperationAction(ISD::SDIV, VT, Expand);
 375     setOperationAction(ISD::SREM, VT, Expand);
 376     setOperationAction(ISD::UDIV, VT, Expand);
 377     setOperationAction(ISD::UREM, VT, Expand);
         // FDIV is custom lowered rather than expanded:
 378     setOperationAction(ISD::FDIV, VT, Custom);
 379
 380     // Custom lower build_vector, constant pool spills, insert and
 381     // extract vector elements:
 382     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 383     setOperationAction(ISD::ConstantPool, VT, Custom);
 384     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 385     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 386     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 387     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 388   }
 389
       // Per-type overrides: v16i8 AND/OR/XOR switch from Legal to Custom.
       // NOTE(review): MUL/v16i8 and SCALAR_TO_VECTOR/v4f32 are already
       // Custom if the loop above covers those types -- presumably redundant;
       // verify.
 390   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 391   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 392   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 393   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 394   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 395
 396   setShiftAmountType(MVT::i32);
 397   setSetCCResultContents(ZeroOrOneSetCCResult);
 398
 399   setStackPointerRegisterToSaveRestore(SPU::R1);
 400
 401   // We have target-specific dag combine patterns for the following nodes:
 402   setTargetDAGCombine(ISD::ADD);
 403   setTargetDAGCombine(ISD::ZERO_EXTEND);
 404   setTargetDAGCombine(ISD::SIGN_EXTEND);
 405   setTargetDAGCombine(ISD::ANY_EXTEND);
 406
 407   computeRegisterProperties();
 408 }
 409
 410 const char *
 411 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 412 {
 413   if (node_names.empty()) {
 414     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 415     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 416     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 417     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 418     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 419     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 420     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 421     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 422     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 423     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 424     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 425     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 426     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 427     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
 428                                               = "SPUISD::EXTRACT_ELT0_CHAINED";
 429     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 430     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 431     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 432     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 433     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 434     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 435     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 436     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 437     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 438     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 439     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 440     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 441     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 442     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 443     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 444     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
 445       "SPUISD::ROTQUAD_RZ_BYTES";
 446     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
 447       "SPUISD::ROTQUAD_RZ_BITS";
 448     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 449       "SPUISD::ROTBYTES_RIGHT_S";
 450     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 451     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 452       "SPUISD::ROTBYTES_LEFT_CHAINED";
 453     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 454       "SPUISD::ROTBYTES_LEFT_BITS";
 455     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 456     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 457     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
 458     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
 459     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
 460     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
 461     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 462     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 463     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 464   }
 465
 466   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 467
 468   return ((i != node_names.end()) ? i->second : 0);
 469 }
 470
 471 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 472   MVT VT = Op.getValueType();
 473   if (VT.isInteger())
 474     return VT;
 475   else
 476     return MVT::i32;
 477 }
 478
 479 //===----------------------------------------------------------------------===//
 480 // Calling convention code:
 481 //===----------------------------------------------------------------------===//
 482
 483 #include "SPUGenCallingConv.inc"
 484
 485 //===----------------------------------------------------------------------===//
 486 //  LowerOperation implementation
 487 //===----------------------------------------------------------------------===//
 488
 489 /// Aligned load common code for CellSPU
 490 /*!
 491   \param[in] Op The SelectionDAG load or store operand
 492   \param[in] DAG The selection DAG
 493   \param[in] ST CellSPU subtarget information structure
 494   \param[in,out] alignment Caller initializes this to the load or store node's
 495   value from getAlignment(), may be updated while generating the aligned load
 496   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 497   offset (divisible by 16, modulo 16 == 0)
 498   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 499   offset of the preferred slot (modulo 16 != 0)
 500   \param[in,out] VT Caller initializes this value type to the the load or store
 501   node's loaded or stored value type; may be updated if an i1-extended load or
 502   store.
 503   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 504   otherwise false. Can help to determine if the chunk needs to be rotated.
 505
 506  Both load and store lowering load a block of data aligned on a 16-byte
 507  boundary. This is the common aligned load code shared between both.
 508  */
 509 static SDValue
 510 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 511             LSBaseSDNode *LSN,
 512             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 513             MVT &VT, bool &was16aligned)
 514 {
 515   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 516   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 517   SDValue basePtr = LSN->getBasePtr();
 518   SDValue chain = LSN->getChain();
 519
 520   if (basePtr.getOpcode() == ISD::ADD) {
 521     SDValue Op1 = basePtr.getNode()->getOperand(1);
 522
 523     if (Op1.getOpcode() == ISD::Constant
 524         || Op1.getOpcode() == ISD::TargetConstant) {
 525       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 526
 527       alignOffs = (int) CN->getZExtValue();
 528       prefSlotOffs = (int) (alignOffs & 0xf);
 529
 530       // Adjust the rotation amount to ensure that the final result ends up in
 531       // the preferred slot:
 532       prefSlotOffs -= vtm->prefslot_byte;
 533       basePtr = basePtr.getOperand(0);
 534
 535       // Loading from memory, can we adjust alignment?
 536       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 537         SDValue APtr = basePtr.getOperand(0);
 538         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 539           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 540           alignment = GSDN->getGlobal()->getAlignment();
 541         }
 542       }
 543     } else {
 544       alignOffs = 0;
 545       prefSlotOffs = -vtm->prefslot_byte;
 546     }
 547   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
 548     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
 549     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
 550     prefSlotOffs = (int) (alignOffs & 0xf);
 551     prefSlotOffs -= vtm->prefslot_byte;
 552     basePtr = DAG.getRegister(SPU::R1, VT);
 553   } else {
 554     alignOffs = 0;
 555     prefSlotOffs = -vtm->prefslot_byte;
 556   }
 557
 558   if (alignment == 16) {
 559     // Realign the base pointer as a D-Form address:
 560     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 561       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 562                             basePtr,
 563                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 564     }
 565
 566     // Emit the vector load:
 567     was16aligned = true;
 568     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 569                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 570                        LSN->isVolatile(), 16);
 571   }
 572
 573   // Unaligned load or we're using the "large memory" model, which means that
 574   // we have to be very pessimistic:
 575   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 576     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
 577                           DAG.getConstant(0, PtrVT));
 578   }
 579
 580   // Add the offset
 581   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 582                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 583   was16aligned = false;
 584   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 585                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 586                      LSN->isVolatile(), 16);
 587 }
 588
 589 /// Custom lower loads for CellSPU
 590 /*!
 591  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 592  within a 16-byte block, we have to rotate to extract the requested element.

      \param[in] Op The load to lower; must be a LoadSDNode
      \param[in] DAG The selection DAG
      \param[in] ST CellSPU subtarget information, forwarded to AlignedLoad
      \return For an UNINDEXED load, an SPUISD::LDRESULT node carrying the
      loaded value (typed as Op's result type) and the output chain, or the
      value returned by AlignedLoad when that value has no node. Every other
      addressing mode listed in the switch prints a diagnostic and aborts.
 593  */
 594 static SDValue
 595 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 596   LoadSDNode *LN = cast<LoadSDNode>(Op);
 597   SDValue the_chain = LN->getChain();
       // VT is the type as stored in memory, OpVT the type the node produces;
       // the two differ for extending loads.
 598   MVT VT = LN->getMemoryVT();
 599   MVT OpVT = Op.getNode()->getValueType(0);
 600   ISD::LoadExtType ExtType = LN->getExtensionType();
 601   unsigned alignment = LN->getAlignment();
       // Scratch operand array; only elements 0..2 are used below.
 602   SDValue Ops[8];
 603
 604   switch (LN->getAddressingMode()) {
 605   case ISD::UNINDEXED: {
 606     int offset, rotamt;
 607     bool was16aligned;
         // Load the enclosing 16-byte chunk as v16i8. AlignedLoad fills in
         // offset, rotamt (its prefSlotOffs output) and was16aligned.
 608     SDValue result =
 609       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 610
         // Pass an empty result from AlignedLoad straight back to the caller.
 611     if (result.getNode() == 0)
 612       return result;
 613
 614     the_chain = result.getValue(1);
 615     // Rotate the chunk if necessary
         // A negative rotamt is wrapped into the 0..15 byte range.
 616     if (rotamt < 0)
 617       rotamt += 16;
 618     if (rotamt != 0 || !was16aligned) {
 619       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 620
 621       Ops[0] = the_chain;
 622       Ops[1] = result;
 623       if (was16aligned) {
             // Known alignment: the rotate amount is a compile-time constant.
 624         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 625       } else {
             // Otherwise the rotate amount is the chunk load's address plus
             // rotamt, built as an ADD node.
 626         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 627         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 628         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 629                              DAG.getConstant(rotamt, PtrVT));
 630       }
 631
 632       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 633       the_chain = result.getValue(1);
 634     }
 635
         // Non-extending loads and any-extending loads: reinterpret the chunk
         // as a vector of the result type and extract element 0.
 636     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 637       SDVTList scalarvts;
 638       MVT vecVT = MVT::v16i8;
 639
 640       // Convert the loaded v16i8 vector to the appropriate vector type
 641       // specified by the operand:
 642       if (OpVT == VT) {
             // i1 keeps the v16i8 vector type.
 643         if (VT != MVT::i1)
 644           vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 645       } else
 646         vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
 647
 648       Ops[0] = the_chain;
 649       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 650       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 651       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 652       the_chain = result.getValue(1);
 653     } else {
 654       // Handle the sign and zero-extending loads for i1 and i8:
           // NOTE(review): the opcode is chosen by testing OpVT (the result
           // type), and in this branch VT != OpVT. Confirm whether VT (the
           // memory type) was meant, and that EXTRACT_I8_* is correct for
           // every non-i1 memory type that reaches here.
 655       unsigned NewOpC;
 656
 657       if (ExtType == ISD::SEXTLOAD) {
 658         NewOpC = (OpVT == MVT::i1
 659                   ? SPUISD::EXTRACT_I1_SEXT
 660                   : SPUISD::EXTRACT_I8_SEXT);
 661       } else {
 662         assert(ExtType == ISD::ZEXTLOAD);
 663         NewOpC = (OpVT == MVT::i1
 664                   ? SPUISD::EXTRACT_I1_ZEXT
 665                   : SPUISD::EXTRACT_I8_ZEXT);
 666       }
 667
           // This node produces no chain value; the_chain stays as it was.
 668       result = DAG.getNode(NewOpC, OpVT, result);
 669     }
 670
         // Bundle the value and the current chain into one LDRESULT node.
 671     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 672     SDValue retops[2] = {
 673       result,
 674       the_chain
 675     };
 676
 677     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 678                          retops, sizeof(retops) / sizeof(retops[0]));
 679     return result;
 680   }
       // Indexed addressing modes are not handled: report and abort.
 681   case ISD::PRE_INC:
 682   case ISD::PRE_DEC:
 683   case ISD::POST_INC:
 684   case ISD::POST_DEC:
 685   case ISD::LAST_INDEXED_MODE:
 686     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 687             "UNINDEXED\n";
 688     cerr << (unsigned) LN->getAddressingMode() << "\n";
 689     abort();
 690     /*NOTREACHED*/
 691   }
 692
       // Only reached when the addressing mode matches no case above.
 693   return SDValue();
 694 }
 695
 696 /// Custom lower stores for CellSPU
 697 /*!
 698  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 699  within a 16-byte block, we have to generate a shuffle to insert the
 700  requested element into its place, then store the resulting block.
 701  */
 702 static SDValue
 703 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 704   StoreSDNode *SN = cast<StoreSDNode>(Op);
 705   SDValue Value = SN->getValue();
 706   MVT VT = Value.getValueType();
 707   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 708   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 709   unsigned alignment = SN->getAlignment();
 710
 711   switch (SN->getAddressingMode()) {
 712   case ISD::UNINDEXED: {
 713     int chunk_offset, slot_offset;
 714     bool was16aligned;
 715
 716     // The vector type we really want to load from the 16-byte chunk, except
 717     // in the case of MVT::i1, which has to be v16i8.
 718     MVT vecVT, stVecVT = MVT::v16i8;
 719
 720     if (StVT != MVT::i1)
 721       stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 722     vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 723
 724     SDValue alignLoadVec =
 725       AlignedLoad(Op, DAG, ST, SN, alignment,
 726                   chunk_offset, slot_offset, VT, was16aligned);
 727
 728     if (alignLoadVec.getNode() == 0)
 729       return alignLoadVec;
 730
 731     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 732     SDValue basePtr = LN->getBasePtr();
 733     SDValue the_chain = alignLoadVec.getValue(1);
 734     SDValue theValue = SN->getValue();
 735     SDValue result;
 736
 737     if (StVT != VT
 738         && (theValue.getOpcode() == ISD::AssertZext
 739             || theValue.getOpcode() == ISD::AssertSext)) {
 740       // Drill down and get the value for zero- and sign-extended
 741       // quantities
 742       theValue = theValue.getOperand(0);
 743     }
 744
 745     chunk_offset &= 0xf;
 746
 747     SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 748     SDValue insertEltPtr;
 749     SDValue insertEltOp;
 750
 751     // If the base pointer is already a D-form address, then just create
 752     // a new D-form address with a slot offset and the orignal base pointer.
 753     // Otherwise generate a D-form address with the slot offset relative
 754     // to the stack pointer, which is always aligned.
 755     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 756     DEBUG(basePtr.getNode()->dump(&DAG));
 757     DEBUG(cerr << "\n");
 758
 759     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 760         (basePtr.getOpcode() == ISD::ADD
 761          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 762       insertEltPtr = basePtr;
 763     } else {
 764       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 765     }
 766
 767     insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
 768     result = DAG.getNode(SPUISD::SHUFB, vecVT,
 769                          DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
 770                          alignLoadVec,
 771                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 772
 773     result = DAG.getStore(the_chain, result, basePtr,
 774                           LN->getSrcValue(), LN->getSrcValueOffset(),
 775                           LN->isVolatile(), LN->getAlignment());
 776
 777     return result;
 778     /*UNREACHED*/
 779   }
 780   case ISD::PRE_INC:
 781   case ISD::PRE_DEC:
 782   case ISD::POST_INC:
 783   case ISD::POST_DEC:
 784   case ISD::LAST_INDEXED_MODE:
 785     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 786             "UNINDEXED\n";
 787     cerr << (unsigned) SN->getAddressingMode() << "\n";
 788     abort();
 789     /*NOTREACHED*/
 790   }
 791
 792   return SDValue();
 793 }
 794
 795 /// Generate the address of a constant pool entry.
 796 static SDValue
 797 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 798   MVT PtrVT = Op.getValueType();
 799   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 800   Constant *C = CP->getConstVal();
 801   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 802   SDValue Zero = DAG.getConstant(0, PtrVT);
 803   const TargetMachine &TM = DAG.getTarget();
 804
 805   if (TM.getRelocationModel() == Reloc::Static) {
 806     if (!ST->usingLargeMem()) {
 807       // Just return the SDValue with the constant pool address in it.
 808       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 809     } else {
 810       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 811       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 812       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 813     }
 814   }
 815
 816   assert(0 &&
 817          "LowerConstantPool: Relocation model other than static"
 818          " not supported.");
 819   return SDValue();
 820 }
 821
 822 static SDValue
 823 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 824   MVT PtrVT = Op.getValueType();
 825   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 826   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 827   SDValue Zero = DAG.getConstant(0, PtrVT);
 828   const TargetMachine &TM = DAG.getTarget();
 829
 830   if (TM.getRelocationModel() == Reloc::Static) {
 831     if (!ST->usingLargeMem()) {
 832       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 833     } else {
 834       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 835       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 836       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 837     }
 838   }
 839
 840   assert(0 &&
 841          "LowerJumpTable: Relocation model other than static not supported.");
 842   return SDValue();
 843 }
 844
 845 static SDValue
 846 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 847   MVT PtrVT = Op.getValueType();
 848   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 849   GlobalValue *GV = GSDN->getGlobal();
 850   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 851   const TargetMachine &TM = DAG.getTarget();
 852   SDValue Zero = DAG.getConstant(0, PtrVT);
 853
 854   if (TM.getRelocationModel() == Reloc::Static) {
 855     if (!ST->usingLargeMem()) {
 856       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 857     } else {
 858       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 859       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 860       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 861     }
 862   } else {
 863     cerr << "LowerGlobalAddress: Relocation model other than static not "
 864          << "supported.\n";
 865     abort();
 866     /*NOTREACHED*/
 867   }
 868
 869   return SDValue();
 870 }
 871
 872 //! Custom lower i64 integer constants
 873 /*!
 874  This code inserts all of the necessary juggling that needs to occur to load
 875  a 64-bit constant into a register.
 876  */
 877 static SDValue
 878 LowerConstant(SDValue Op, SelectionDAG &DAG) {
 879   MVT VT = Op.getValueType();
 880   ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
 881
 882   if (VT == MVT::i64) {
 883     SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
 884     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 885                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 886   } else {
 887     cerr << "LowerConstant: unhandled constant type "
 888          << VT.getMVTString()
 889          << "\n";
 890     abort();
 891     /*NOTREACHED*/
 892   }
 893
 894   return SDValue();
 895 }
 896
 897 //! Custom lower double precision floating point constants
 898 static SDValue
 899 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 900   MVT VT = Op.getValueType();
 901   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 902
 903   assert((FP != 0) &&
 904          "LowerConstantFP: Node is not ConstantFPSDNode");
 905
 906   if (VT == MVT::f64) {
 907     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 908     return DAG.getNode(ISD::BIT_CONVERT, VT,
 909                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 910   }
 911
 912   return SDValue();
 913 }
 914
 915 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 916 static SDValue
 917 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
 918 {
 919   SDValue Cond = Op.getOperand(1);
 920   MVT CondVT = Cond.getValueType();
 921   MVT CondNVT;
 922
 923   if (CondVT == MVT::i1 || CondVT == MVT::i8) {
 924     CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
 925     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 926                       Op.getOperand(0),
 927                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 928                       Op.getOperand(2));
 929   } else
 930     return SDValue();                // Unchanged
 931 }
 932
 933 static SDValue
 934 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 935 {
 936   MachineFunction &MF = DAG.getMachineFunction();
 937   MachineFrameInfo *MFI = MF.getFrameInfo();
 938   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 939   SmallVector<SDValue, 8> ArgValues;
 940   SDValue Root = Op.getOperand(0);
 941   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 942
 943   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 944   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 945
 946   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 947   unsigned ArgRegIdx = 0;
 948   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 949
 950   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 951
 952   // Add DAG nodes to load the arguments or copy them out of registers.
 953   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 954        ArgNo != e; ++ArgNo) {
 955     SDValue ArgVal;
 956     bool needsLoad = false;
 957     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 958     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 959
 960     switch (ObjectVT.getSimpleVT()) {
 961     default: {
 962       cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 963            << ObjectVT.getMVTString()
 964            << "\n";
 965       abort();
 966     }
 967     case MVT::i8:
 968       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 969         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
 970         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 971         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
 972         ++ArgRegIdx;
 973       } else {
 974         needsLoad = true;
 975       }
 976       break;
 977     case MVT::i16:
 978       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 979         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
 980         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 981         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
 982         ++ArgRegIdx;
 983       } else {
 984         needsLoad = true;
 985       }
 986       break;
 987     case MVT::i32:
 988       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 989         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 990         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 991         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
 992         ++ArgRegIdx;
 993       } else {
 994         needsLoad = true;
 995       }
 996       break;
 997     case MVT::i64:
 998       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 999         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
1000         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
1002         ++ArgRegIdx;
1003       } else {
1004         needsLoad = true;
1005       }
1006       break;
1007     case MVT::f32:
1008       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1009         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
1010         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1011         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
1012         ++ArgRegIdx;
1013       } else {
1014         needsLoad = true;
1015       }
1016       break;
1017     case MVT::f64:
1018       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1019         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1020         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1021         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1022         ++ArgRegIdx;
1023       } else {
1024         needsLoad = true;
1025       }
1026       break;
1027     case MVT::v2f64:
1028     case MVT::v4f32:
1029     case MVT::v2i64:
1030     case MVT::v4i32:
1031     case MVT::v8i16:
1032     case MVT::v16i8:
1033       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1034         unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1035         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1036         ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1037         ++ArgRegIdx;
1038       } else {
1039         needsLoad = true;
1040       }
1041       break;
1042     }
1043
1044     // We need to load the argument to a virtual register if we determined above
1045     // that we ran out of physical registers of the appropriate type
1046     if (needsLoad) {
1047       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1048       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1049       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1050       ArgOffset += StackSlotSize;
1051     }
1052
1053     ArgValues.push_back(ArgVal);
1054   }
1055
1056   // If the function takes variable number of arguments, make a frame index for
1057   // the start of the first vararg value... for expansion of llvm.va_start.
1058   if (isVarArg) {
1059     VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1060                                                ArgOffset);
1061     SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1062     // If this function is vararg, store any remaining integer argument regs to
1063     // their spots on the stack so that they may be loaded by deferencing the
1064     // result of va_next.
1065     SmallVector<SDValue, 8> MemOps;
1066     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1067       unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1068       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1069       SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1070       SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1071       MemOps.push_back(Store);
1072       // Increment the address by four for the next argument to store
1073       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1074       FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1075     }
1076     if (!MemOps.empty())
1077       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1078   }
1079
1080   ArgValues.push_back(Root);
1081
1082   // Return the new list of results.
1083   return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1084                             ArgValues.size());
1085 }
1086
1087 /// isLSAAddress - Return the immediate to use if the specified
1088 /// value is representable as a LSA address.
1089 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1090   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1091   if (!C) return 0;
1092
1093   int Addr = C->getZExtValue();
1094   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1095       (Addr << 14 >> 14) != Addr)
1096     return 0;  // Top 14 bits have to be sext of immediate.
1097
1098   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1099 }
1100
1101 static
1102 SDValue
1103 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1104   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1105   SDValue Chain = TheCall->getChain();
1106 #if 0
1107   bool isVarArg   = TheCall->isVarArg();
1108   bool isTailCall = TheCall->isTailCall();
1109 #endif
1110   SDValue Callee    = TheCall->getCallee();
1111   unsigned NumOps     = TheCall->getNumArgs();
1112   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1113   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1114   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1115
1116   // Handy pointer type
1117   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1118
1119   // Accumulate how many bytes are to be pushed on the stack, including the
1120   // linkage area, and parameter passing area.  According to the SPU ABI,
1121   // we minimally need space for [LR] and [SP]
1122   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1123
1124   // Set up a copy of the stack pointer for use loading and storing any
1125   // arguments that may not fit in the registers available for argument
1126   // passing.
1127   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1128
1129   // Figure out which arguments are going to go in registers, and which in
1130   // memory.
1131   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1132   unsigned ArgRegIdx = 0;
1133
1134   // Keep track of registers passing arguments
1135   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1136   // And the arguments passed on the stack
1137   SmallVector<SDValue, 8> MemOpChains;
1138
1139   for (unsigned i = 0; i != NumOps; ++i) {
1140     SDValue Arg = TheCall->getArg(i);
1141
1142     // PtrOff will be used to store the current argument to the stack if a
1143     // register cannot be found for it.
1144     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1145     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1146
1147     switch (Arg.getValueType().getSimpleVT()) {
1148     default: assert(0 && "Unexpected ValueType for argument!");
1149     case MVT::i32:
1150     case MVT::i64:
1151     case MVT::i128:
1152       if (ArgRegIdx != NumArgRegs) {
1153         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1154       } else {
1155         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1156         ArgOffset += StackSlotSize;
1157       }
1158       break;
1159     case MVT::f32:
1160     case MVT::f64:
1161       if (ArgRegIdx != NumArgRegs) {
1162         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1163       } else {
1164         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1165         ArgOffset += StackSlotSize;
1166       }
1167       break;
1168     case MVT::v4f32:
1169     case MVT::v4i32:
1170     case MVT::v8i16:
1171     case MVT::v16i8:
1172       if (ArgRegIdx != NumArgRegs) {
1173         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1174       } else {
1175         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1176         ArgOffset += StackSlotSize;
1177       }
1178       break;
1179     }
1180   }
1181
1182   // Update number of stack bytes actually used, insert a call sequence start
1183   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1184   Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1185
1186   if (!MemOpChains.empty()) {
1187     // Adjust the stack pointer for the stack arguments.
1188     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1189                         &MemOpChains[0], MemOpChains.size());
1190   }
1191
1192   // Build a sequence of copy-to-reg nodes chained together with token chain
1193   // and flag operands which copy the outgoing args into the appropriate regs.
1194   SDValue InFlag;
1195   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1196     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1197                              InFlag);
1198     InFlag = Chain.getValue(1);
1199   }
1200
1201   SmallVector<SDValue, 8> Ops;
1202   unsigned CallOpc = SPUISD::CALL;
1203
1204   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1205   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1206   // node so that legalize doesn't hack it.
1207   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1208     GlobalValue *GV = G->getGlobal();
1209     MVT CalleeVT = Callee.getValueType();
1210     SDValue Zero = DAG.getConstant(0, PtrVT);
1211     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1212
1213     if (!ST->usingLargeMem()) {
1214       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1215       // style calls, otherwise, external symbols are BRASL calls. This assumes
1216       // that declared/defined symbols are in the same compilation unit and can
1217       // be reached through PC-relative jumps.
1218       //
1219       // NOTE:
1220       // This may be an unsafe assumption for JIT and really large compilation
1221       // units.
1222       if (GV->isDeclaration()) {
1223         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1224       } else {
1225         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1226       }
1227     } else {
1228       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1229       // address pairs:
1230       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1231     }
1232   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1233     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1234   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1235     // If this is an absolute destination address that appears to be a legal
1236     // local store address, use the munged value.
1237     Callee = SDValue(Dest, 0);
1238   }
1239
1240   Ops.push_back(Chain);
1241   Ops.push_back(Callee);
1242
1243   // Add argument registers to the end of the list so that they are known live
1244   // into the call.
1245   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1246     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1247                                   RegsToPass[i].second.getValueType()));
1248
1249   if (InFlag.getNode())
1250     Ops.push_back(InFlag);
1251   // Returns a chain and a flag for retval copy to use.
1252   Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1253                       &Ops[0], Ops.size());
1254   InFlag = Chain.getValue(1);
1255
1256   Chain = DAG.getCALLSEQ_END(Chain,
1257                              DAG.getConstant(NumStackBytes, PtrVT),
1258                              DAG.getConstant(0, PtrVT),
1259                              InFlag);
1260   if (TheCall->getValueType(0) != MVT::Other)
1261     InFlag = Chain.getValue(1);
1262
1263   SDValue ResultVals[3];
1264   unsigned NumResults = 0;
1265
1266   // If the call has results, copy the values out of the ret val registers.
1267   switch (TheCall->getValueType(0).getSimpleVT()) {
1268   default: assert(0 && "Unexpected ret value!");
1269   case MVT::Other: break;
1270   case MVT::i32:
1271     if (TheCall->getValueType(1) == MVT::i32) {
1272       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1273       ResultVals[0] = Chain.getValue(0);
1274       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1275                                  Chain.getValue(2)).getValue(1);
1276       ResultVals[1] = Chain.getValue(0);
1277       NumResults = 2;
1278     } else {
1279       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1280       ResultVals[0] = Chain.getValue(0);
1281       NumResults = 1;
1282     }
1283     break;
1284   case MVT::i64:
1285     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1286     ResultVals[0] = Chain.getValue(0);
1287     NumResults = 1;
1288     break;
1289   case MVT::f32:
1290   case MVT::f64:
1291     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1292                                InFlag).getValue(1);
1293     ResultVals[0] = Chain.getValue(0);
1294     NumResults = 1;
1295     break;
1296   case MVT::v2f64:
1297   case MVT::v4f32:
1298   case MVT::v4i32:
1299   case MVT::v8i16:
1300   case MVT::v16i8:
1301     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1302                                    InFlag).getValue(1);
1303     ResultVals[0] = Chain.getValue(0);
1304     NumResults = 1;
1305     break;
1306   }
1307
1308   // If the function returns void, just return the chain.
1309   if (NumResults == 0)
1310     return Chain;
1311
1312   // Otherwise, merge everything together with a MERGE_VALUES node.
1313   ResultVals[NumResults++] = Chain;
1314   SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1315   return Res.getValue(Op.getResNo());
1316 }
1317
1318 static SDValue
1319 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1320   SmallVector<CCValAssign, 16> RVLocs;
1321   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1322   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1323   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1324   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1325
1326   // If this is the first return lowered for this function, add the regs to the
1327   // liveout set for the function.
1328   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1329     for (unsigned i = 0; i != RVLocs.size(); ++i)
1330       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1331   }
1332
1333   SDValue Chain = Op.getOperand(0);
1334   SDValue Flag;
1335
1336   // Copy the result values into the output registers.
1337   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1338     CCValAssign &VA = RVLocs[i];
1339     assert(VA.isRegLoc() && "Can only return in registers!");
1340     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1341     Flag = Chain.getValue(1);
1342   }
1343
1344   if (Flag.getNode())
1345     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1346   else
1347     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1348 }
1349
1350
1351 //===----------------------------------------------------------------------===//
1352 // Vector related lowering:
1353 //===----------------------------------------------------------------------===//
1354
1355 static ConstantSDNode *
1356 getVecImm(SDNode *N) {
1357   SDValue OpVal(0, 0);
1358
1359   // Check to see if this buildvec has a single non-undef value in its elements.
1360   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1361     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1362     if (OpVal.getNode() == 0)
1363       OpVal = N->getOperand(i);
1364     else if (OpVal != N->getOperand(i))
1365       return 0;
1366   }
1367
1368   if (OpVal.getNode() != 0) {
1369     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1370       return CN;
1371     }
1372   }
1373
1374   return 0; // All UNDEF: use implicit def.; not Constant node
1375 }
1376
1377 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1378 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1379 /// constant
1380 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1381                               MVT ValueType) {
1382   if (ConstantSDNode *CN = getVecImm(N)) {
1383     uint64_t Value = CN->getZExtValue();
1384     if (ValueType == MVT::i64) {
1385       uint64_t UValue = CN->getZExtValue();
1386       uint32_t upper = uint32_t(UValue >> 32);
1387       uint32_t lower = uint32_t(UValue);
1388       if (upper != lower)
1389         return SDValue();
1390       Value = Value >> 32;
1391     }
1392     if (Value <= 0x3ffff)
1393       return DAG.getConstant(Value, ValueType);
1394   }
1395
1396   return SDValue();
1397 }
1398
1399 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1400 /// and the value fits into a signed 16-bit constant, and if so, return the
1401 /// constant
1402 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1403                               MVT ValueType) {
1404   if (ConstantSDNode *CN = getVecImm(N)) {
1405     int64_t Value = CN->getSignExtended();
1406     if (ValueType == MVT::i64) {
1407       uint64_t UValue = CN->getZExtValue();
1408       uint32_t upper = uint32_t(UValue >> 32);
1409       uint32_t lower = uint32_t(UValue);
1410       if (upper != lower)
1411         return SDValue();
1412       Value = Value >> 32;
1413     }
1414     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1415       return DAG.getConstant(Value, ValueType);
1416     }
1417   }
1418
1419   return SDValue();
1420 }
1421
1422 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1423 /// and the value fits into a signed 10-bit constant, and if so, return the
1424 /// constant
1425 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1426                               MVT ValueType) {
1427   if (ConstantSDNode *CN = getVecImm(N)) {
1428     int64_t Value = CN->getSignExtended();
1429     if (ValueType == MVT::i64) {
1430       uint64_t UValue = CN->getZExtValue();
1431       uint32_t upper = uint32_t(UValue >> 32);
1432       uint32_t lower = uint32_t(UValue);
1433       if (upper != lower)
1434         return SDValue();
1435       Value = Value >> 32;
1436     }
1437     if (isS10Constant(Value))
1438       return DAG.getConstant(Value, ValueType);
1439   }
1440
1441   return SDValue();
1442 }
1443
1444 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1445 /// and the value fits into a signed 8-bit constant, and if so, return the
1446 /// constant.
1447 ///
1448 /// @note: The incoming vector is v16i8 because that's the only way we can load
1449 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1450 /// same value.
1451 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1452                              MVT ValueType) {
1453   if (ConstantSDNode *CN = getVecImm(N)) {
1454     int Value = (int) CN->getZExtValue();
1455     if (ValueType == MVT::i16
1456         && Value <= 0xffff                 /* truncated from uint64_t */
1457         && ((short) Value >> 8) == ((short) Value & 0xff))
1458       return DAG.getConstant(Value & 0xff, ValueType);
1459     else if (ValueType == MVT::i8
1460              && (Value & 0xff) == Value)
1461       return DAG.getConstant(Value, ValueType);
1462   }
1463
1464   return SDValue();
1465 }
1466
1467 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1468 /// and the value fits into a signed 16-bit constant, and if so, return the
1469 /// constant
1470 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1471                                MVT ValueType) {
1472   if (ConstantSDNode *CN = getVecImm(N)) {
1473     uint64_t Value = CN->getZExtValue();
1474     if ((ValueType == MVT::i32
1475           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1476         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1477       return DAG.getConstant(Value >> 16, ValueType);
1478   }
1479
1480   return SDValue();
1481 }
1482
1483 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1484 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1485   if (ConstantSDNode *CN = getVecImm(N)) {
1486     return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
1487   }
1488
1489   return SDValue();
1490 }
1491
1492 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1493 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1494   if (ConstantSDNode *CN = getVecImm(N)) {
1495     return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i64);
1496   }
1497
1498   return SDValue();
1499 }
1500
1501 // If this is a vector of constants or undefs, get the bits.  A bit in
1502 // UndefBits is set if the corresponding element of the vector is an
1503 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1504 // zero.   Return true if this is not an array of constants, false if it is.
1505 //
1506 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1507                                        uint64_t UndefBits[2]) {
1508   // Start with zero'd results.
1509   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1510
1511   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1512   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1513     SDValue OpVal = BV->getOperand(i);
1514
1515     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1516     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1517
1518     uint64_t EltBits = 0;
1519     if (OpVal.getOpcode() == ISD::UNDEF) {
1520       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1521       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1522       continue;
1523     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1524       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1525     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1526       const APFloat &apf = CN->getValueAPF();
1527       EltBits = (CN->getValueType(0) == MVT::f32
1528                  ? FloatToBits(apf.convertToFloat())
1529                  : DoubleToBits(apf.convertToDouble()));
1530     } else {
1531       // Nonconstant element.
1532       return true;
1533     }
1534
1535     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1536   }
1537
1538   //printf("%llx %llx  %llx %llx\n",
1539   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1540   return false;
1541 }
1542
/// isConstantSplat - Test whether a 128-bit constant is a splat (repetition)
/// of a single value at the granularity selected by \a MinSplatBits.
///
/// \param Bits128      the constant, as two 64-bit words
/// \param Undef128     mask of undefined bits, same layout as Bits128; the
///                     comparisons below assume undefined bits are zero in
///                     Bits128
/// \param MinSplatBits selects the reported granularity:
///                     >= 64 reports an 8-byte splat, 32..63 a 4-byte splat,
///                     16..31 a 2-byte splat, and anything below 16 a 1-byte
///                     splat
/// \param SplatBits    [out] the repeated value
/// \param SplatUndef   [out] bits of the repeated value that are undefined in
///                     every repetition
/// \param SplatSize    [out] size of the repeated value in bytes
///
/// \return true if the constant repeats at the selected granularity, ignoring
/// undefined bits. The constant must also repeat at every coarser level; e.g.
/// asking for a 1-byte splat fails on 0x0001000100010001... even though that
/// is a 2-byte splat. The outputs are only written when true is returned.
///
/// At every level the two halves are merged: defined bits are OR-ed together
/// and a bit stays undefined only if it is undefined in both halves. This way
/// a value that is defined in just one of the halves is never lost.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // The two 64-bit halves must agree wherever both are defined.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;

  const uint64_t Bits64  = Bits128[0] | Bits128[1];
  const uint64_t Undef64 = Undef128[0] & Undef128[1];

  if (MinSplatBits >= 64) {
    // Report the merged halves, not just word [0]: word [0] may be undefined
    // where word [1] carries the actual constant.
    SplatBits = Bits64;
    SplatUndef = Undef64;
    SplatSize = 8;
    return true;
  }

  // The two 32-bit halves must agree wherever both are defined.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  const uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  const uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  if (MinSplatBits >= 32) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // The two 16-bit halves must agree wherever both are defined.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  const uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  const uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if (MinSplatBits >= 16) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // The two bytes must agree wherever both are defined.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
1607
1608 // If this is a case we can't handle, return null and let the default
1609 // expansion code take care of it.  If we CAN select this case, and if it
1610 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1611 // this case more efficiently than a constant pool load, lower it to the
1612 // sequence of ops that should be used.
1613 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1614   MVT VT = Op.getValueType();
1615   // If this is a vector of constants or undefs, get the bits.  A bit in
1616   // UndefBits is set if the corresponding element of the vector is an
1617   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1618   // zero.
1619   uint64_t VectorBits[2];
1620   uint64_t UndefBits[2];
1621   uint64_t SplatBits, SplatUndef;
1622   int SplatSize;
1623   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1624       || !isConstantSplat(VectorBits, UndefBits,
1625                           VT.getVectorElementType().getSizeInBits(),
1626                           SplatBits, SplatUndef, SplatSize))
1627     return SDValue();   // Not a constant vector, not a splat.
1628
1629   switch (VT.getSimpleVT()) {
1630   default:
1631   case MVT::v4f32: {
1632     uint32_t Value32 = SplatBits;
1633     assert(SplatSize == 4
1634            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1635     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1636     SDValue T = DAG.getConstant(Value32, MVT::i32);
1637     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1638                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1639     break;
1640   }
1641   case MVT::v2f64: {
1642     uint64_t f64val = SplatBits;
1643     assert(SplatSize == 8
1644            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1645     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1646     SDValue T = DAG.getConstant(f64val, MVT::i64);
1647     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1648                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1649     break;
1650   }
1651   case MVT::v16i8: {
1652    // 8-bit constants have to be expanded to 16-bits
1653    unsigned short Value16 = SplatBits | (SplatBits << 8);
1654    SDValue Ops[8];
1655    for (int i = 0; i < 8; ++i)
1656      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1657    return DAG.getNode(ISD::BIT_CONVERT, VT,
1658                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1659   }
1660   case MVT::v8i16: {
1661     unsigned short Value16;
1662     if (SplatSize == 2)
1663       Value16 = (unsigned short) (SplatBits & 0xffff);
1664     else
1665       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1666     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1667     SDValue Ops[8];
1668     for (int i = 0; i < 8; ++i) Ops[i] = T;
1669     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1670   }
1671   case MVT::v4i32: {
1672     unsigned int Value = SplatBits;
1673     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1674     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1675   }
1676   case MVT::v2i64: {
1677     uint64_t val = SplatBits;
1678     uint32_t upper = uint32_t(val >> 32);
1679     uint32_t lower = uint32_t(val);
1680
1681     if (upper == lower) {
1682       // Magic constant that can be matched by IL, ILA, et. al.
1683       SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1684       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1685     } else {
1686       SDValue LO32;
1687       SDValue HI32;
1688       SmallVector<SDValue, 16> ShufBytes;
1689       SDValue Result;
1690       bool upper_special, lower_special;
1691
1692       // NOTE: This code creates common-case shuffle masks that can be easily
1693       // detected as common expressions. It is not attempting to create highly
1694       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1695
1696       // Detect if the upper or lower half is a special shuffle mask pattern:
1697       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1698       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1699
1700       // Create lower vector if not a special pattern
1701       if (!lower_special) {
1702         SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1703         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1704                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1705                                        LO32C, LO32C, LO32C, LO32C));
1706       }
1707
1708       // Create upper vector if not a special pattern
1709       if (!upper_special) {
1710         SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1711         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1712                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1713                                        HI32C, HI32C, HI32C, HI32C));
1714       }
1715
1716       // If either upper or lower are special, then the two input operands are
1717       // the same (basically, one of them is a "don't care")
1718       if (lower_special)
1719         LO32 = HI32;
1720       if (upper_special)
1721         HI32 = LO32;
1722       if (lower_special && upper_special) {
1723         // Unhappy situation... both upper and lower are special, so punt with
1724         // a target constant:
1725         SDValue Zero = DAG.getConstant(0, MVT::i32);
1726         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1727                                   Zero, Zero);
1728       }
1729
1730       for (int i = 0; i < 4; ++i) {
1731         uint64_t val = 0;
1732         for (int j = 0; j < 4; ++j) {
1733           SDValue V;
1734           bool process_upper, process_lower;
1735           val <<= 8;
1736           process_upper = (upper_special && (i & 1) == 0);
1737           process_lower = (lower_special && (i & 1) == 1);
1738
1739           if (process_upper || process_lower) {
1740             if ((process_upper && upper == 0)
1741                 || (process_lower && lower == 0))
1742               val |= 0x80;
1743             else if ((process_upper && upper == 0xffffffff)
1744                      || (process_lower && lower == 0xffffffff))
1745               val |= 0xc0;
1746             else if ((process_upper && upper == 0x80000000)
1747                      || (process_lower && lower == 0x80000000))
1748               val |= (j == 0 ? 0xe0 : 0x80);
1749           } else
1750             val |= i * 4 + j + ((i & 1) * 16);
1751         }
1752
1753         ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1754       }
1755
1756       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1757                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1758                                      &ShufBytes[0], ShufBytes.size()));
1759     }
1760   }
1761   }
1762
1763   return SDValue();
1764 }
1765
1766 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1767 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1768 /// permutation vector, V3, is monotonically increasing with one "exception"
1769 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1770 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1771 /// In either case, the net result is going to eventually invoke SHUFB to
1772 /// permute/shuffle the bytes from V1 and V2.
1773 /// \note
1774 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1775 /// control word for byte/halfword/word insertion. This takes care of a single
1776 /// element move from V2 into V1.
1777 /// \note
1778 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1779 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1780   SDValue V1 = Op.getOperand(0);
1781   SDValue V2 = Op.getOperand(1);
1782   SDValue PermMask = Op.getOperand(2);
1783
1784   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1785
1786   // If we have a single element being moved from V1 to V2, this can be handled
1787   // using the C*[DX] compute mask instructions, but the vector elements have
1788   // to be monotonically increasing with one exception element.
1789   MVT EltVT = V1.getValueType().getVectorElementType();
1790   unsigned EltsFromV2 = 0;
1791   unsigned V2Elt = 0;
1792   unsigned V2EltIdx0 = 0;
1793   unsigned CurrElt = 0;
1794   bool monotonic = true;
1795   if (EltVT == MVT::i8)
1796     V2EltIdx0 = 16;
1797   else if (EltVT == MVT::i16)
1798     V2EltIdx0 = 8;
1799   else if (EltVT == MVT::i32)
1800     V2EltIdx0 = 4;
1801   else
1802     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1803
1804   for (unsigned i = 0, e = PermMask.getNumOperands();
1805        EltsFromV2 <= 1 && monotonic && i != e;
1806        ++i) {
1807     unsigned SrcElt;
1808     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1809       SrcElt = 0;
1810     else
1811       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1812
1813     if (SrcElt >= V2EltIdx0) {
1814       ++EltsFromV2;
1815       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1816     } else if (CurrElt != SrcElt) {
1817       monotonic = false;
1818     }
1819
1820     ++CurrElt;
1821   }
1822
1823   if (EltsFromV2 == 1 && monotonic) {
1824     // Compute mask and shuffle
1825     MachineFunction &MF = DAG.getMachineFunction();
1826     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1827     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1828     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1829     // Initialize temporary register to 0
1830     SDValue InitTempReg =
1831       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1832     // Copy register's contents as index in INSERT_MASK:
1833     SDValue ShufMaskOp =
1834       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1835                   DAG.getTargetConstant(V2Elt, MVT::i32),
1836                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1837     // Use shuffle mask in SHUFB synthetic instruction:
1838     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1839   } else {
1840    // Convert the SHUFFLE_VECTOR mask's input element units to the
1841    // actual bytes.
1842     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1843
1844     SmallVector<SDValue, 16> ResultMask;
1845     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1846       unsigned SrcElt;
1847       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1848         SrcElt = 0;
1849       else
1850         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1851
1852       for (unsigned j = 0; j < BytesPerElement; ++j) {
1853         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1854                                              MVT::i8));
1855       }
1856     }
1857
1858     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1859                                       &ResultMask[0], ResultMask.size());
1860     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1861   }
1862 }
1863
1864 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1865   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1866
1867   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1868     // For a constant, build the appropriate constant vector, which will
1869     // eventually simplify to a vector register load.
1870
1871     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1872     SmallVector<SDValue, 16> ConstVecValues;
1873     MVT VT;
1874     size_t n_copies;
1875
1876     // Create a constant vector:
1877     switch (Op.getValueType().getSimpleVT()) {
1878     default: assert(0 && "Unexpected constant value type in "
1879                          "LowerSCALAR_TO_VECTOR");
1880     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1881     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1882     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1883     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1884     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1885     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1886     }
1887
1888     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1889     for (size_t j = 0; j < n_copies; ++j)
1890       ConstVecValues.push_back(CValue);
1891
1892     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1893                        &ConstVecValues[0], ConstVecValues.size());
1894   } else {
1895     // Otherwise, copy the value from one register to another:
1896     switch (Op0.getValueType().getSimpleVT()) {
1897     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1898     case MVT::i8:
1899     case MVT::i16:
1900     case MVT::i32:
1901     case MVT::i64:
1902     case MVT::f32:
1903     case MVT::f64:
1904       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1905     }
1906   }
1907
1908   return SDValue();
1909 }
1910
1911 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1912   switch (Op.getValueType().getSimpleVT()) {
1913   default:
1914     cerr << "CellSPU: Unknown vector multiplication, got "
1915          << Op.getValueType().getMVTString()
1916          << "\n";
1917     abort();
1918     /*NOTREACHED*/
1919
1920   case MVT::v4i32: {
1921     SDValue rA = Op.getOperand(0);
1922     SDValue rB = Op.getOperand(1);
1923     SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1924     SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1925     SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1926     SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1927
1928     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1929     break;
1930   }
1931
1932   // Multiply two v8i16 vectors (pipeline friendly version):
1933   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1934   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1935   // c) Use SELB to select upper and lower halves from the intermediate results
1936   //
1937   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1938   // dual-issue. This code does manage to do this, even if it's a little on
1939   // the wacky side
1940   case MVT::v8i16: {
1941     MachineFunction &MF = DAG.getMachineFunction();
1942     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1943     SDValue Chain = Op.getOperand(0);
1944     SDValue rA = Op.getOperand(0);
1945     SDValue rB = Op.getOperand(1);
1946     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1947     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1948
1949     SDValue FSMBOp =
1950       DAG.getCopyToReg(Chain, FSMBIreg,
1951                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1952                                    DAG.getConstant(0xcccc, MVT::i16)));
1953
1954     SDValue HHProd =
1955       DAG.getCopyToReg(FSMBOp, HiProdReg,
1956                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1957
1958     SDValue HHProd_v4i32 =
1959       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1960                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1961
1962     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1963                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1964                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1965                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1966                                                HHProd_v4i32,
1967                                                DAG.getConstant(16, MVT::i16))),
1968                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1969   }
1970
1971   // This M00sE is N@stI! (apologies to Monty Python)
1972   //
1973   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1974   // is to break it all apart, sign extend, and reassemble the various
1975   // intermediate products.
1976   case MVT::v16i8: {
1977     SDValue rA = Op.getOperand(0);
1978     SDValue rB = Op.getOperand(1);
1979     SDValue c8 = DAG.getConstant(8, MVT::i32);
1980     SDValue c16 = DAG.getConstant(16, MVT::i32);
1981
1982     SDValue LLProd =
1983       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1984                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1985                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1986
1987     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1988
1989     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1990
1991     SDValue LHProd =
1992       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1993                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1994
1995     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1996                                      DAG.getConstant(0x2222, MVT::i16));
1997
1998     SDValue LoProdParts =
1999       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2000                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
2001                               LLProd, LHProd, FSMBmask));
2002
2003     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
2004
2005     SDValue LoProd =
2006       DAG.getNode(ISD::AND, MVT::v4i32,
2007                   LoProdParts,
2008                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2009                               LoProdMask, LoProdMask,
2010                               LoProdMask, LoProdMask));
2011
2012     SDValue rAH =
2013       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2014                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2015
2016     SDValue rBH =
2017       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2018                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2019
2020     SDValue HLProd =
2021       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2022                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2023                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2024
2025     SDValue HHProd_1 =
2026       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2027                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2028                               DAG.getNode(SPUISD::VEC_SRA,
2029                                           MVT::v4i32, rAH, c8)),
2030                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2031                               DAG.getNode(SPUISD::VEC_SRA,
2032                                           MVT::v4i32, rBH, c8)));
2033
2034     SDValue HHProd =
2035       DAG.getNode(SPUISD::SELB, MVT::v8i16,
2036                   HLProd,
2037                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2038                   FSMBmask);
2039
2040     SDValue HiProd =
2041       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2042
2043     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2044                        DAG.getNode(ISD::OR, MVT::v4i32,
2045                                    LoProd, HiProd));
2046   }
2047   }
2048
2049   return SDValue();
2050 }
2051
2052 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2053   MachineFunction &MF = DAG.getMachineFunction();
2054   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2055
2056   SDValue A = Op.getOperand(0);
2057   SDValue B = Op.getOperand(1);
2058   MVT VT = Op.getValueType();
2059
2060   unsigned VRegBR, VRegC;
2061
2062   if (VT == MVT::f32) {
2063     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2064     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2065   } else {
2066     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2067     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2068   }
2069   // TODO: make sure we're feeding FPInterp the right arguments
2070   // Right now: fi B, frest(B)
2071
2072   // Computes BRcpl =
2073   // (Floating Interpolate (FP Reciprocal Estimate B))
2074   SDValue BRcpl =
2075       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2076                        DAG.getNode(SPUISD::FPInterp, VT, B,
2077                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2078
2079   // Computes A * BRcpl and stores in a temporary register
2080   SDValue AxBRcpl =
2081       DAG.getCopyToReg(BRcpl, VRegC,
2082                  DAG.getNode(ISD::FMUL, VT, A,
2083                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2084   // What's the Chain variable do? It's magic!
2085   // TODO: set Chain = Op(0).getEntryNode()
2086
2087   return DAG.getNode(ISD::FADD, VT,
2088                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2089                 DAG.getNode(ISD::FMUL, VT,
2090                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2091                         DAG.getNode(ISD::FSUB, VT, A,
2092                             DAG.getNode(ISD::FMUL, VT, B,
2093                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2094 }
2095
2096 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2097   MVT VT = Op.getValueType();
2098   SDValue N = Op.getOperand(0);
2099   SDValue Elt = Op.getOperand(1);
2100   SDValue ShufMask[16];
2101   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2102
2103   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2104
2105   int EltNo = (int) C->getZExtValue();
2106
2107   // sanity checks:
2108   if (VT == MVT::i8 && EltNo >= 16)
2109     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2110   else if (VT == MVT::i16 && EltNo >= 8)
2111     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2112   else if (VT == MVT::i32 && EltNo >= 4)
2113     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2114   else if (VT == MVT::i64 && EltNo >= 2)
2115     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2116
2117   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2118     // i32 and i64: Element 0 is the preferred slot
2119     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2120   }
2121
2122   // Need to generate shuffle mask and extract:
2123   int prefslot_begin = -1, prefslot_end = -1;
2124   int elt_byte = EltNo * VT.getSizeInBits() / 8;
2125
2126   switch (VT.getSimpleVT()) {
2127   default:
2128     assert(false && "Invalid value type!");
2129   case MVT::i8: {
2130     prefslot_begin = prefslot_end = 3;
2131     break;
2132   }
2133   case MVT::i16: {
2134     prefslot_begin = 2; prefslot_end = 3;
2135     break;
2136   }
2137   case MVT::i32: {
2138     prefslot_begin = 0; prefslot_end = 3;
2139     break;
2140   }
2141   case MVT::i64: {
2142     prefslot_begin = 0; prefslot_end = 7;
2143     break;
2144   }
2145   }
2146
2147   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2148          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2149
2150   for (int i = 0; i < 16; ++i) {
2151     // zero fill uppper part of preferred slot, don't care about the
2152     // other slots:
2153     unsigned int mask_val;
2154
2155     if (i <= prefslot_end) {
2156       mask_val =
2157         ((i < prefslot_begin)
2158          ? 0x80
2159          : elt_byte + (i - prefslot_begin));
2160
2161       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2162     } else
2163       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2164   }
2165
2166   SDValue ShufMaskVec =
2167     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2168                 &ShufMask[0],
2169                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2170
2171   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2172                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2173                                  N, N, ShufMaskVec));
2174
2175 }
2176
2177 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2178   SDValue VecOp = Op.getOperand(0);
2179   SDValue ValOp = Op.getOperand(1);
2180   SDValue IdxOp = Op.getOperand(2);
2181   MVT VT = Op.getValueType();
2182
2183   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2184   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2185
2186   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2187   // Use $2 because it's always 16-byte aligned and it's available:
2188   SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2189
2190   SDValue result =
2191     DAG.getNode(SPUISD::SHUFB, VT,
2192                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2193                 VecOp,
2194                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2195                             DAG.getNode(ISD::ADD, PtrVT,
2196                                         PtrBase,
2197                                         DAG.getConstant(CN->getZExtValue(),
2198                                                         PtrVT))));
2199
2200   return result;
2201 }
2202
2203 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2204 {
2205   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2206
2207   assert(Op.getValueType() == MVT::i8);
2208   switch (Opc) {
2209   default:
2210     assert(0 && "Unhandled i8 math operator");
2211     /*NOTREACHED*/
2212     break;
2213   case ISD::SUB: {
2214     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2215     // the result:
2216     SDValue N1 = Op.getOperand(1);
2217     N0 = (N0.getOpcode() != ISD::Constant
2218           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2219           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2220                             MVT::i16));
2221     N1 = (N1.getOpcode() != ISD::Constant
2222           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2223           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2224                             MVT::i16));
2225     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2226                        DAG.getNode(Opc, MVT::i16, N0, N1));
2227   }
2228   case ISD::ROTR:
2229   case ISD::ROTL: {
2230     SDValue N1 = Op.getOperand(1);
2231     unsigned N1Opc;
2232     N0 = (N0.getOpcode() != ISD::Constant
2233           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2234           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2235                             MVT::i16));
2236     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2237             ? ISD::ZERO_EXTEND
2238             : ISD::TRUNCATE;
2239     N1 = (N1.getOpcode() != ISD::Constant
2240           ? DAG.getNode(N1Opc, MVT::i16, N1)
2241           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2242                             MVT::i16));
2243     SDValue ExpandArg =
2244       DAG.getNode(ISD::OR, MVT::i16, N0,
2245                   DAG.getNode(ISD::SHL, MVT::i16,
2246                               N0, DAG.getConstant(8, MVT::i16)));
2247     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2248                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2249   }
2250   case ISD::SRL:
2251   case ISD::SHL: {
2252     SDValue N1 = Op.getOperand(1);
2253     unsigned N1Opc;
2254     N0 = (N0.getOpcode() != ISD::Constant
2255           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2256           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2257                             MVT::i16));
2258     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2259             ? ISD::ZERO_EXTEND
2260             : ISD::TRUNCATE;
2261     N1 = (N1.getOpcode() != ISD::Constant
2262           ? DAG.getNode(N1Opc, MVT::i16, N1)
2263           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2264                             MVT::i16));
2265     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2266                        DAG.getNode(Opc, MVT::i16, N0, N1));
2267   }
2268   case ISD::SRA: {
2269     SDValue N1 = Op.getOperand(1);
2270     unsigned N1Opc;
2271     N0 = (N0.getOpcode() != ISD::Constant
2272           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2273           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2274                             MVT::i16));
2275     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2276             ? ISD::SIGN_EXTEND
2277             : ISD::TRUNCATE;
2278     N1 = (N1.getOpcode() != ISD::Constant
2279           ? DAG.getNode(N1Opc, MVT::i16, N1)
2280           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2281                             MVT::i16));
2282     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2283                        DAG.getNode(Opc, MVT::i16, N0, N1));
2284   }
2285   case ISD::MUL: {
2286     SDValue N1 = Op.getOperand(1);
2287     unsigned N1Opc;
2288     N0 = (N0.getOpcode() != ISD::Constant
2289           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2290           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2291                             MVT::i16));
2292     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2293     N1 = (N1.getOpcode() != ISD::Constant
2294           ? DAG.getNode(N1Opc, MVT::i16, N1)
2295           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2296                             MVT::i16));
2297     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2298                        DAG.getNode(Opc, MVT::i16, N0, N1));
2299     break;
2300   }
2301   }
2302
2303   return SDValue();
2304 }
2305
2306 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2307 {
       // Custom lowering for i64 operations, selected by Opc: the integer
       // extensions, ADD, SUB, SHL, SRL and SRA. Each case returns a replacement
       // built from SPUISD nodes. An Opc with no case below leaves the switch
       // and a null SDValue is returned.
2308   MVT VT = Op.getValueType();
       // Vector type with VT as its element type, sized to 128 bits in total.
2309   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2310
2311   SDValue Op0 = Op.getOperand(0);
2312
2313   switch (Opc) {
2314   case ISD::ZERO_EXTEND:
2315   case ISD::SIGN_EXTEND:
2316   case ISD::ANY_EXTEND: {
2317     MVT Op0VT = Op0.getValueType();
2318     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2319
         // Only an i32 source is supported (checked by the assert only).
2320     assert(Op0VT == MVT::i32
2321            && "CellSPU: Zero/sign extending something other than i32");
2322     DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2323
         // Sign extension uses ROTBYTES_RIGHT_S; zero and any extension both use
         // ROTQUAD_RZ_BYTES.
2324     unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2325                       ? SPUISD::ROTBYTES_RIGHT_S
2326                       : SPUISD::ROTQUAD_RZ_BYTES);
2327     SDValue PromoteScalar =
2328       DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2329
         // Apply NewOpc to the promoted operand with a constant operand of 4,
         // reinterpret the result as VecVT and take element 0 as the VT result.
2330     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2331                        DAG.getNode(ISD::BIT_CONVERT, VecVT,
2332                                    DAG.getNode(NewOpc, Op0VecVT,
2333                                                PromoteScalar,
2334                                                DAG.getConstant(4, MVT::i32))));
2335   }
2336
2337   case ISD::ADD: {
2338     // Turn operands into vectors to satisfy type checking (shufb works on
2339     // vectors)
         // Note: this Op0 shadows the function-level Op0 within the case.
2340     SDValue Op0 =
2341       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2342     SDValue Op1 =
2343       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2344     SmallVector<SDValue, 16> ShufBytes;
2345
2346     // Create the shuffle mask for "rotating" the carry up one register slot
2347     // once the carry is generated.
         // The mask is four i32 words assembled into a v4i32 BUILD_VECTOR.
         // NOTE(review): 0x80 control bytes presumably make shufb produce zero
         // bytes -- confirm against the SPU ISA.
2348     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2349     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2350     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2351     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2352
2353     SDValue CarryGen =
2354       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2355     SDValue ShiftedCarry =
2356       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2357                   CarryGen, CarryGen,
2358                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2359                               &ShufBytes[0], ShufBytes.size()));
2360
         // ADD_EXTENDED consumes both operands plus the shuffled carry; element 0
         // of the vector result is the i64 sum.
2361     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2362                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2363                                    Op0, Op1, ShiftedCarry));
2364   }
2365
2366   case ISD::SUB: {
2367     // Turn operands into vectors to satisfy type checking (shufb works on
2368     // vectors)
         // Note: this Op0 shadows the function-level Op0 within the case.
2369     SDValue Op0 =
2370       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2371     SDValue Op1 =
2372       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2373     SmallVector<SDValue, 16> ShufBytes;
2374
2375     // Create the shuffle mask for "rotating" the borrow up one register slot
2376     // once the borrow is generated.
         // Same layout as the ADD mask, but the filler words are 0xc0c0c0c0
         // rather than 0x80808080.
         // NOTE(review): 0xc0 control bytes presumably make shufb produce 0xff
         // bytes -- confirm against the SPU ISA.
2377     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2378     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2379     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2380     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2381
2382     SDValue BorrowGen =
2383       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2384     SDValue ShiftedBorrow =
2385       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2386                   BorrowGen, BorrowGen,
2387                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2388                               &ShufBytes[0], ShufBytes.size()));
2389
         // SUB_EXTENDED consumes both operands plus the shuffled borrow; element
         // 0 of the vector result is the i64 difference.
2390     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2391                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2392                                    Op0, Op1, ShiftedBorrow));
2393   }
2394
2395   case ISD::SHL: {
2396     SDValue ShiftAmt = Op.getOperand(1);
2397     MVT ShiftAmtVT = ShiftAmt.getValueType();
2398     SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
         // SELB between the promoted operand and a zero vector, under the mask
         // produced by SELECT_MASK(0xff00).
2399     SDValue MaskLower =
2400       DAG.getNode(SPUISD::SELB, VecVT,
2401                   Op0Vec,
2402                   DAG.getConstant(0, VecVT),
2403                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2404                               DAG.getConstant(0xff00ULL, MVT::i16)));
         // Split the shift amount into whole bytes (amount >> 3) ...
2405     SDValue ShiftAmtBytes =
2406       DAG.getNode(ISD::SRL, ShiftAmtVT,
2407                   ShiftAmt,
2408                   DAG.getConstant(3, ShiftAmtVT));
         // ... and the remaining bits (amount & 7).
2409     SDValue ShiftAmtBits =
2410       DAG.getNode(ISD::AND, ShiftAmtVT,
2411                   ShiftAmt,
2412                   DAG.getConstant(7, ShiftAmtVT));
2413
         // Shift by bytes first, then by bits, and take element 0.
2414     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2415                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2416                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2417                                                MaskLower, ShiftAmtBytes),
2418                                    ShiftAmtBits));
2419   }
2420
2421   case ISD::SRL: {
         // This VT shadows the function-level VT; both are Op.getValueType().
2422     MVT VT = Op.getValueType();
2423     SDValue ShiftAmt = Op.getOperand(1);
2424     MVT ShiftAmtVT = ShiftAmt.getValueType();
         // Same byte/bit split of the shift amount as in the SHL case.
2425     SDValue ShiftAmtBytes =
2426       DAG.getNode(ISD::SRL, ShiftAmtVT,
2427                   ShiftAmt,
2428                   DAG.getConstant(3, ShiftAmtVT));
2429     SDValue ShiftAmtBits =
2430       DAG.getNode(ISD::AND, ShiftAmtVT,
2431                   ShiftAmt,
2432                   DAG.getConstant(7, ShiftAmtVT));
2433
         // Unlike SHL, the nodes here are built directly on the scalar Op0 at
         // type VT: no PROMOTE_SCALAR / EXTRACT_ELT0 pair is used.
2434     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2435                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2436                                    Op0, ShiftAmtBytes),
2437                        ShiftAmtBits);
2438   }
2439
2440   case ISD::SRA: {
2441     // Promote Op0 to vector
         // (this Op0 shadows the function-level Op0 within the case)
2442     SDValue Op0 =
2443       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2444     SDValue ShiftAmt = Op.getOperand(1);
2445     MVT ShiftVT = ShiftAmt.getValueType();
2446
2447     // Negate variable shift amounts
         // (constant shift amounts are passed through unchanged)
2448     if (!isa<ConstantSDNode>(ShiftAmt)) {
2449       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2450                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2451     }
2452
         // VEC_SRA by 31 on the promoted operand, viewed as v4i32; element 0 is
         // taken as an i32.
2453     SDValue UpperHalfSign =
2454       DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2455                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2456                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2457                                           Op0, DAG.getConstant(31, MVT::i32))));
2458     SDValue UpperHalfSignMask =
2459       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2460     SDValue UpperLowerMask =
2461       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2462                   DAG.getConstant(0xff00, MVT::i16));
         // SELB between the sign mask and the promoted operand, steered by
         // UpperLowerMask.
2463     SDValue UpperLowerSelect =
2464       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2465                   UpperHalfSignMask, Op0, UpperLowerMask);
         // Note: the two local names below are the reverse of the opcodes they
         // hold (RotateLeftBytes is the ROTBYTES_LEFT_BITS node, RotateLeftBits
         // the ROTBYTES_LEFT node). Both nodes receive the same ShiftAmt.
2466     SDValue RotateLeftBytes =
2467       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2468                   UpperLowerSelect, ShiftAmt);
2469     SDValue RotateLeftBits =
2470       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2471                   RotateLeftBytes, ShiftAmt);
2472
2473     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2474                        RotateLeftBits);
2475   }
2476   }
2477
       // Opc was not one of the cases above.
2478   return SDValue();
2479 }
2480
2481 //! Lower byte immediate operations for v16i8 vectors:
2482 static SDValue
2483 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2484   SDValue ConstVec;
2485   SDValue Arg;
2486   MVT VT = Op.getValueType();
2487
2488   ConstVec = Op.getOperand(0);
2489   Arg = Op.getOperand(1);
2490   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2491     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2492       ConstVec = ConstVec.getOperand(0);
2493     } else {
2494       ConstVec = Op.getOperand(1);
2495       Arg = Op.getOperand(0);
2496       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2497         ConstVec = ConstVec.getOperand(0);
2498       }
2499     }
2500   }
2501
2502   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2503     uint64_t VectorBits[2];
2504     uint64_t UndefBits[2];
2505     uint64_t SplatBits, SplatUndef;
2506     int SplatSize;
2507
2508     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2509         && isConstantSplat(VectorBits, UndefBits,
2510                            VT.getVectorElementType().getSizeInBits(),
2511                            SplatBits, SplatUndef, SplatSize)) {
2512       SDValue tcVec[16];
2513       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2514       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2515
2516       // Turn the BUILD_VECTOR into a set of target constants:
2517       for (size_t i = 0; i < tcVecSize; ++i)
2518         tcVec[i] = tc;
2519
2520       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2521                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2522     }
2523   }
2524   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2525   // lowered.  Return the operation, rather than a null SDValue.
2526   return Op;
2527 }
2528
2529 //! Lower i32 multiplication
2530 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2531                           unsigned Opc) {
2532   switch (VT.getSimpleVT()) {
2533   default:
2534     cerr << "CellSPU: Unknown LowerMUL value type, got "
2535          << Op.getValueType().getMVTString()
2536          << "\n";
2537     abort();
2538     /*NOTREACHED*/
2539
2540   case MVT::i32: {
2541     SDValue rA = Op.getOperand(0);
2542     SDValue rB = Op.getOperand(1);
2543
2544     return DAG.getNode(ISD::ADD, MVT::i32,
2545                        DAG.getNode(ISD::ADD, MVT::i32,
2546                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2547                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2548                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2549   }
2550   }
2551
2552   return SDValue();
2553 }
2554
2555 //! Custom lowering for CTPOP (count population)
2556 /*!
2557   Custom lowering code that counts the number ones in the input
2558   operand. SPU has such an instruction, but it counts the number of
2559   ones per byte, which then have to be accumulated.
2560 */
2561 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2562   MVT VT = Op.getValueType();
2563   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2564
2565   switch (VT.getSimpleVT()) {
2566   default:
2567     assert(false && "Invalid value type!");
2568   case MVT::i8: {
2569     SDValue N = Op.getOperand(0);
2570     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2571
2572     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2573     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2574
2575     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2576   }
2577
2578   case MVT::i16: {
2579     MachineFunction &MF = DAG.getMachineFunction();
2580     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2581
2582     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2583
2584     SDValue N = Op.getOperand(0);
2585     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2586     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2587     SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2588
2589     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2590     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2591
2592     // CNTB_result becomes the chain to which all of the virtual registers
2593     // CNTB_reg, SUM1_reg become associated:
2594     SDValue CNTB_result =
2595       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2596
2597     SDValue CNTB_rescopy =
2598       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2599
2600     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2601
2602     return DAG.getNode(ISD::AND, MVT::i16,
2603                        DAG.getNode(ISD::ADD, MVT::i16,
2604                                    DAG.getNode(ISD::SRL, MVT::i16,
2605                                                Tmp1, Shift1),
2606                                    Tmp1),
2607                        Mask0);
2608   }
2609
2610   case MVT::i32: {
2611     MachineFunction &MF = DAG.getMachineFunction();
2612     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2613
2614     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2615     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2616
2617     SDValue N = Op.getOperand(0);
2618     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2619     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2620     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2621     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2622
2623     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2624     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2625
2626     // CNTB_result becomes the chain to which all of the virtual registers
2627     // CNTB_reg, SUM1_reg become associated:
2628     SDValue CNTB_result =
2629       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2630
2631     SDValue CNTB_rescopy =
2632       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2633
2634     SDValue Comp1 =
2635       DAG.getNode(ISD::SRL, MVT::i32,
2636                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2637
2638     SDValue Sum1 =
2639       DAG.getNode(ISD::ADD, MVT::i32,
2640                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2641
2642     SDValue Sum1_rescopy =
2643       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2644
2645     SDValue Comp2 =
2646       DAG.getNode(ISD::SRL, MVT::i32,
2647                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2648                   Shift2);
2649     SDValue Sum2 =
2650       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2651                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2652
2653     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2654   }
2655
2656   case MVT::i64:
2657     break;
2658   }
2659
2660   return SDValue();
2661 }
2662
2663 /// LowerOperation - Provide custom lowering hooks for some operations.
2664 ///
2665 SDValue
2666 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2667 {
2668   unsigned Opc = (unsigned) Op.getOpcode();
2669   MVT VT = Op.getValueType();
2670
2671   switch (Opc) {
2672   default: {
2673     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2674     cerr << "Op.getOpcode() = " << Opc << "\n";
2675     cerr << "*Op.getNode():\n";
2676     Op.getNode()->dump();
2677     abort();
2678   }
2679   case ISD::LOAD:
2680   case ISD::SEXTLOAD:
2681   case ISD::ZEXTLOAD:
2682     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2683   case ISD::STORE:
2684     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2685   case ISD::ConstantPool:
2686     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2687   case ISD::GlobalAddress:
2688     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2689   case ISD::JumpTable:
2690     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2691   case ISD::Constant:
2692     return LowerConstant(Op, DAG);
2693   case ISD::ConstantFP:
2694     return LowerConstantFP(Op, DAG);
2695   case ISD::BRCOND:
2696     return LowerBRCOND(Op, DAG);
2697   case ISD::FORMAL_ARGUMENTS:
2698     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2699   case ISD::CALL:
2700     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2701   case ISD::RET:
2702     return LowerRET(Op, DAG, getTargetMachine());
2703
2704
2705   // i8, i64 math ops:
2706   case ISD::ZERO_EXTEND:
2707   case ISD::SIGN_EXTEND:
2708   case ISD::ANY_EXTEND:
2709   case ISD::ADD:
2710   case ISD::SUB:
2711   case ISD::ROTR:
2712   case ISD::ROTL:
2713   case ISD::SRL:
2714   case ISD::SHL:
2715   case ISD::SRA: {
2716     if (VT == MVT::i8)
2717       return LowerI8Math(Op, DAG, Opc);
2718     else if (VT == MVT::i64)
2719       return LowerI64Math(Op, DAG, Opc);
2720     break;
2721   }
2722
2723   // Vector-related lowering.
2724   case ISD::BUILD_VECTOR:
2725     return LowerBUILD_VECTOR(Op, DAG);
2726   case ISD::SCALAR_TO_VECTOR:
2727     return LowerSCALAR_TO_VECTOR(Op, DAG);
2728   case ISD::VECTOR_SHUFFLE:
2729     return LowerVECTOR_SHUFFLE(Op, DAG);
2730   case ISD::EXTRACT_VECTOR_ELT:
2731     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2732   case ISD::INSERT_VECTOR_ELT:
2733     return LowerINSERT_VECTOR_ELT(Op, DAG);
2734
2735   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2736   case ISD::AND:
2737   case ISD::OR:
2738   case ISD::XOR:
2739     return LowerByteImmed(Op, DAG);
2740
2741   // Vector and i8 multiply:
2742   case ISD::MUL:
2743     if (VT.isVector())
2744       return LowerVectorMUL(Op, DAG);
2745     else if (VT == MVT::i8)
2746       return LowerI8Math(Op, DAG, Opc);
2747     else
2748       return LowerMUL(Op, DAG, VT, Opc);
2749
2750   case ISD::FDIV:
2751     if (VT == MVT::f32 || VT == MVT::v4f32)
2752       return LowerFDIVf32(Op, DAG);
2753 //    else if (Op.getValueType() == MVT::f64)
2754 //      return LowerFDIVf64(Op, DAG);
2755     else
2756       assert(0 && "Calling FDIV on unsupported MVT");
2757
2758   case ISD::CTPOP:
2759     return LowerCTPOP(Op, DAG);
2760   }
2761
2762   return SDValue();
2763 }
2764
2765 //===----------------------------------------------------------------------===//
2766 // Target Optimization Hooks
2767 //===----------------------------------------------------------------------===//
2768
2769 SDValue
2770 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2771 {
       // Target-specific DAG combines. Returns a value to use in place of N, or
       // a null SDValue when none of the patterns below applies. The ADD,
       // extend and IndirectAddr combines return directly; the shift and
       // PROMOTE_SCALAR combines set Result, which is traced and returned at
       // the end.
2772 #if 0
2773   TargetMachine &TM = getTargetMachine();
2774 #endif
2775   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2776   SelectionDAG &DAG = DCI.DAG;
2777   SDValue Op0 = N->getOperand(0);      // everything has at least one operand
2778   SDValue Result;                     // Initially, NULL result
2779
2780   switch (N->getOpcode()) {
2781   default: break;
2782   case ISD::ADD: {
         // Fold a constant addend into the constant operand of an IndirectAddr
         // node, whichever side of the add the IndirectAddr is on.
2783     SDValue Op1 = N->getOperand(1);
2784
2785     if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2786       SDValue Op01 = Op0.getOperand(1);
2787       if (Op01.getOpcode() == ISD::Constant
2788           || Op01.getOpcode() == ISD::TargetConstant) {
2789         // (add (SPUindirect <arg>, <const>), <const>) ->
2790         // (SPUindirect <arg>, <const + const>)
2791         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2792         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2793         SDValue combinedConst =
2794           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2795                           Op0.getValueType());
2796
2797         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2798                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2799         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2800                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2801         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2802                            Op0.getOperand(0), combinedConst);
2803       }
2804     } else if (isa<ConstantSDNode>(Op0)
2805                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2806       SDValue Op11 = Op1.getOperand(1);
2807       if (Op11.getOpcode() == ISD::Constant
2808           || Op11.getOpcode() == ISD::TargetConstant) {
2809         // (add <const>, (SPUindirect <arg>, <const>)) ->
2810         // (SPUindirect <arg>, <const + const>)
2811         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2812         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
             // The combined constant takes the type of the constant operand
             // (Op0); the new node takes the type of the IndirectAddr (Op1).
2813         SDValue combinedConst =
2814           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2815                           Op0.getValueType());
2816
2817         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2818                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2819         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2820                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2821
2822         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2823                            Op1.getOperand(0), combinedConst);
2824       }
2825     }
2826     break;
2827   }
2828   case ISD::SIGN_EXTEND:
2829   case ISD::ZERO_EXTEND:
2830   case ISD::ANY_EXTEND: {
         // An extend of an EXTRACT_ELT0 that already has the extend's result
         // type is redundant: use the EXTRACT_ELT0 directly.
2831     if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2832         N->getValueType(0) == Op0.getValueType()) {
2833       // (any_extend (SPUextract_elt0 <arg>)) ->
2834       // (SPUextract_elt0 <arg>)
2835       // Types must match, however...
2836       DEBUG(cerr << "Replace: ");
2837       DEBUG(N->dump(&DAG));
2838       DEBUG(cerr << "\nWith:    ");
2839       DEBUG(Op0.getNode()->dump(&DAG));
2840       DEBUG(cerr << "\n");
2841
2842       return Op0;
2843     }
2844     break;
2845   }
2846   case SPUISD::IndirectAddr: {
         // Applies only when the subtarget is not using large memory. Operand 1
         // is cast to ConstantSDNode unconditionally, so it is expected to be a
         // constant whenever operand 0 is an AFormAddr.
2847     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2848       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2849       if (CN->getZExtValue() == 0) {
2850         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2851         // (SPUaform <addr>, 0)
2852
2853         DEBUG(cerr << "Replace: ");
2854         DEBUG(N->dump(&DAG));
2855         DEBUG(cerr << "\nWith:    ");
2856         DEBUG(Op0.getNode()->dump(&DAG));
2857         DEBUG(cerr << "\n");
2858
2859         return Op0;
2860       }
2861     }
2862     break;
2863   }
2864   case SPUISD::SHLQUAD_L_BITS:
2865   case SPUISD::SHLQUAD_L_BYTES:
2866   case SPUISD::VEC_SHL:
2867   case SPUISD::VEC_SRL:
2868   case SPUISD::VEC_SRA:
2869   case SPUISD::ROTQUAD_RZ_BYTES:
2870   case SPUISD::ROTQUAD_RZ_BITS: {
2871     SDValue Op1 = N->getOperand(1);
2872
2873     if (isa<ConstantSDNode>(Op1)) {
2874       // Kill degenerate vector shifts:
           // a constant amount of zero leaves the shifted operand unchanged.
2875       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2876
2877       if (CN->getZExtValue() == 0) {
2878         Result = Op0;
2879       }
2880     }
2881     break;
2882   }
2883   case SPUISD::PROMOTE_SCALAR: {
2884     switch (Op0.getOpcode()) {
2885     default:
2886       break;
2887     case ISD::ANY_EXTEND:
2888     case ISD::ZERO_EXTEND:
2889     case ISD::SIGN_EXTEND: {
2890       // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2891       // <arg>
2892       // but only if the SPUpromote_scalar and <arg> types match.
2893       SDValue Op00 = Op0.getOperand(0);
2894       if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2895         SDValue Op000 = Op00.getOperand(0);
2896         if (Op000.getValueType() == N->getValueType(0)) {
2897           Result = Op000;
2898         }
2899       }
2900       break;
2901     }
2902     case SPUISD::EXTRACT_ELT0: {
2903       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2904       // <arg>
           // NOTE(review): unlike the extend case above, no check is made that
           // <arg> has the same type as this node -- confirm this is intended.
2905       Result = Op0.getOperand(0);
2906       break;
2907     }
2908     }
2909     break;
2910   }
2911   }
2912   // Otherwise, return unchanged.
       // Trace any replacement that was recorded in Result (debug output only).
2913 #if 1
2914   if (Result.getNode()) {
2915     DEBUG(cerr << "\nReplace.SPU: ");
2916     DEBUG(N->dump(&DAG));
2917     DEBUG(cerr << "\nWith:        ");
2918     DEBUG(Result.getNode()->dump(&DAG));
2919     DEBUG(cerr << "\n");
2920   }
2921 #endif
2922
2923   return Result;
2924 }
2925
2926 //===----------------------------------------------------------------------===//
2927 // Inline Assembly Support
2928 //===----------------------------------------------------------------------===//
2929
2930 /// getConstraintType - Classify an inline-asm constraint string for this
2931 /// target.
2932 SPUTargetLowering::ConstraintType
2933 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2934   // Anything that is not exactly one character goes to the generic handler.
2935   if (ConstraintLetter.size() != 1)
2936     return TargetLowering::getConstraintType(ConstraintLetter);
2937
2938   const char Letter = ConstraintLetter[0];
2939   const bool IsRegClassLetter = Letter == 'b' || Letter == 'r' ||
2940                                 Letter == 'f' || Letter == 'v' ||
2941                                 Letter == 'y';
2942   if (IsRegClassLetter)
2943     return C_RegisterClass;
2944
2945   return TargetLowering::getConstraintType(ConstraintLetter);
2946 }
2947
2948 std::pair<unsigned, const TargetRegisterClass*>
2949 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2950                                                 MVT VT) const
2951 {
2952   if (Constraint.size() == 1) {
2953     // GCC RS6000 Constraint Letters
2954     switch (Constraint[0]) {
2955     case 'b':   // R1-R31
2956     case 'r':   // R0-R31
2957       if (VT == MVT::i64)
2958         return std::make_pair(0U, SPU::R64CRegisterClass);
2959       return std::make_pair(0U, SPU::R32CRegisterClass);
2960     case 'f':
2961       if (VT == MVT::f32)
2962         return std::make_pair(0U, SPU::R32FPRegisterClass);
2963       else if (VT == MVT::f64)
2964         return std::make_pair(0U, SPU::R64FPRegisterClass);
2965       break;
2966     case 'v':
2967       return std::make_pair(0U, SPU::GPRCRegisterClass);
2968     }
2969   }
2970
2971   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2972 }
2973
2974 //! Compute used/known bits for a SPU operand
     //
     // Only PROMOTE_SCALAR, LDRESULT, EXTRACT_ELT0 and EXTRACT_ELT0_CHAINED are
     // handled; every other opcode leaves KnownZero and KnownOne untouched.
     // Mask, DAG and Depth are not read by the active code.
2975 void
2976 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2977                                                   const APInt &Mask,
2978                                                   APInt &KnownZero,
2979                                                   APInt &KnownOne,
2980                                                   const SelectionDAG &DAG,
2981                                                   unsigned Depth ) const {
2982 #if 0
2983   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2984 #endif
2985
2986   switch (Op.getOpcode()) {
2987   default:
2988     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2989     break;
2990
2991 #if 0
2992   case CALL:
2993   case SHUFB:
2994   case INSERT_MASK:
2995   case CNTB:
2996 #endif
2997
2998   case SPUISD::PROMOTE_SCALAR: {
         // Uses the type of the scalar operand, not of the node's own result.
         // ORs ~InMask into KnownZero and InMask into KnownOne, both built at
         // the operand's bit width.
         // NOTE(review): this marks every bit of the operand type as known-one,
         // which looks stronger than the node can guarantee -- confirm intent.
2999     SDValue Op0 = Op.getOperand(0);
3000     MVT Op0VT = Op0.getValueType();
3001     unsigned Op0VTBits = Op0VT.getSizeInBits();
3002     uint64_t InMask = Op0VT.getIntegerVTBitMask();
3003     KnownZero |= APInt(Op0VTBits, ~InMask, false);
3004     KnownOne |= APInt(Op0VTBits, InMask, false);
3005     break;
3006   }
3007
3008   case SPUISD::LDRESULT:
3009   case SPUISD::EXTRACT_ELT0:
3010   case SPUISD::EXTRACT_ELT0_CHAINED: {
         // Same computation as above, but using the node's own result type.
         // NOTE(review): the same known-one concern applies here.
3011     MVT OpVT = Op.getValueType();
3012     unsigned OpVTBits = OpVT.getSizeInBits();
3013     uint64_t InMask = OpVT.getIntegerVTBitMask();
3014     KnownZero |= APInt(OpVTBits, ~InMask, false);
3015     KnownOne |= APInt(OpVTBits, InMask, false);
3016     break;
3017   }
3018
       // The opcodes listed below are compiled out and get no handling.
3019 #if 0
3020   case EXTRACT_I1_ZEXT:
3021   case EXTRACT_I1_SEXT:
3022   case EXTRACT_I8_ZEXT:
3023   case EXTRACT_I8_SEXT:
3024   case MPY:
3025   case MPYU:
3026   case MPYH:
3027   case MPYHH:
3028   case SPUISD::SHLQUAD_L_BITS:
3029   case SPUISD::SHLQUAD_L_BYTES:
3030   case SPUISD::VEC_SHL:
3031   case SPUISD::VEC_SRL:
3032   case SPUISD::VEC_SRA:
3033   case SPUISD::VEC_ROTL:
3034   case SPUISD::VEC_ROTR:
3035   case SPUISD::ROTQUAD_RZ_BYTES:
3036   case SPUISD::ROTQUAD_RZ_BITS:
3037   case SPUISD::ROTBYTES_RIGHT_S:
3038   case SPUISD::ROTBYTES_LEFT:
3039   case SPUISD::ROTBYTES_LEFT_CHAINED:
3040   case SPUISD::SELECT_MASK:
3041   case SPUISD::SELB:
3042   case SPUISD::FPInterp:
3043   case SPUISD::FPRecipEst:
3044   case SPUISD::SEXT32TO64:
3045 #endif
3046   }
3047 }
3048
3049 // LowerAsmOperandForConstraint
     //
     // Lowers the inline-asm operand Op for the given constraint letter. No
     // SPU-specific handling is done: all four arguments are forwarded
     // unchanged to the TargetLowering implementation.
3050 void
3051 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3052                                                 char ConstraintLetter,
3053                                                 std::vector<SDValue> &Ops,
3054                                                 SelectionDAG &DAG) const {
3055   // Default, for the time being, to the base class handler
3056   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3057 }
3058
3059 /// isLegalAddressImmediate - Decide whether the integer V is acceptable as
3060 /// the offset of the target addressing mode (Ty is not consulted).
3061 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3062                                                 const Type *Ty) const {
3063   const int64_t AddrSpan = 1 << 18;   // SPU's addresses are 256K
3064   return -AddrSpan < V && V < AddrSpan - 1;
3065 }
3066
3067 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
       // A global value is never accepted as an address immediate: GV is not
       // examined and the answer is always false.
3068   return false;
3069 }