lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/VectorExtras.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT        valtype;
  43     const int                   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << VT.getMVTString()
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDValue &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::Symbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! Predicate that returns true if the operand is an indirect target
 105   bool isIndirectOperand(const SDValue &Op)
 106   {
 107     const unsigned Opc = Op.getOpcode();
 108     return (Opc == ISD::Register
 109             || Opc == SPUISD::LDRESULT);
 110   }
 111 }
 112
 113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 114   : TargetLowering(TM),
 115     SPUTM(TM)
 116 {
 117   // Fold away setcc operations if possible.
 118   setPow2DivIsCheap();
 119
 120   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 121   setUseUnderscoreSetJmp(true);
 122   setUseUnderscoreLongJmp(true);
 123
 124   // Set up the SPU's register classes:
 125   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 126   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 127   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 128   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 129   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 130   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 131   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 132
 133   // SPU has no sign or zero extended loads for i1, i8, i16:
 134   setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
 135   setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 136   setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 137   setTruncStoreAction(MVT::i8, MVT::i1, Custom);
 138   setTruncStoreAction(MVT::i16, MVT::i1, Custom);
 139   setTruncStoreAction(MVT::i32, MVT::i1, Custom);
 140   setTruncStoreAction(MVT::i64, MVT::i1, Custom);
 141   setTruncStoreAction(MVT::i128, MVT::i1, Custom);
 142
 143   setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
 144   setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
 145   setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 146   setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
 147   setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
 148   setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
 149   setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
 150   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
 151
 152   setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
 153   setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
 154   setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 155
 156   // SPU constant load actions are custom lowered:
 157   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 158   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 159   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 160
 161   // SPU's loads and stores have to be custom lowered:
 162   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 163        ++sctype) {
 164     MVT VT = (MVT::SimpleValueType)sctype;
 165
 166     setOperationAction(ISD::LOAD, VT, Custom);
 167     setOperationAction(ISD::STORE, VT, Custom);
 168   }
 169
 170   // Custom lower BRCOND for i1, i8 to "promote" the result to
 171   // i32 and i16, respectively.
 172   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 173
 174   // Expand the jumptable branches
 175   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 176   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 177   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 178
 179   // SPU has no intrinsics for these particular operations:
 180   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 181
 182   // PowerPC has no SREM/UREM instructions
 183   setOperationAction(ISD::SREM, MVT::i32, Expand);
 184   setOperationAction(ISD::UREM, MVT::i32, Expand);
 185   setOperationAction(ISD::SREM, MVT::i64, Expand);
 186   setOperationAction(ISD::UREM, MVT::i64, Expand);
 187
 188   // We don't support sin/cos/sqrt/fmod
 189   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 190   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 191   setOperationAction(ISD::FREM , MVT::f64, Expand);
 192   setOperationAction(ISD::FLOG , MVT::f64, Expand);
 193   setOperationAction(ISD::FLOG2, MVT::f64, Expand);
 194   setOperationAction(ISD::FLOG10,MVT::f64, Expand);
 195   setOperationAction(ISD::FEXP , MVT::f64, Expand);
 196   setOperationAction(ISD::FEXP2, MVT::f64, Expand);
 197   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 198   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 199   setOperationAction(ISD::FREM , MVT::f32, Expand);
 200   setOperationAction(ISD::FLOG , MVT::f32, Expand);
 201   setOperationAction(ISD::FLOG2, MVT::f32, Expand);
 202   setOperationAction(ISD::FLOG10,MVT::f32, Expand);
 203   setOperationAction(ISD::FEXP , MVT::f32, Expand);
 204   setOperationAction(ISD::FEXP2, MVT::f32, Expand);
 205
 206   // If we're enabling GP optimizations, use hardware square root
 207   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 208   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 209
 210   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 211   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 212
 213   // SPU can do rotate right and left, so legalize it... but customize for i8
 214   // because instructions don't exist.
 215
 216   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 217   //        .td files.
 218   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 219   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 220   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 221
 222   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 223   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 224   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 225   // SPU has no native version of shift left/right for i8
 226   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 227   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 228   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 229   // And SPU needs custom lowering for shift left/right for i64
 230   setOperationAction(ISD::SHL,  MVT::i64,    Custom);
 231   setOperationAction(ISD::SRL,  MVT::i64,    Custom);
 232   setOperationAction(ISD::SRA,  MVT::i64,    Custom);
 233
 234   // Custom lower i8, i32 and i64 multiplications
 235   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 236   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 237   setOperationAction(ISD::MUL,  MVT::i64,    Custom);
 238
 239   // Need to custom handle (some) common i8, i64 math ops
 240   setOperationAction(ISD::ADD,  MVT::i64,    Custom);
 241   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 242   setOperationAction(ISD::SUB,  MVT::i64,    Custom);
 243
 244   // SPU does not have BSWAP. It does have i32 support CTLZ.
 245   // CTPOP has to be custom lowered.
 246   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 247   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 248
 249   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 250   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 251   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 252   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 253
 254   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 255   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 256
 257   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 258
 259   // SPU has a version of select that implements (a&~c)|(b&c), just like
 260   // select ought to work:
 261   setOperationAction(ISD::SELECT, MVT::i1,   Promote);
 262   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 263   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 264   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 265   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 266
 267   setOperationAction(ISD::SETCC, MVT::i1,    Promote);
 268   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 269   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 270   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 271   setOperationAction(ISD::SETCC, MVT::i64,   Expand);
 272
 273   // Zero extension and sign extension for i64 have to be
 274   // custom legalized
 275   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 276   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 277   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
 278
 279   // SPU has a legal FP -> signed INT instruction
 280   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 281   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 282   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 283   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 284
 285   // FDIV on SPU requires custom lowering
 286   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 287   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 288
 289   // SPU has [U|S]INT_TO_FP
 290   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 291   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 292   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 293   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 294   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 295   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 296   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 297   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 298
 299   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 300   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 301   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 302   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 303
 304   // We cannot sextinreg(i1).  Expand to shifts.
 305   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 306
 307   // Support label based line numbers.
 308   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 309   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 310
 311   // We want to legalize GlobalAddress and ConstantPool nodes into the
 312   // appropriate instructions to materialize the address.
 313   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 314        ++sctype) {
 315     MVT VT = (MVT::SimpleValueType)sctype;
 316
 317     setOperationAction(ISD::GlobalAddress, VT, Custom);
 318     setOperationAction(ISD::ConstantPool,  VT, Custom);
 319     setOperationAction(ISD::JumpTable,     VT, Custom);
 320   }
 321
 322   // RET must be custom lowered, to meet ABI requirements
 323   setOperationAction(ISD::RET,           MVT::Other, Custom);
 324
 325   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 326   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 327
 328   // Use the default implementation.
 329   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 330   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 331   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 332   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 333   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 334   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 335   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 336
 337   // Cell SPU has instructions for converting between i64 and fp.
 338   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 339   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 340
 341   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 342   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 343
 344   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 345   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 346
 347   // First set operation action for all vector types to expand. Then we
 348   // will selectively turn on ones that can be effectively codegen'd.
 349   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 350   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 351   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 352   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 353   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 354   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 355
 356   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 357        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 358     MVT VT = (MVT::SimpleValueType)i;
 359
 360     // add/sub are legal for all supported vector VT's.
 361     setOperationAction(ISD::ADD , VT, Legal);
 362     setOperationAction(ISD::SUB , VT, Legal);
 363     // mul has to be custom lowered.
 364     setOperationAction(ISD::MUL , VT, Custom);
 365
 366     setOperationAction(ISD::AND   , VT, Legal);
 367     setOperationAction(ISD::OR    , VT, Legal);
 368     setOperationAction(ISD::XOR   , VT, Legal);
 369     setOperationAction(ISD::LOAD  , VT, Legal);
 370     setOperationAction(ISD::SELECT, VT, Legal);
 371     setOperationAction(ISD::STORE,  VT, Legal);
 372
 373     // These operations need to be expanded:
 374     setOperationAction(ISD::SDIV, VT, Expand);
 375     setOperationAction(ISD::SREM, VT, Expand);
 376     setOperationAction(ISD::UDIV, VT, Expand);
 377     setOperationAction(ISD::UREM, VT, Expand);
 378     setOperationAction(ISD::FDIV, VT, Custom);
 379
 380     // Custom lower build_vector, constant pool spills, insert and
 381     // extract vector elements:
 382     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 383     setOperationAction(ISD::ConstantPool, VT, Custom);
 384     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 385     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 386     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 387     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 388   }
 389
 390   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 391   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 392   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 393   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 394   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 395
 396   setShiftAmountType(MVT::i32);
 397   setSetCCResultContents(ZeroOrOneSetCCResult);
 398
 399   setStackPointerRegisterToSaveRestore(SPU::R1);
 400
 401   // We have target-specific dag combine patterns for the following nodes:
 402   setTargetDAGCombine(ISD::ADD);
 403   setTargetDAGCombine(ISD::ZERO_EXTEND);
 404   setTargetDAGCombine(ISD::SIGN_EXTEND);
 405   setTargetDAGCombine(ISD::ANY_EXTEND);
 406
 407   computeRegisterProperties();
 408 }
 409
 410 const char *
 411 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 412 {
 413   if (node_names.empty()) {
 414     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 415     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 416     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 417     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 418     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 419     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 420     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 421     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 422     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 423     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 424     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 425     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 426     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 427     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
 428                                               = "SPUISD::EXTRACT_ELT0_CHAINED";
 429     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 430     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 431     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 432     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 433     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 434     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 435     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 436     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 437     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 438     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 439     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 440     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 441     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 442     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 443     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 444     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
 445       "SPUISD::ROTQUAD_RZ_BYTES";
 446     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
 447       "SPUISD::ROTQUAD_RZ_BITS";
 448     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 449       "SPUISD::ROTBYTES_RIGHT_S";
 450     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 451     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 452       "SPUISD::ROTBYTES_LEFT_CHAINED";
 453     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 454       "SPUISD::ROTBYTES_LEFT_BITS";
 455     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 456     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 457     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
 458     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
 459     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
 460     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
 461     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 462     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 463     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 464   }
 465
 466   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 467
 468   return ((i != node_names.end()) ? i->second : 0);
 469 }
 470
 471 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 472   MVT VT = Op.getValueType();
 473   if (VT.isInteger())
 474     return VT;
 475   else
 476     return MVT::i32;
 477 }
 478
 479 //===----------------------------------------------------------------------===//
 480 // Calling convention code:
 481 //===----------------------------------------------------------------------===//
 482
 483 #include "SPUGenCallingConv.inc"
 484
 485 //===----------------------------------------------------------------------===//
 486 //  LowerOperation implementation
 487 //===----------------------------------------------------------------------===//
 488
 489 /// Aligned load common code for CellSPU
 490 /*!
 491   \param[in] Op The SelectionDAG load or store operand
 492   \param[in] DAG The selection DAG
 493   \param[in] ST CellSPU subtarget information structure
 494   \param[in,out] alignment Caller initializes this to the load or store node's
 495   value from getAlignment(), may be updated while generating the aligned load
 496   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 497   offset (divisible by 16, modulo 16 == 0)
 498   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 499   offset of the preferred slot (modulo 16 != 0)
 500   \param[in,out] VT Caller initializes this value type to the the load or store
 501   node's loaded or stored value type; may be updated if an i1-extended load or
 502   store.
 503   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 504   otherwise false. Can help to determine if the chunk needs to be rotated.
 505
 506  Both load and store lowering load a block of data aligned on a 16-byte
 507  boundary. This is the common aligned load code shared between both.
 508  */
 509 static SDValue
 510 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 511             LSBaseSDNode *LSN,
 512             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 513             MVT &VT, bool &was16aligned)
 514 {
 515   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 516   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 517   SDValue basePtr = LSN->getBasePtr();
 518   SDValue chain = LSN->getChain();
 519
 520   if (basePtr.getOpcode() == ISD::ADD) {
 521     SDValue Op1 = basePtr.getNode()->getOperand(1);
 522
 523     if (Op1.getOpcode() == ISD::Constant
 524         || Op1.getOpcode() == ISD::TargetConstant) {
 525       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 526
 527       alignOffs = (int) CN->getZExtValue();
 528       prefSlotOffs = (int) (alignOffs & 0xf);
 529
 530       // Adjust the rotation amount to ensure that the final result ends up in
 531       // the preferred slot:
 532       prefSlotOffs -= vtm->prefslot_byte;
 533       basePtr = basePtr.getOperand(0);
 534
 535       // Loading from memory, can we adjust alignment?
 536       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 537         SDValue APtr = basePtr.getOperand(0);
 538         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 539           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 540           alignment = GSDN->getGlobal()->getAlignment();
 541         }
 542       }
 543     } else {
 544       alignOffs = 0;
 545       prefSlotOffs = -vtm->prefslot_byte;
 546     }
 547   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
 548     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
 549     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
 550     prefSlotOffs = (int) (alignOffs & 0xf);
 551     prefSlotOffs -= vtm->prefslot_byte;
 552     basePtr = DAG.getRegister(SPU::R1, VT);
 553   } else {
 554     alignOffs = 0;
 555     prefSlotOffs = -vtm->prefslot_byte;
 556   }
 557
 558   if (alignment == 16) {
 559     // Realign the base pointer as a D-Form address:
 560     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 561       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 562                             basePtr,
 563                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 564     }
 565
 566     // Emit the vector load:
 567     was16aligned = true;
 568     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 569                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 570                        LSN->isVolatile(), 16);
 571   }
 572
 573   // Unaligned load or we're using the "large memory" model, which means that
 574   // we have to be very pessimistic:
 575   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 576     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
 577                           DAG.getConstant(0, PtrVT));
 578   }
 579
 580   // Add the offset
 581   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 582                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 583   was16aligned = false;
 584   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 585                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 586                      LSN->isVolatile(), 16);
 587 }
 588
 589 /// Custom lower loads for CellSPU
 590 /*!
 591  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 592  within a 16-byte block, we have to rotate to extract the requested element.
 593  */
 594 static SDValue
 595 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 596   LoadSDNode *LN = cast<LoadSDNode>(Op);
 597   SDValue the_chain = LN->getChain();
 598   MVT VT = LN->getMemoryVT();
 599   MVT OpVT = Op.getNode()->getValueType(0);
 600   ISD::LoadExtType ExtType = LN->getExtensionType();
 601   unsigned alignment = LN->getAlignment();
 602   SDValue Ops[8];
 603
 604   switch (LN->getAddressingMode()) {
 605   case ISD::UNINDEXED: {
 606     int offset, rotamt;
 607     bool was16aligned;
 608     SDValue result =
 609       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 610
 611     if (result.getNode() == 0)
 612       return result;
 613
 614     the_chain = result.getValue(1);
 615     // Rotate the chunk if necessary
 616     if (rotamt < 0)
 617       rotamt += 16;
 618     if (rotamt != 0 || !was16aligned) {
 619       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 620
 621       Ops[0] = the_chain;
 622       Ops[1] = result;
 623       if (was16aligned) {
 624         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 625       } else {
 626         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 627         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 628         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 629                              DAG.getConstant(rotamt, PtrVT));
 630       }
 631
 632       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 633       the_chain = result.getValue(1);
 634     }
 635
 636     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 637       SDVTList scalarvts;
 638       MVT vecVT = MVT::v16i8;
 639
 640       // Convert the loaded v16i8 vector to the appropriate vector type
 641       // specified by the operand:
 642       if (OpVT == VT) {
 643         if (VT != MVT::i1)
 644           vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 645       } else
 646         vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
 647
 648       Ops[0] = the_chain;
 649       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 650       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 651       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 652       the_chain = result.getValue(1);
 653     } else {
 654       // Handle the sign and zero-extending loads for i1 and i8:
 655       unsigned NewOpC;
 656
 657       if (ExtType == ISD::SEXTLOAD) {
 658         NewOpC = (OpVT == MVT::i1
 659                   ? SPUISD::EXTRACT_I1_SEXT
 660                   : SPUISD::EXTRACT_I8_SEXT);
 661       } else {
 662         assert(ExtType == ISD::ZEXTLOAD);
 663         NewOpC = (OpVT == MVT::i1
 664                   ? SPUISD::EXTRACT_I1_ZEXT
 665                   : SPUISD::EXTRACT_I8_ZEXT);
 666       }
 667
 668       result = DAG.getNode(NewOpC, OpVT, result);
 669     }
 670
 671     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 672     SDValue retops[2] = {
 673       result,
 674       the_chain
 675     };
 676
 677     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 678                          retops, sizeof(retops) / sizeof(retops[0]));
 679     return result;
 680   }
 681   case ISD::PRE_INC:
 682   case ISD::PRE_DEC:
 683   case ISD::POST_INC:
 684   case ISD::POST_DEC:
 685   case ISD::LAST_INDEXED_MODE:
 686     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 687             "UNINDEXED\n";
 688     cerr << (unsigned) LN->getAddressingMode() << "\n";
 689     abort();
 690     /*NOTREACHED*/
 691   }
 692
 693   return SDValue();
 694 }
 695
 696 /// Custom lower stores for CellSPU
 697 /*!
 698  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 699  within a 16-byte block, we have to generate a shuffle to insert the
 700  requested element into its place, then store the resulting block.
 701  */
 702 static SDValue
 703 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 704   StoreSDNode *SN = cast<StoreSDNode>(Op);
 705   SDValue Value = SN->getValue();
 706   MVT VT = Value.getValueType();
 707   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 708   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 709   unsigned alignment = SN->getAlignment();
 710
 711   switch (SN->getAddressingMode()) {
 712   case ISD::UNINDEXED: {
 713     int chunk_offset, slot_offset;
 714     bool was16aligned;
 715
 716     // The vector type we really want to load from the 16-byte chunk, except
 717     // in the case of MVT::i1, which has to be v16i8.
 718     MVT vecVT, stVecVT = MVT::v16i8;
 719
 720     if (StVT != MVT::i1)
 721       stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 722     vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 723
 724     SDValue alignLoadVec =
 725       AlignedLoad(Op, DAG, ST, SN, alignment,
 726                   chunk_offset, slot_offset, VT, was16aligned);
 727
 728     if (alignLoadVec.getNode() == 0)
 729       return alignLoadVec;
 730
 731     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 732     SDValue basePtr = LN->getBasePtr();
 733     SDValue the_chain = alignLoadVec.getValue(1);
 734     SDValue theValue = SN->getValue();
 735     SDValue result;
 736
 737     if (StVT != VT
 738         && (theValue.getOpcode() == ISD::AssertZext
 739             || theValue.getOpcode() == ISD::AssertSext)) {
 740       // Drill down and get the value for zero- and sign-extended
 741       // quantities
 742       theValue = theValue.getOperand(0);
 743     }
 744
 745     chunk_offset &= 0xf;
 746
 747     SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 748     SDValue insertEltPtr;
 749     SDValue insertEltOp;
 750
 751     // If the base pointer is already a D-form address, then just create
 752     // a new D-form address with a slot offset and the orignal base pointer.
 753     // Otherwise generate a D-form address with the slot offset relative
 754     // to the stack pointer, which is always aligned.
 755     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 756     DEBUG(basePtr.getNode()->dump(&DAG));
 757     DEBUG(cerr << "\n");
 758
 759     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 760         (basePtr.getOpcode() == ISD::ADD
 761          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 762       insertEltPtr = basePtr;
 763     } else {
 764       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 765     }
 766
 767     insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
 768     result = DAG.getNode(SPUISD::SHUFB, vecVT,
 769                          DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
 770                          alignLoadVec,
 771                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 772
 773     result = DAG.getStore(the_chain, result, basePtr,
 774                           LN->getSrcValue(), LN->getSrcValueOffset(),
 775                           LN->isVolatile(), LN->getAlignment());
 776
 777     return result;
 778     /*UNREACHED*/
 779   }
 780   case ISD::PRE_INC:
 781   case ISD::PRE_DEC:
 782   case ISD::POST_INC:
 783   case ISD::POST_DEC:
 784   case ISD::LAST_INDEXED_MODE:
 785     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 786             "UNINDEXED\n";
 787     cerr << (unsigned) SN->getAddressingMode() << "\n";
 788     abort();
 789     /*NOTREACHED*/
 790   }
 791
 792   return SDValue();
 793 }
 794
 795 /// Generate the address of a constant pool entry.
 796 static SDValue
 797 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 798   MVT PtrVT = Op.getValueType();
 799   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 800   Constant *C = CP->getConstVal();
 801   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 802   SDValue Zero = DAG.getConstant(0, PtrVT);
 803   const TargetMachine &TM = DAG.getTarget();
 804
 805   if (TM.getRelocationModel() == Reloc::Static) {
 806     if (!ST->usingLargeMem()) {
 807       // Just return the SDValue with the constant pool address in it.
 808       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 809     } else {
 810       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 811       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 812       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 813     }
 814   }
 815
 816   assert(0 &&
 817          "LowerConstantPool: Relocation model other than static"
 818          " not supported.");
 819   return SDValue();
 820 }
 821
 822 static SDValue
 823 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 824   MVT PtrVT = Op.getValueType();
 825   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 826   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 827   SDValue Zero = DAG.getConstant(0, PtrVT);
 828   const TargetMachine &TM = DAG.getTarget();
 829
 830   if (TM.getRelocationModel() == Reloc::Static) {
 831     if (!ST->usingLargeMem()) {
 832       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 833     } else {
 834       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 835       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 836       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 837     }
 838   }
 839
 840   assert(0 &&
 841          "LowerJumpTable: Relocation model other than static not supported.");
 842   return SDValue();
 843 }
 844
 845 static SDValue
 846 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 847   MVT PtrVT = Op.getValueType();
 848   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 849   GlobalValue *GV = GSDN->getGlobal();
 850   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 851   const TargetMachine &TM = DAG.getTarget();
 852   SDValue Zero = DAG.getConstant(0, PtrVT);
 853
 854   if (TM.getRelocationModel() == Reloc::Static) {
 855     if (!ST->usingLargeMem()) {
 856       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 857     } else {
 858       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 859       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 860       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 861     }
 862   } else {
 863     cerr << "LowerGlobalAddress: Relocation model other than static not "
 864          << "supported.\n";
 865     abort();
 866     /*NOTREACHED*/
 867   }
 868
 869   return SDValue();
 870 }
 871
 872 //! Custom lower i64 integer constants
 873 /*!
 874  This code inserts all of the necessary juggling that needs to occur to load
 875  a 64-bit constant into a register.
 876  */
 877 static SDValue
 878 LowerConstant(SDValue Op, SelectionDAG &DAG) {
 879   MVT VT = Op.getValueType();
 880   ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
 881
 882   if (VT == MVT::i64) {
 883     SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
 884     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 885                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 886   } else {
 887     cerr << "LowerConstant: unhandled constant type "
 888          << VT.getMVTString()
 889          << "\n";
 890     abort();
 891     /*NOTREACHED*/
 892   }
 893
 894   return SDValue();
 895 }
 896
 897 //! Custom lower double precision floating point constants
 898 static SDValue
 899 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 900   MVT VT = Op.getValueType();
 901   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 902
 903   assert((FP != 0) &&
 904          "LowerConstantFP: Node is not ConstantFPSDNode");
 905
 906   if (VT == MVT::f64) {
 907     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 908     return DAG.getNode(ISD::BIT_CONVERT, VT,
 909                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 910   }
 911
 912   return SDValue();
 913 }
 914
 915 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 916 static SDValue
 917 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
 918 {
 919   SDValue Cond = Op.getOperand(1);
 920   MVT CondVT = Cond.getValueType();
 921   MVT CondNVT;
 922
 923   if (CondVT == MVT::i1 || CondVT == MVT::i8) {
 924     CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
 925     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 926                       Op.getOperand(0),
 927                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 928                       Op.getOperand(2));
 929   } else
 930     return SDValue();                // Unchanged
 931 }
 932
 933 static SDValue
 934 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 935 {
 936   MachineFunction &MF = DAG.getMachineFunction();
 937   MachineFrameInfo *MFI = MF.getFrameInfo();
 938   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 939   SmallVector<SDValue, 8> ArgValues;
 940   SDValue Root = Op.getOperand(0);
 941   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 942
 943   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 944   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 945
 946   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 947   unsigned ArgRegIdx = 0;
 948   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 949
 950   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 951
 952   // Add DAG nodes to load the arguments or copy them out of registers.
 953   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 954        ArgNo != e; ++ArgNo) {
 955     SDValue ArgVal;
 956     bool needsLoad = false;
 957     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 958     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 959
 960     switch (ObjectVT.getSimpleVT()) {
 961     default: {
 962       cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 963            << ObjectVT.getMVTString()
 964            << "\n";
 965       abort();
 966     }
 967     case MVT::i8:
 968       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 969         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
 970         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 971         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
 972         ++ArgRegIdx;
 973       } else {
 974         needsLoad = true;
 975       }
 976       break;
 977     case MVT::i16:
 978       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 979         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
 980         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 981         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
 982         ++ArgRegIdx;
 983       } else {
 984         needsLoad = true;
 985       }
 986       break;
 987     case MVT::i32:
 988       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 989         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 990         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 991         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
 992         ++ArgRegIdx;
 993       } else {
 994         needsLoad = true;
 995       }
 996       break;
 997     case MVT::i64:
 998       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 999         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
1000         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
1002         ++ArgRegIdx;
1003       } else {
1004         needsLoad = true;
1005       }
1006       break;
1007     case MVT::f32:
1008       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1009         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
1010         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1011         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
1012         ++ArgRegIdx;
1013       } else {
1014         needsLoad = true;
1015       }
1016       break;
1017     case MVT::f64:
1018       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1019         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1020         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1021         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1022         ++ArgRegIdx;
1023       } else {
1024         needsLoad = true;
1025       }
1026       break;
1027     case MVT::v2f64:
1028     case MVT::v4f32:
1029     case MVT::v2i64:
1030     case MVT::v4i32:
1031     case MVT::v8i16:
1032     case MVT::v16i8:
1033       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1034         unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1035         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1036         ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1037         ++ArgRegIdx;
1038       } else {
1039         needsLoad = true;
1040       }
1041       break;
1042     }
1043
1044     // We need to load the argument to a virtual register if we determined above
1045     // that we ran out of physical registers of the appropriate type
1046     if (needsLoad) {
1047       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1048       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1049       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1050       ArgOffset += StackSlotSize;
1051     }
1052
1053     ArgValues.push_back(ArgVal);
1054   }
1055
1056   // If the function takes variable number of arguments, make a frame index for
1057   // the start of the first vararg value... for expansion of llvm.va_start.
1058   if (isVarArg) {
1059     VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1060                                                ArgOffset);
1061     SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1062     // If this function is vararg, store any remaining integer argument regs to
1063     // their spots on the stack so that they may be loaded by deferencing the
1064     // result of va_next.
1065     SmallVector<SDValue, 8> MemOps;
1066     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1067       unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1068       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1069       SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1070       SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1071       MemOps.push_back(Store);
1072       // Increment the address by four for the next argument to store
1073       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1074       FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1075     }
1076     if (!MemOps.empty())
1077       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1078   }
1079
1080   ArgValues.push_back(Root);
1081
1082   // Return the new list of results.
1083   return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1084                             ArgValues.size());
1085 }
1086
1087 /// isLSAAddress - Return the immediate to use if the specified
1088 /// value is representable as a LSA address.
1089 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1090   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1091   if (!C) return 0;
1092
1093   int Addr = C->getZExtValue();
1094   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1095       (Addr << 14 >> 14) != Addr)
1096     return 0;  // Top 14 bits have to be sext of immediate.
1097
1098   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1099 }
1100
1101 static
1102 SDValue
1103 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1104   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1105   SDValue Chain = TheCall->getChain();
1106 #if 0
1107   bool isVarArg   = TheCall->isVarArg();
1108   bool isTailCall = TheCall->isTailCall();
1109 #endif
1110   SDValue Callee    = TheCall->getCallee();
1111   unsigned NumOps     = TheCall->getNumArgs();
1112   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1113   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1114   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1115
1116   // Handy pointer type
1117   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1118
1119   // Accumulate how many bytes are to be pushed on the stack, including the
1120   // linkage area, and parameter passing area.  According to the SPU ABI,
1121   // we minimally need space for [LR] and [SP]
1122   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1123
1124   // Set up a copy of the stack pointer for use loading and storing any
1125   // arguments that may not fit in the registers available for argument
1126   // passing.
1127   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1128
1129   // Figure out which arguments are going to go in registers, and which in
1130   // memory.
1131   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1132   unsigned ArgRegIdx = 0;
1133
1134   // Keep track of registers passing arguments
1135   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1136   // And the arguments passed on the stack
1137   SmallVector<SDValue, 8> MemOpChains;
1138
1139   for (unsigned i = 0; i != NumOps; ++i) {
1140     SDValue Arg = TheCall->getArg(i);
1141
1142     // PtrOff will be used to store the current argument to the stack if a
1143     // register cannot be found for it.
1144     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1145     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1146
1147     switch (Arg.getValueType().getSimpleVT()) {
1148     default: assert(0 && "Unexpected ValueType for argument!");
1149     case MVT::i32:
1150     case MVT::i64:
1151     case MVT::i128:
1152       if (ArgRegIdx != NumArgRegs) {
1153         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1154       } else {
1155         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1156         ArgOffset += StackSlotSize;
1157       }
1158       break;
1159     case MVT::f32:
1160     case MVT::f64:
1161       if (ArgRegIdx != NumArgRegs) {
1162         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1163       } else {
1164         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1165         ArgOffset += StackSlotSize;
1166       }
1167       break;
1168     case MVT::v4f32:
1169     case MVT::v4i32:
1170     case MVT::v8i16:
1171     case MVT::v16i8:
1172       if (ArgRegIdx != NumArgRegs) {
1173         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1174       } else {
1175         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1176         ArgOffset += StackSlotSize;
1177       }
1178       break;
1179     }
1180   }
1181
1182   // Update number of stack bytes actually used, insert a call sequence start
1183   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1184   Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1185
1186   if (!MemOpChains.empty()) {
1187     // Adjust the stack pointer for the stack arguments.
1188     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1189                         &MemOpChains[0], MemOpChains.size());
1190   }
1191
1192   // Build a sequence of copy-to-reg nodes chained together with token chain
1193   // and flag operands which copy the outgoing args into the appropriate regs.
1194   SDValue InFlag;
1195   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1196     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1197                              InFlag);
1198     InFlag = Chain.getValue(1);
1199   }
1200
1201   SmallVector<SDValue, 8> Ops;
1202   unsigned CallOpc = SPUISD::CALL;
1203
1204   // If the callee is a GlobalAddress/Symbol node (quite common, every direct
1205   // call is) turn it into a TargetGlobalAddress/TargetSymbol node so that
1206   // legalize doesn't hack it.
1207   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1208     GlobalValue *GV = G->getGlobal();
1209     MVT CalleeVT = Callee.getValueType();
1210     SDValue Zero = DAG.getConstant(0, PtrVT);
1211     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1212
1213     if (!ST->usingLargeMem()) {
1214       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1215       // style calls, otherwise, external symbols are BRASL calls. This assumes
1216       // that declared/defined symbols are in the same compilation unit and can
1217       // be reached through PC-relative jumps.
1218       //
1219       // NOTE:
1220       // This may be an unsafe assumption for JIT and really large compilation
1221       // units.
1222       if (GV->isDeclaration()) {
1223         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1224       } else {
1225         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1226       }
1227     } else {
1228       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1229       // address pairs:
1230       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1231     }
1232   } else if (SymbolSDNode *S = dyn_cast<SymbolSDNode>(Callee))
1233     Callee = DAG.getSymbol(S->getSymbol(), Callee.getValueType(),
1234                            S->getLinkage());
1235   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1236     // If this is an absolute destination address that appears to be a legal
1237     // local store address, use the munged value.
1238     Callee = SDValue(Dest, 0);
1239   }
1240
1241   Ops.push_back(Chain);
1242   Ops.push_back(Callee);
1243
1244   // Add argument registers to the end of the list so that they are known live
1245   // into the call.
1246   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1247     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1248                                   RegsToPass[i].second.getValueType()));
1249
1250   if (InFlag.getNode())
1251     Ops.push_back(InFlag);
1252   // Returns a chain and a flag for retval copy to use.
1253   Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1254                       &Ops[0], Ops.size());
1255   InFlag = Chain.getValue(1);
1256
1257   Chain = DAG.getCALLSEQ_END(Chain,
1258                              DAG.getConstant(NumStackBytes, PtrVT),
1259                              DAG.getConstant(0, PtrVT),
1260                              InFlag);
1261   if (TheCall->getValueType(0) != MVT::Other)
1262     InFlag = Chain.getValue(1);
1263
1264   SDValue ResultVals[3];
1265   unsigned NumResults = 0;
1266
1267   // If the call has results, copy the values out of the ret val registers.
1268   switch (TheCall->getValueType(0).getSimpleVT()) {
1269   default: assert(0 && "Unexpected ret value!");
1270   case MVT::Other: break;
1271   case MVT::i32:
1272     if (TheCall->getValueType(1) == MVT::i32) {
1273       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1274       ResultVals[0] = Chain.getValue(0);
1275       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1276                                  Chain.getValue(2)).getValue(1);
1277       ResultVals[1] = Chain.getValue(0);
1278       NumResults = 2;
1279     } else {
1280       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1281       ResultVals[0] = Chain.getValue(0);
1282       NumResults = 1;
1283     }
1284     break;
1285   case MVT::i64:
1286     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1287     ResultVals[0] = Chain.getValue(0);
1288     NumResults = 1;
1289     break;
1290   case MVT::f32:
1291   case MVT::f64:
1292     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1293                                InFlag).getValue(1);
1294     ResultVals[0] = Chain.getValue(0);
1295     NumResults = 1;
1296     break;
1297   case MVT::v2f64:
1298   case MVT::v4f32:
1299   case MVT::v4i32:
1300   case MVT::v8i16:
1301   case MVT::v16i8:
1302     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1303                                    InFlag).getValue(1);
1304     ResultVals[0] = Chain.getValue(0);
1305     NumResults = 1;
1306     break;
1307   }
1308
1309   // If the function returns void, just return the chain.
1310   if (NumResults == 0)
1311     return Chain;
1312
1313   // Otherwise, merge everything together with a MERGE_VALUES node.
1314   ResultVals[NumResults++] = Chain;
1315   SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1316   return Res.getValue(Op.getResNo());
1317 }
1318
1319 static SDValue
1320 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1321   SmallVector<CCValAssign, 16> RVLocs;
1322   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1323   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1324   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1325   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1326
1327   // If this is the first return lowered for this function, add the regs to the
1328   // liveout set for the function.
1329   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1330     for (unsigned i = 0; i != RVLocs.size(); ++i)
1331       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1332   }
1333
1334   SDValue Chain = Op.getOperand(0);
1335   SDValue Flag;
1336
1337   // Copy the result values into the output registers.
1338   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1339     CCValAssign &VA = RVLocs[i];
1340     assert(VA.isRegLoc() && "Can only return in registers!");
1341     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1342     Flag = Chain.getValue(1);
1343   }
1344
1345   if (Flag.getNode())
1346     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1347   else
1348     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1349 }
1350
1351
1352 //===----------------------------------------------------------------------===//
1353 // Vector related lowering:
1354 //===----------------------------------------------------------------------===//
1355
1356 static ConstantSDNode *
1357 getVecImm(SDNode *N) {
1358   SDValue OpVal(0, 0);
1359
1360   // Check to see if this buildvec has a single non-undef value in its elements.
1361   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1362     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1363     if (OpVal.getNode() == 0)
1364       OpVal = N->getOperand(i);
1365     else if (OpVal != N->getOperand(i))
1366       return 0;
1367   }
1368
1369   if (OpVal.getNode() != 0) {
1370     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1371       return CN;
1372     }
1373   }
1374
1375   return 0; // All UNDEF: use implicit def.; not Constant node
1376 }
1377
1378 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1379 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1380 /// constant
1381 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1382                               MVT ValueType) {
1383   if (ConstantSDNode *CN = getVecImm(N)) {
1384     uint64_t Value = CN->getZExtValue();
1385     if (ValueType == MVT::i64) {
1386       uint64_t UValue = CN->getZExtValue();
1387       uint32_t upper = uint32_t(UValue >> 32);
1388       uint32_t lower = uint32_t(UValue);
1389       if (upper != lower)
1390         return SDValue();
1391       Value = Value >> 32;
1392     }
1393     if (Value <= 0x3ffff)
1394       return DAG.getConstant(Value, ValueType);
1395   }
1396
1397   return SDValue();
1398 }
1399
1400 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1401 /// and the value fits into a signed 16-bit constant, and if so, return the
1402 /// constant
1403 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1404                               MVT ValueType) {
1405   if (ConstantSDNode *CN = getVecImm(N)) {
1406     int64_t Value = CN->getSignExtended();
1407     if (ValueType == MVT::i64) {
1408       uint64_t UValue = CN->getZExtValue();
1409       uint32_t upper = uint32_t(UValue >> 32);
1410       uint32_t lower = uint32_t(UValue);
1411       if (upper != lower)
1412         return SDValue();
1413       Value = Value >> 32;
1414     }
1415     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1416       return DAG.getConstant(Value, ValueType);
1417     }
1418   }
1419
1420   return SDValue();
1421 }
1422
1423 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1424 /// and the value fits into a signed 10-bit constant, and if so, return the
1425 /// constant
1426 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1427                               MVT ValueType) {
1428   if (ConstantSDNode *CN = getVecImm(N)) {
1429     int64_t Value = CN->getSignExtended();
1430     if (ValueType == MVT::i64) {
1431       uint64_t UValue = CN->getZExtValue();
1432       uint32_t upper = uint32_t(UValue >> 32);
1433       uint32_t lower = uint32_t(UValue);
1434       if (upper != lower)
1435         return SDValue();
1436       Value = Value >> 32;
1437     }
1438     if (isS10Constant(Value))
1439       return DAG.getConstant(Value, ValueType);
1440   }
1441
1442   return SDValue();
1443 }
1444
1445 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1446 /// and the value fits into a signed 8-bit constant, and if so, return the
1447 /// constant.
1448 ///
1449 /// @note: The incoming vector is v16i8 because that's the only way we can load
1450 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1451 /// same value.
1452 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1453                              MVT ValueType) {
1454   if (ConstantSDNode *CN = getVecImm(N)) {
1455     int Value = (int) CN->getZExtValue();
1456     if (ValueType == MVT::i16
1457         && Value <= 0xffff                 /* truncated from uint64_t */
1458         && ((short) Value >> 8) == ((short) Value & 0xff))
1459       return DAG.getConstant(Value & 0xff, ValueType);
1460     else if (ValueType == MVT::i8
1461              && (Value & 0xff) == Value)
1462       return DAG.getConstant(Value, ValueType);
1463   }
1464
1465   return SDValue();
1466 }
1467
1468 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1469 /// and the value fits into a signed 16-bit constant, and if so, return the
1470 /// constant
1471 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1472                                MVT ValueType) {
1473   if (ConstantSDNode *CN = getVecImm(N)) {
1474     uint64_t Value = CN->getZExtValue();
1475     if ((ValueType == MVT::i32
1476           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1477         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1478       return DAG.getConstant(Value >> 16, ValueType);
1479   }
1480
1481   return SDValue();
1482 }
1483
1484 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1485 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1486   if (ConstantSDNode *CN = getVecImm(N)) {
1487     return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
1488   }
1489
1490   return SDValue();
1491 }
1492
1493 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1494 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1495   if (ConstantSDNode *CN = getVecImm(N)) {
1496     return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i64);
1497   }
1498
1499   return SDValue();
1500 }
1501
1502 // If this is a vector of constants or undefs, get the bits.  A bit in
1503 // UndefBits is set if the corresponding element of the vector is an
1504 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1505 // zero.   Return true if this is not an array of constants, false if it is.
1506 //
1507 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1508                                        uint64_t UndefBits[2]) {
1509   // Start with zero'd results.
1510   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1511
1512   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1513   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1514     SDValue OpVal = BV->getOperand(i);
1515
1516     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1517     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1518
1519     uint64_t EltBits = 0;
1520     if (OpVal.getOpcode() == ISD::UNDEF) {
1521       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1522       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1523       continue;
1524     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1525       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1526     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1527       const APFloat &apf = CN->getValueAPF();
1528       EltBits = (CN->getValueType(0) == MVT::f32
1529                  ? FloatToBits(apf.convertToFloat())
1530                  : DoubleToBits(apf.convertToDouble()));
1531     } else {
1532       // Nonconstant element.
1533       return true;
1534     }
1535
1536     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1537   }
1538
1539   //printf("%llx %llx  %llx %llx\n",
1540   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1541   return false;
1542 }
1543
/// Decide whether a 128-bit constant (given as two 64-bit words plus matching
/// undef masks) is a splat, i.e. a repetition of one value across the vector.
///
/// \p MinSplatBits selects the granularity that is tested: the halves are
/// compared at 64 bits, then (if MinSplatBits < 64) at 32 bits, then (if
/// MinSplatBits < 32) at 16 bits, then (if MinSplatBits < 16) at 8 bits.
/// Every comparison ignores bits that are undefined in either half. The
/// function succeeds only if all required comparisons match, and then reports
/// the splat at the finest granularity it was asked to test. For example,
/// 0x0101...01 yields SplatBits = 0x01 / SplatSize = 1 for MinSplatBits = 8,
/// and SplatBits = 0x0101 / SplatSize = 2 for MinSplatBits = 16.
///
/// \param Bits128      value bits; undefined positions are expected to be 0.
/// \param Undef128     mask of undefined bits, same layout as Bits128.
/// \param MinSplatBits smallest element width (in bits) to test for.
/// \param SplatBits    [out] the splatted value (halves OR-ed together, so a
///                     defined half wins over an undefined one).
/// \param SplatUndef   [out] bits undefined in every repetition.
/// \param SplatSize    [out] size of the splat value in bytes (1, 2, 4 or 8).
/// \return true if a splat was found; on false the outputs are left untouched.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  The top 64 bits must be
  // the same as the lower 64 bits, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  const uint64_t Bits64  = Bits128[0] | Bits128[1];
  const uint64_t Undef64 = Undef128[0] & Undef128[1];

  if (MinSplatBits >= 64) {
    // Report the merged halves: if one half is undef the value must come from
    // the other one, and a bit is undef only if it is undef in both halves.
    SplatBits = Bits64;
    SplatUndef = Undef64;
    SplatSize = 8;
    return true;
  }

  // The top 32 bits must be the same as the lower 32 bits, ignoring undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  const uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  const uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  if (MinSplatBits >= 32) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // The top 16 bits must be the same as the lower 16 bits, ignoring undefs.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  const uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  const uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if (MinSplatBits >= 16) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // The top 8 bits must be the same as the lower 8 bits, ignoring undefs.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  // We have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
1608
1609 // If this is a case we can't handle, return null and let the default
1610 // expansion code take care of it.  If we CAN select this case, and if it
1611 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1612 // this case more efficiently than a constant pool load, lower it to the
1613 // sequence of ops that should be used.
1614 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1615   MVT VT = Op.getValueType();
1616   // If this is a vector of constants or undefs, get the bits.  A bit in
1617   // UndefBits is set if the corresponding element of the vector is an
1618   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1619   // zero.
1620   uint64_t VectorBits[2];
1621   uint64_t UndefBits[2];
1622   uint64_t SplatBits, SplatUndef;
1623   int SplatSize;
1624   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1625       || !isConstantSplat(VectorBits, UndefBits,
1626                           VT.getVectorElementType().getSizeInBits(),
1627                           SplatBits, SplatUndef, SplatSize))
1628     return SDValue();   // Not a constant vector, not a splat.
1629
1630   switch (VT.getSimpleVT()) {
1631   default:
1632   case MVT::v4f32: {
1633     uint32_t Value32 = SplatBits;
1634     assert(SplatSize == 4
1635            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1636     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1637     SDValue T = DAG.getConstant(Value32, MVT::i32);
1638     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1639                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1640     break;
1641   }
1642   case MVT::v2f64: {
1643     uint64_t f64val = SplatBits;
1644     assert(SplatSize == 8
1645            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1646     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1647     SDValue T = DAG.getConstant(f64val, MVT::i64);
1648     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1649                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1650     break;
1651   }
1652   case MVT::v16i8: {
1653    // 8-bit constants have to be expanded to 16-bits
1654    unsigned short Value16 = SplatBits | (SplatBits << 8);
1655    SDValue Ops[8];
1656    for (int i = 0; i < 8; ++i)
1657      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1658    return DAG.getNode(ISD::BIT_CONVERT, VT,
1659                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1660   }
1661   case MVT::v8i16: {
1662     unsigned short Value16;
1663     if (SplatSize == 2)
1664       Value16 = (unsigned short) (SplatBits & 0xffff);
1665     else
1666       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1667     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1668     SDValue Ops[8];
1669     for (int i = 0; i < 8; ++i) Ops[i] = T;
1670     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1671   }
1672   case MVT::v4i32: {
1673     unsigned int Value = SplatBits;
1674     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1675     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1676   }
1677   case MVT::v2i64: {
1678     uint64_t val = SplatBits;
1679     uint32_t upper = uint32_t(val >> 32);
1680     uint32_t lower = uint32_t(val);
1681
1682     if (upper == lower) {
1683       // Magic constant that can be matched by IL, ILA, et. al.
1684       SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1685       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1686     } else {
1687       SDValue LO32;
1688       SDValue HI32;
1689       SmallVector<SDValue, 16> ShufBytes;
1690       SDValue Result;
1691       bool upper_special, lower_special;
1692
1693       // NOTE: This code creates common-case shuffle masks that can be easily
1694       // detected as common expressions. It is not attempting to create highly
1695       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1696
1697       // Detect if the upper or lower half is a special shuffle mask pattern:
1698       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1699       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1700
1701       // Create lower vector if not a special pattern
1702       if (!lower_special) {
1703         SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1704         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1705                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1706                                        LO32C, LO32C, LO32C, LO32C));
1707       }
1708
1709       // Create upper vector if not a special pattern
1710       if (!upper_special) {
1711         SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1712         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1713                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1714                                        HI32C, HI32C, HI32C, HI32C));
1715       }
1716
1717       // If either upper or lower are special, then the two input operands are
1718       // the same (basically, one of them is a "don't care")
1719       if (lower_special)
1720         LO32 = HI32;
1721       if (upper_special)
1722         HI32 = LO32;
1723       if (lower_special && upper_special) {
1724         // Unhappy situation... both upper and lower are special, so punt with
1725         // a target constant:
1726         SDValue Zero = DAG.getConstant(0, MVT::i32);
1727         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1728                                   Zero, Zero);
1729       }
1730
1731       for (int i = 0; i < 4; ++i) {
1732         uint64_t val = 0;
1733         for (int j = 0; j < 4; ++j) {
1734           SDValue V;
1735           bool process_upper, process_lower;
1736           val <<= 8;
1737           process_upper = (upper_special && (i & 1) == 0);
1738           process_lower = (lower_special && (i & 1) == 1);
1739
1740           if (process_upper || process_lower) {
1741             if ((process_upper && upper == 0)
1742                 || (process_lower && lower == 0))
1743               val |= 0x80;
1744             else if ((process_upper && upper == 0xffffffff)
1745                      || (process_lower && lower == 0xffffffff))
1746               val |= 0xc0;
1747             else if ((process_upper && upper == 0x80000000)
1748                      || (process_lower && lower == 0x80000000))
1749               val |= (j == 0 ? 0xe0 : 0x80);
1750           } else
1751             val |= i * 4 + j + ((i & 1) * 16);
1752         }
1753
1754         ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1755       }
1756
1757       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1758                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1759                                      &ShufBytes[0], ShufBytes.size()));
1760     }
1761   }
1762   }
1763
1764   return SDValue();
1765 }
1766
1767 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1768 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1769 /// permutation vector, V3, is monotonically increasing with one "exception"
1770 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1771 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1772 /// In either case, the net result is going to eventually invoke SHUFB to
1773 /// permute/shuffle the bytes from V1 and V2.
1774 /// \note
1775 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1776 /// control word for byte/halfword/word insertion. This takes care of a single
1777 /// element move from V2 into V1.
1778 /// \note
1779 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1780 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1781   SDValue V1 = Op.getOperand(0);
1782   SDValue V2 = Op.getOperand(1);
1783   SDValue PermMask = Op.getOperand(2);
1784
1785   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1786
1787   // If we have a single element being moved from V1 to V2, this can be handled
1788   // using the C*[DX] compute mask instructions, but the vector elements have
1789   // to be monotonically increasing with one exception element.
1790   MVT EltVT = V1.getValueType().getVectorElementType();
1791   unsigned EltsFromV2 = 0;
1792   unsigned V2Elt = 0;
1793   unsigned V2EltIdx0 = 0;
1794   unsigned CurrElt = 0;
1795   bool monotonic = true;
1796   if (EltVT == MVT::i8)
1797     V2EltIdx0 = 16;
1798   else if (EltVT == MVT::i16)
1799     V2EltIdx0 = 8;
1800   else if (EltVT == MVT::i32)
1801     V2EltIdx0 = 4;
1802   else
1803     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1804
1805   for (unsigned i = 0, e = PermMask.getNumOperands();
1806        EltsFromV2 <= 1 && monotonic && i != e;
1807        ++i) {
1808     unsigned SrcElt;
1809     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1810       SrcElt = 0;
1811     else
1812       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1813
1814     if (SrcElt >= V2EltIdx0) {
1815       ++EltsFromV2;
1816       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1817     } else if (CurrElt != SrcElt) {
1818       monotonic = false;
1819     }
1820
1821     ++CurrElt;
1822   }
1823
1824   if (EltsFromV2 == 1 && monotonic) {
1825     // Compute mask and shuffle
1826     MachineFunction &MF = DAG.getMachineFunction();
1827     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1828     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1829     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1830     // Initialize temporary register to 0
1831     SDValue InitTempReg =
1832       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1833     // Copy register's contents as index in INSERT_MASK:
1834     SDValue ShufMaskOp =
1835       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1836                   DAG.getTargetConstant(V2Elt, MVT::i32),
1837                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1838     // Use shuffle mask in SHUFB synthetic instruction:
1839     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1840   } else {
1841    // Convert the SHUFFLE_VECTOR mask's input element units to the
1842    // actual bytes.
1843     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1844
1845     SmallVector<SDValue, 16> ResultMask;
1846     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1847       unsigned SrcElt;
1848       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1849         SrcElt = 0;
1850       else
1851         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1852
1853       for (unsigned j = 0; j < BytesPerElement; ++j) {
1854         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1855                                              MVT::i8));
1856       }
1857     }
1858
1859     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1860                                       &ResultMask[0], ResultMask.size());
1861     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1862   }
1863 }
1864
1865 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1866   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1867
1868   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1869     // For a constant, build the appropriate constant vector, which will
1870     // eventually simplify to a vector register load.
1871
1872     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1873     SmallVector<SDValue, 16> ConstVecValues;
1874     MVT VT;
1875     size_t n_copies;
1876
1877     // Create a constant vector:
1878     switch (Op.getValueType().getSimpleVT()) {
1879     default: assert(0 && "Unexpected constant value type in "
1880                          "LowerSCALAR_TO_VECTOR");
1881     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1882     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1883     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1884     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1885     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1886     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1887     }
1888
1889     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1890     for (size_t j = 0; j < n_copies; ++j)
1891       ConstVecValues.push_back(CValue);
1892
1893     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1894                        &ConstVecValues[0], ConstVecValues.size());
1895   } else {
1896     // Otherwise, copy the value from one register to another:
1897     switch (Op0.getValueType().getSimpleVT()) {
1898     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1899     case MVT::i8:
1900     case MVT::i16:
1901     case MVT::i32:
1902     case MVT::i64:
1903     case MVT::f32:
1904     case MVT::f64:
1905       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1906     }
1907   }
1908
1909   return SDValue();
1910 }
1911
1912 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1913   switch (Op.getValueType().getSimpleVT()) {
1914   default:
1915     cerr << "CellSPU: Unknown vector multiplication, got "
1916          << Op.getValueType().getMVTString()
1917          << "\n";
1918     abort();
1919     /*NOTREACHED*/
1920
1921   case MVT::v4i32: {
1922     SDValue rA = Op.getOperand(0);
1923     SDValue rB = Op.getOperand(1);
1924     SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1925     SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1926     SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1927     SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1928
1929     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1930     break;
1931   }
1932
1933   // Multiply two v8i16 vectors (pipeline friendly version):
1934   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1935   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1936   // c) Use SELB to select upper and lower halves from the intermediate results
1937   //
1938   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1939   // dual-issue. This code does manage to do this, even if it's a little on
1940   // the wacky side
1941   case MVT::v8i16: {
1942     MachineFunction &MF = DAG.getMachineFunction();
1943     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1944     SDValue Chain = Op.getOperand(0);
1945     SDValue rA = Op.getOperand(0);
1946     SDValue rB = Op.getOperand(1);
1947     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1948     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1949
1950     SDValue FSMBOp =
1951       DAG.getCopyToReg(Chain, FSMBIreg,
1952                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1953                                    DAG.getConstant(0xcccc, MVT::i16)));
1954
1955     SDValue HHProd =
1956       DAG.getCopyToReg(FSMBOp, HiProdReg,
1957                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1958
1959     SDValue HHProd_v4i32 =
1960       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1961                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1962
1963     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1964                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1965                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1966                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1967                                                HHProd_v4i32,
1968                                                DAG.getConstant(16, MVT::i16))),
1969                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1970   }
1971
1972   // This M00sE is N@stI! (apologies to Monty Python)
1973   //
1974   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1975   // is to break it all apart, sign extend, and reassemble the various
1976   // intermediate products.
1977   case MVT::v16i8: {
1978     SDValue rA = Op.getOperand(0);
1979     SDValue rB = Op.getOperand(1);
1980     SDValue c8 = DAG.getConstant(8, MVT::i32);
1981     SDValue c16 = DAG.getConstant(16, MVT::i32);
1982
1983     SDValue LLProd =
1984       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1985                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1986                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1987
1988     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1989
1990     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1991
1992     SDValue LHProd =
1993       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1994                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1995
1996     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1997                                      DAG.getConstant(0x2222, MVT::i16));
1998
1999     SDValue LoProdParts =
2000       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2001                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
2002                               LLProd, LHProd, FSMBmask));
2003
2004     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
2005
2006     SDValue LoProd =
2007       DAG.getNode(ISD::AND, MVT::v4i32,
2008                   LoProdParts,
2009                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2010                               LoProdMask, LoProdMask,
2011                               LoProdMask, LoProdMask));
2012
2013     SDValue rAH =
2014       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2015                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2016
2017     SDValue rBH =
2018       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2019                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2020
2021     SDValue HLProd =
2022       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2023                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2024                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2025
2026     SDValue HHProd_1 =
2027       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2028                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2029                               DAG.getNode(SPUISD::VEC_SRA,
2030                                           MVT::v4i32, rAH, c8)),
2031                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2032                               DAG.getNode(SPUISD::VEC_SRA,
2033                                           MVT::v4i32, rBH, c8)));
2034
2035     SDValue HHProd =
2036       DAG.getNode(SPUISD::SELB, MVT::v8i16,
2037                   HLProd,
2038                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2039                   FSMBmask);
2040
2041     SDValue HiProd =
2042       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2043
2044     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2045                        DAG.getNode(ISD::OR, MVT::v4i32,
2046                                    LoProd, HiProd));
2047   }
2048   }
2049
2050   return SDValue();
2051 }
2052
2053 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2054   MachineFunction &MF = DAG.getMachineFunction();
2055   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2056
2057   SDValue A = Op.getOperand(0);
2058   SDValue B = Op.getOperand(1);
2059   MVT VT = Op.getValueType();
2060
2061   unsigned VRegBR, VRegC;
2062
2063   if (VT == MVT::f32) {
2064     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2065     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2066   } else {
2067     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2068     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2069   }
2070   // TODO: make sure we're feeding FPInterp the right arguments
2071   // Right now: fi B, frest(B)
2072
2073   // Computes BRcpl =
2074   // (Floating Interpolate (FP Reciprocal Estimate B))
2075   SDValue BRcpl =
2076       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2077                        DAG.getNode(SPUISD::FPInterp, VT, B,
2078                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2079
2080   // Computes A * BRcpl and stores in a temporary register
2081   SDValue AxBRcpl =
2082       DAG.getCopyToReg(BRcpl, VRegC,
2083                  DAG.getNode(ISD::FMUL, VT, A,
2084                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2085   // What's the Chain variable do? It's magic!
2086   // TODO: set Chain = Op(0).getEntryNode()
2087
2088   return DAG.getNode(ISD::FADD, VT,
2089                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2090                 DAG.getNode(ISD::FMUL, VT,
2091                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2092                         DAG.getNode(ISD::FSUB, VT, A,
2093                             DAG.getNode(ISD::FMUL, VT, B,
2094                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2095 }
2096
2097 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2098   MVT VT = Op.getValueType();
2099   SDValue N = Op.getOperand(0);
2100   SDValue Elt = Op.getOperand(1);
2101   SDValue ShufMask[16];
2102   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2103
2104   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2105
2106   int EltNo = (int) C->getZExtValue();
2107
2108   // sanity checks:
2109   if (VT == MVT::i8 && EltNo >= 16)
2110     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2111   else if (VT == MVT::i16 && EltNo >= 8)
2112     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2113   else if (VT == MVT::i32 && EltNo >= 4)
2114     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2115   else if (VT == MVT::i64 && EltNo >= 2)
2116     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2117
2118   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2119     // i32 and i64: Element 0 is the preferred slot
2120     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2121   }
2122
2123   // Need to generate shuffle mask and extract:
2124   int prefslot_begin = -1, prefslot_end = -1;
2125   int elt_byte = EltNo * VT.getSizeInBits() / 8;
2126
2127   switch (VT.getSimpleVT()) {
2128   default:
2129     assert(false && "Invalid value type!");
2130   case MVT::i8: {
2131     prefslot_begin = prefslot_end = 3;
2132     break;
2133   }
2134   case MVT::i16: {
2135     prefslot_begin = 2; prefslot_end = 3;
2136     break;
2137   }
2138   case MVT::i32: {
2139     prefslot_begin = 0; prefslot_end = 3;
2140     break;
2141   }
2142   case MVT::i64: {
2143     prefslot_begin = 0; prefslot_end = 7;
2144     break;
2145   }
2146   }
2147
2148   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2149          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2150
2151   for (int i = 0; i < 16; ++i) {
2152     // zero fill uppper part of preferred slot, don't care about the
2153     // other slots:
2154     unsigned int mask_val;
2155
2156     if (i <= prefslot_end) {
2157       mask_val =
2158         ((i < prefslot_begin)
2159          ? 0x80
2160          : elt_byte + (i - prefslot_begin));
2161
2162       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2163     } else
2164       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2165   }
2166
2167   SDValue ShufMaskVec =
2168     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2169                 &ShufMask[0],
2170                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2171
2172   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2173                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2174                                  N, N, ShufMaskVec));
2175
2176 }
2177
2178 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2179   SDValue VecOp = Op.getOperand(0);
2180   SDValue ValOp = Op.getOperand(1);
2181   SDValue IdxOp = Op.getOperand(2);
2182   MVT VT = Op.getValueType();
2183
2184   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2185   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2186
2187   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2188   // Use $2 because it's always 16-byte aligned and it's available:
2189   SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2190
2191   SDValue result =
2192     DAG.getNode(SPUISD::SHUFB, VT,
2193                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2194                 VecOp,
2195                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2196                             DAG.getNode(ISD::ADD, PtrVT,
2197                                         PtrBase,
2198                                         DAG.getConstant(CN->getZExtValue(),
2199                                                         PtrVT))));
2200
2201   return result;
2202 }
2203
2204 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2205 {
2206   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2207
2208   assert(Op.getValueType() == MVT::i8);
2209   switch (Opc) {
2210   default:
2211     assert(0 && "Unhandled i8 math operator");
2212     /*NOTREACHED*/
2213     break;
2214   case ISD::SUB: {
2215     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2216     // the result:
2217     SDValue N1 = Op.getOperand(1);
2218     N0 = (N0.getOpcode() != ISD::Constant
2219           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2220           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2221                             MVT::i16));
2222     N1 = (N1.getOpcode() != ISD::Constant
2223           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2224           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2225                             MVT::i16));
2226     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2227                        DAG.getNode(Opc, MVT::i16, N0, N1));
2228   }
2229   case ISD::ROTR:
2230   case ISD::ROTL: {
2231     SDValue N1 = Op.getOperand(1);
2232     unsigned N1Opc;
2233     N0 = (N0.getOpcode() != ISD::Constant
2234           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2235           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2236                             MVT::i16));
2237     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2238             ? ISD::ZERO_EXTEND
2239             : ISD::TRUNCATE;
2240     N1 = (N1.getOpcode() != ISD::Constant
2241           ? DAG.getNode(N1Opc, MVT::i16, N1)
2242           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2243                             MVT::i16));
2244     SDValue ExpandArg =
2245       DAG.getNode(ISD::OR, MVT::i16, N0,
2246                   DAG.getNode(ISD::SHL, MVT::i16,
2247                               N0, DAG.getConstant(8, MVT::i16)));
2248     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2249                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2250   }
2251   case ISD::SRL:
2252   case ISD::SHL: {
2253     SDValue N1 = Op.getOperand(1);
2254     unsigned N1Opc;
2255     N0 = (N0.getOpcode() != ISD::Constant
2256           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2257           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2258                             MVT::i16));
2259     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2260             ? ISD::ZERO_EXTEND
2261             : ISD::TRUNCATE;
2262     N1 = (N1.getOpcode() != ISD::Constant
2263           ? DAG.getNode(N1Opc, MVT::i16, N1)
2264           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2265                             MVT::i16));
2266     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2267                        DAG.getNode(Opc, MVT::i16, N0, N1));
2268   }
2269   case ISD::SRA: {
2270     SDValue N1 = Op.getOperand(1);
2271     unsigned N1Opc;
2272     N0 = (N0.getOpcode() != ISD::Constant
2273           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2274           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2275                             MVT::i16));
2276     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2277             ? ISD::SIGN_EXTEND
2278             : ISD::TRUNCATE;
2279     N1 = (N1.getOpcode() != ISD::Constant
2280           ? DAG.getNode(N1Opc, MVT::i16, N1)
2281           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2282                             MVT::i16));
2283     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2284                        DAG.getNode(Opc, MVT::i16, N0, N1));
2285   }
2286   case ISD::MUL: {
2287     SDValue N1 = Op.getOperand(1);
2288     unsigned N1Opc;
2289     N0 = (N0.getOpcode() != ISD::Constant
2290           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2291           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2292                             MVT::i16));
2293     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2294     N1 = (N1.getOpcode() != ISD::Constant
2295           ? DAG.getNode(N1Opc, MVT::i16, N1)
2296           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2297                             MVT::i16));
2298     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2299                        DAG.getNode(Opc, MVT::i16, N0, N1));
2300     break;
2301   }
2302   }
2303
2304   return SDValue();
2305 }
2306
2307 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2308 {
2309   MVT VT = Op.getValueType();
2310   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2311
2312   SDValue Op0 = Op.getOperand(0);
2313
2314   switch (Opc) {
2315   case ISD::ZERO_EXTEND:
2316   case ISD::SIGN_EXTEND:
2317   case ISD::ANY_EXTEND: {
2318     MVT Op0VT = Op0.getValueType();
2319     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2320
2321     assert(Op0VT == MVT::i32
2322            && "CellSPU: Zero/sign extending something other than i32");
2323     DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2324
2325     unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2326                       ? SPUISD::ROTBYTES_RIGHT_S
2327                       : SPUISD::ROTQUAD_RZ_BYTES);
2328     SDValue PromoteScalar =
2329       DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2330
2331     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2332                        DAG.getNode(ISD::BIT_CONVERT, VecVT,
2333                                    DAG.getNode(NewOpc, Op0VecVT,
2334                                                PromoteScalar,
2335                                                DAG.getConstant(4, MVT::i32))));
2336   }
2337
2338   case ISD::ADD: {
2339     // Turn operands into vectors to satisfy type checking (shufb works on
2340     // vectors)
2341     SDValue Op0 =
2342       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2343     SDValue Op1 =
2344       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2345     SmallVector<SDValue, 16> ShufBytes;
2346
2347     // Create the shuffle mask for "rotating" the borrow up one register slot
2348     // once the borrow is generated.
2349     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2350     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2351     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2352     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2353
2354     SDValue CarryGen =
2355       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2356     SDValue ShiftedCarry =
2357       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2358                   CarryGen, CarryGen,
2359                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2360                               &ShufBytes[0], ShufBytes.size()));
2361
2362     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2363                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2364                                    Op0, Op1, ShiftedCarry));
2365   }
2366
2367   case ISD::SUB: {
2368     // Turn operands into vectors to satisfy type checking (shufb works on
2369     // vectors)
2370     SDValue Op0 =
2371       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2372     SDValue Op1 =
2373       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2374     SmallVector<SDValue, 16> ShufBytes;
2375
2376     // Create the shuffle mask for "rotating" the borrow up one register slot
2377     // once the borrow is generated.
2378     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2379     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2380     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2381     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2382
2383     SDValue BorrowGen =
2384       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2385     SDValue ShiftedBorrow =
2386       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2387                   BorrowGen, BorrowGen,
2388                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2389                               &ShufBytes[0], ShufBytes.size()));
2390
2391     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2392                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2393                                    Op0, Op1, ShiftedBorrow));
2394   }
2395
2396   case ISD::SHL: {
2397     SDValue ShiftAmt = Op.getOperand(1);
2398     MVT ShiftAmtVT = ShiftAmt.getValueType();
2399     SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2400     SDValue MaskLower =
2401       DAG.getNode(SPUISD::SELB, VecVT,
2402                   Op0Vec,
2403                   DAG.getConstant(0, VecVT),
2404                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2405                               DAG.getConstant(0xff00ULL, MVT::i16)));
2406     SDValue ShiftAmtBytes =
2407       DAG.getNode(ISD::SRL, ShiftAmtVT,
2408                   ShiftAmt,
2409                   DAG.getConstant(3, ShiftAmtVT));
2410     SDValue ShiftAmtBits =
2411       DAG.getNode(ISD::AND, ShiftAmtVT,
2412                   ShiftAmt,
2413                   DAG.getConstant(7, ShiftAmtVT));
2414
2415     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2416                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2417                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2418                                                MaskLower, ShiftAmtBytes),
2419                                    ShiftAmtBits));
2420   }
2421
2422   case ISD::SRL: {
2423     MVT VT = Op.getValueType();
2424     SDValue ShiftAmt = Op.getOperand(1);
2425     MVT ShiftAmtVT = ShiftAmt.getValueType();
2426     SDValue ShiftAmtBytes =
2427       DAG.getNode(ISD::SRL, ShiftAmtVT,
2428                   ShiftAmt,
2429                   DAG.getConstant(3, ShiftAmtVT));
2430     SDValue ShiftAmtBits =
2431       DAG.getNode(ISD::AND, ShiftAmtVT,
2432                   ShiftAmt,
2433                   DAG.getConstant(7, ShiftAmtVT));
2434
2435     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2436                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2437                                    Op0, ShiftAmtBytes),
2438                        ShiftAmtBits);
2439   }
2440
2441   case ISD::SRA: {
2442     // Promote Op0 to vector
2443     SDValue Op0 =
2444       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2445     SDValue ShiftAmt = Op.getOperand(1);
2446     MVT ShiftVT = ShiftAmt.getValueType();
2447
2448     // Negate variable shift amounts
2449     if (!isa<ConstantSDNode>(ShiftAmt)) {
2450       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2451                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2452     }
2453
2454     SDValue UpperHalfSign =
2455       DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2456                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2457                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2458                                           Op0, DAG.getConstant(31, MVT::i32))));
2459     SDValue UpperHalfSignMask =
2460       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2461     SDValue UpperLowerMask =
2462       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2463                   DAG.getConstant(0xff00, MVT::i16));
2464     SDValue UpperLowerSelect =
2465       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2466                   UpperHalfSignMask, Op0, UpperLowerMask);
2467     SDValue RotateLeftBytes =
2468       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2469                   UpperLowerSelect, ShiftAmt);
2470     SDValue RotateLeftBits =
2471       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2472                   RotateLeftBytes, ShiftAmt);
2473
2474     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2475                        RotateLeftBits);
2476   }
2477   }
2478
2479   return SDValue();
2480 }
2481
2482 //! Lower byte immediate operations for v16i8 vectors:
2483 static SDValue
2484 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2485   SDValue ConstVec;
2486   SDValue Arg;
2487   MVT VT = Op.getValueType();
2488
2489   ConstVec = Op.getOperand(0);
2490   Arg = Op.getOperand(1);
2491   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2492     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2493       ConstVec = ConstVec.getOperand(0);
2494     } else {
2495       ConstVec = Op.getOperand(1);
2496       Arg = Op.getOperand(0);
2497       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2498         ConstVec = ConstVec.getOperand(0);
2499       }
2500     }
2501   }
2502
2503   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2504     uint64_t VectorBits[2];
2505     uint64_t UndefBits[2];
2506     uint64_t SplatBits, SplatUndef;
2507     int SplatSize;
2508
2509     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2510         && isConstantSplat(VectorBits, UndefBits,
2511                            VT.getVectorElementType().getSizeInBits(),
2512                            SplatBits, SplatUndef, SplatSize)) {
2513       SDValue tcVec[16];
2514       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2515       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2516
2517       // Turn the BUILD_VECTOR into a set of target constants:
2518       for (size_t i = 0; i < tcVecSize; ++i)
2519         tcVec[i] = tc;
2520
2521       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2522                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2523     }
2524   }
2525   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2526   // lowered.  Return the operation, rather than a null SDValue.
2527   return Op;
2528 }
2529
2530 //! Lower i32 multiplication
2531 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2532                           unsigned Opc) {
2533   switch (VT.getSimpleVT()) {
2534   default:
2535     cerr << "CellSPU: Unknown LowerMUL value type, got "
2536          << Op.getValueType().getMVTString()
2537          << "\n";
2538     abort();
2539     /*NOTREACHED*/
2540
2541   case MVT::i32: {
2542     SDValue rA = Op.getOperand(0);
2543     SDValue rB = Op.getOperand(1);
2544
2545     return DAG.getNode(ISD::ADD, MVT::i32,
2546                        DAG.getNode(ISD::ADD, MVT::i32,
2547                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2548                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2549                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2550   }
2551   }
2552
2553   return SDValue();
2554 }
2555
2556 //! Custom lowering for CTPOP (count population)
2557 /*!
2558   Custom lowering code that counts the number ones in the input
2559   operand. SPU has such an instruction, but it counts the number of
2560   ones per byte, which then have to be accumulated.
2561 */
2562 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2563   MVT VT = Op.getValueType();
2564   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2565
2566   switch (VT.getSimpleVT()) {
2567   default:
2568     assert(false && "Invalid value type!");
2569   case MVT::i8: {
2570     SDValue N = Op.getOperand(0);
2571     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2572
2573     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2574     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2575
2576     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2577   }
2578
2579   case MVT::i16: {
2580     MachineFunction &MF = DAG.getMachineFunction();
2581     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2582
2583     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2584
2585     SDValue N = Op.getOperand(0);
2586     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2587     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2588     SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2589
2590     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2591     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2592
2593     // CNTB_result becomes the chain to which all of the virtual registers
2594     // CNTB_reg, SUM1_reg become associated:
2595     SDValue CNTB_result =
2596       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2597
2598     SDValue CNTB_rescopy =
2599       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2600
2601     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2602
2603     return DAG.getNode(ISD::AND, MVT::i16,
2604                        DAG.getNode(ISD::ADD, MVT::i16,
2605                                    DAG.getNode(ISD::SRL, MVT::i16,
2606                                                Tmp1, Shift1),
2607                                    Tmp1),
2608                        Mask0);
2609   }
2610
2611   case MVT::i32: {
2612     MachineFunction &MF = DAG.getMachineFunction();
2613     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2614
2615     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2616     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2617
2618     SDValue N = Op.getOperand(0);
2619     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2620     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2621     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2622     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2623
2624     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2625     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2626
2627     // CNTB_result becomes the chain to which all of the virtual registers
2628     // CNTB_reg, SUM1_reg become associated:
2629     SDValue CNTB_result =
2630       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2631
2632     SDValue CNTB_rescopy =
2633       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2634
2635     SDValue Comp1 =
2636       DAG.getNode(ISD::SRL, MVT::i32,
2637                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2638
2639     SDValue Sum1 =
2640       DAG.getNode(ISD::ADD, MVT::i32,
2641                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2642
2643     SDValue Sum1_rescopy =
2644       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2645
2646     SDValue Comp2 =
2647       DAG.getNode(ISD::SRL, MVT::i32,
2648                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2649                   Shift2);
2650     SDValue Sum2 =
2651       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2652                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2653
2654     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2655   }
2656
2657   case MVT::i64:
2658     break;
2659   }
2660
2661   return SDValue();
2662 }
2663
2664 /// LowerOperation - Provide custom lowering hooks for some operations.
2665 ///
2666 SDValue
2667 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2668 {
2669   unsigned Opc = (unsigned) Op.getOpcode();
2670   MVT VT = Op.getValueType();
2671
2672   switch (Opc) {
2673   default: {
2674     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2675     cerr << "Op.getOpcode() = " << Opc << "\n";
2676     cerr << "*Op.getNode():\n";
2677     Op.getNode()->dump();
2678     abort();
2679   }
2680   case ISD::LOAD:
2681   case ISD::SEXTLOAD:
2682   case ISD::ZEXTLOAD:
2683     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2684   case ISD::STORE:
2685     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2686   case ISD::ConstantPool:
2687     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2688   case ISD::GlobalAddress:
2689     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2690   case ISD::JumpTable:
2691     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2692   case ISD::Constant:
2693     return LowerConstant(Op, DAG);
2694   case ISD::ConstantFP:
2695     return LowerConstantFP(Op, DAG);
2696   case ISD::BRCOND:
2697     return LowerBRCOND(Op, DAG);
2698   case ISD::FORMAL_ARGUMENTS:
2699     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2700   case ISD::CALL:
2701     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2702   case ISD::RET:
2703     return LowerRET(Op, DAG, getTargetMachine());
2704
2705
2706   // i8, i64 math ops:
2707   case ISD::ZERO_EXTEND:
2708   case ISD::SIGN_EXTEND:
2709   case ISD::ANY_EXTEND:
2710   case ISD::ADD:
2711   case ISD::SUB:
2712   case ISD::ROTR:
2713   case ISD::ROTL:
2714   case ISD::SRL:
2715   case ISD::SHL:
2716   case ISD::SRA: {
2717     if (VT == MVT::i8)
2718       return LowerI8Math(Op, DAG, Opc);
2719     else if (VT == MVT::i64)
2720       return LowerI64Math(Op, DAG, Opc);
2721     break;
2722   }
2723
2724   // Vector-related lowering.
2725   case ISD::BUILD_VECTOR:
2726     return LowerBUILD_VECTOR(Op, DAG);
2727   case ISD::SCALAR_TO_VECTOR:
2728     return LowerSCALAR_TO_VECTOR(Op, DAG);
2729   case ISD::VECTOR_SHUFFLE:
2730     return LowerVECTOR_SHUFFLE(Op, DAG);
2731   case ISD::EXTRACT_VECTOR_ELT:
2732     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2733   case ISD::INSERT_VECTOR_ELT:
2734     return LowerINSERT_VECTOR_ELT(Op, DAG);
2735
2736   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2737   case ISD::AND:
2738   case ISD::OR:
2739   case ISD::XOR:
2740     return LowerByteImmed(Op, DAG);
2741
2742   // Vector and i8 multiply:
2743   case ISD::MUL:
2744     if (VT.isVector())
2745       return LowerVectorMUL(Op, DAG);
2746     else if (VT == MVT::i8)
2747       return LowerI8Math(Op, DAG, Opc);
2748     else
2749       return LowerMUL(Op, DAG, VT, Opc);
2750
2751   case ISD::FDIV:
2752     if (VT == MVT::f32 || VT == MVT::v4f32)
2753       return LowerFDIVf32(Op, DAG);
2754 //    else if (Op.getValueType() == MVT::f64)
2755 //      return LowerFDIVf64(Op, DAG);
2756     else
2757       assert(0 && "Calling FDIV on unsupported MVT");
2758
2759   case ISD::CTPOP:
2760     return LowerCTPOP(Op, DAG);
2761   }
2762
2763   return SDValue();
2764 }
2765
2766 //===----------------------------------------------------------------------===//
2767 // Target Optimization Hooks
2768 //===----------------------------------------------------------------------===//
2769
2770 SDValue
2771 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2772 {
2773 #if 0
2774   TargetMachine &TM = getTargetMachine();
2775 #endif
2776   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2777   SelectionDAG &DAG = DCI.DAG;
2778   SDValue Op0 = N->getOperand(0);      // everything has at least one operand
2779   SDValue Result;                     // Initially, NULL result
2780
2781   switch (N->getOpcode()) {
2782   default: break;
2783   case ISD::ADD: {
2784     SDValue Op1 = N->getOperand(1);
2785
2786     if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2787       SDValue Op01 = Op0.getOperand(1);
2788       if (Op01.getOpcode() == ISD::Constant
2789           || Op01.getOpcode() == ISD::TargetConstant) {
2790         // (add <const>, (SPUindirect <arg>, <const>)) ->
2791         // (SPUindirect <arg>, <const + const>)
2792         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2793         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2794         SDValue combinedConst =
2795           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2796                           Op0.getValueType());
2797
2798         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2799                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2800         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2801                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2802         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2803                            Op0.getOperand(0), combinedConst);
2804       }
2805     } else if (isa<ConstantSDNode>(Op0)
2806                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2807       SDValue Op11 = Op1.getOperand(1);
2808       if (Op11.getOpcode() == ISD::Constant
2809           || Op11.getOpcode() == ISD::TargetConstant) {
2810         // (add (SPUindirect <arg>, <const>), <const>) ->
2811         // (SPUindirect <arg>, <const + const>)
2812         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2813         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2814         SDValue combinedConst =
2815           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2816                           Op0.getValueType());
2817
2818         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2819                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2820         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2821                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2822
2823         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2824                            Op1.getOperand(0), combinedConst);
2825       }
2826     }
2827     break;
2828   }
2829   case ISD::SIGN_EXTEND:
2830   case ISD::ZERO_EXTEND:
2831   case ISD::ANY_EXTEND: {
2832     if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2833         N->getValueType(0) == Op0.getValueType()) {
2834       // (any_extend (SPUextract_elt0 <arg>)) ->
2835       // (SPUextract_elt0 <arg>)
2836       // Types must match, however...
2837       DEBUG(cerr << "Replace: ");
2838       DEBUG(N->dump(&DAG));
2839       DEBUG(cerr << "\nWith:    ");
2840       DEBUG(Op0.getNode()->dump(&DAG));
2841       DEBUG(cerr << "\n");
2842
2843       return Op0;
2844     }
2845     break;
2846   }
2847   case SPUISD::IndirectAddr: {
2848     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2849       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2850       if (CN->getZExtValue() == 0) {
2851         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2852         // (SPUaform <addr>, 0)
2853
2854         DEBUG(cerr << "Replace: ");
2855         DEBUG(N->dump(&DAG));
2856         DEBUG(cerr << "\nWith:    ");
2857         DEBUG(Op0.getNode()->dump(&DAG));
2858         DEBUG(cerr << "\n");
2859
2860         return Op0;
2861       }
2862     }
2863     break;
2864   }
2865   case SPUISD::SHLQUAD_L_BITS:
2866   case SPUISD::SHLQUAD_L_BYTES:
2867   case SPUISD::VEC_SHL:
2868   case SPUISD::VEC_SRL:
2869   case SPUISD::VEC_SRA:
2870   case SPUISD::ROTQUAD_RZ_BYTES:
2871   case SPUISD::ROTQUAD_RZ_BITS: {
2872     SDValue Op1 = N->getOperand(1);
2873
2874     if (isa<ConstantSDNode>(Op1)) {
2875       // Kill degenerate vector shifts:
2876       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2877
2878       if (CN->getZExtValue() == 0) {
2879         Result = Op0;
2880       }
2881     }
2882     break;
2883   }
2884   case SPUISD::PROMOTE_SCALAR: {
2885     switch (Op0.getOpcode()) {
2886     default:
2887       break;
2888     case ISD::ANY_EXTEND:
2889     case ISD::ZERO_EXTEND:
2890     case ISD::SIGN_EXTEND: {
2891       // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2892       // <arg>
2893       // but only if the SPUpromote_scalar and <arg> types match.
2894       SDValue Op00 = Op0.getOperand(0);
2895       if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2896         SDValue Op000 = Op00.getOperand(0);
2897         if (Op000.getValueType() == N->getValueType(0)) {
2898           Result = Op000;
2899         }
2900       }
2901       break;
2902     }
2903     case SPUISD::EXTRACT_ELT0: {
2904       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2905       // <arg>
2906       Result = Op0.getOperand(0);
2907       break;
2908     }
2909     }
2910     break;
2911   }
2912   }
2913   // Otherwise, return unchanged.
2914 #if 1
2915   if (Result.getNode()) {
2916     DEBUG(cerr << "\nReplace.SPU: ");
2917     DEBUG(N->dump(&DAG));
2918     DEBUG(cerr << "\nWith:        ");
2919     DEBUG(Result.getNode()->dump(&DAG));
2920     DEBUG(cerr << "\n");
2921   }
2922 #endif
2923
2924   return Result;
2925 }
2926
2927 //===----------------------------------------------------------------------===//
2928 // Inline Assembly Support
2929 //===----------------------------------------------------------------------===//
2930
2931 /// getConstraintType - Given a constraint letter, return the type of
2932 /// constraint it is for this target.
2933 SPUTargetLowering::ConstraintType
2934 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2935   if (ConstraintLetter.size() == 1) {
2936     switch (ConstraintLetter[0]) {
2937     default: break;
2938     case 'b':
2939     case 'r':
2940     case 'f':
2941     case 'v':
2942     case 'y':
2943       return C_RegisterClass;
2944     }
2945   }
2946   return TargetLowering::getConstraintType(ConstraintLetter);
2947 }
2948
2949 std::pair<unsigned, const TargetRegisterClass*>
2950 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2951                                                 MVT VT) const
2952 {
2953   if (Constraint.size() == 1) {
2954     // GCC RS6000 Constraint Letters
2955     switch (Constraint[0]) {
2956     case 'b':   // R1-R31
2957     case 'r':   // R0-R31
2958       if (VT == MVT::i64)
2959         return std::make_pair(0U, SPU::R64CRegisterClass);
2960       return std::make_pair(0U, SPU::R32CRegisterClass);
2961     case 'f':
2962       if (VT == MVT::f32)
2963         return std::make_pair(0U, SPU::R32FPRegisterClass);
2964       else if (VT == MVT::f64)
2965         return std::make_pair(0U, SPU::R64FPRegisterClass);
2966       break;
2967     case 'v':
2968       return std::make_pair(0U, SPU::GPRCRegisterClass);
2969     }
2970   }
2971
2972   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2973 }
2974
2975 //! Compute used/known bits for a SPU operand
2976 void
2977 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2978                                                   const APInt &Mask,
2979                                                   APInt &KnownZero,
2980                                                   APInt &KnownOne,
2981                                                   const SelectionDAG &DAG,
2982                                                   unsigned Depth ) const {
2983 #if 0
2984   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2985 #endif
2986
2987   switch (Op.getOpcode()) {
2988   default:
2989     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2990     break;
2991
2992 #if 0
2993   case CALL:
2994   case SHUFB:
2995   case INSERT_MASK:
2996   case CNTB:
2997 #endif
2998
2999   case SPUISD::PROMOTE_SCALAR: {
3000     SDValue Op0 = Op.getOperand(0);
3001     MVT Op0VT = Op0.getValueType();
3002     unsigned Op0VTBits = Op0VT.getSizeInBits();
3003     uint64_t InMask = Op0VT.getIntegerVTBitMask();
3004     KnownZero |= APInt(Op0VTBits, ~InMask, false);
3005     KnownOne |= APInt(Op0VTBits, InMask, false);
3006     break;
3007   }
3008
3009   case SPUISD::LDRESULT:
3010   case SPUISD::EXTRACT_ELT0:
3011   case SPUISD::EXTRACT_ELT0_CHAINED: {
3012     MVT OpVT = Op.getValueType();
3013     unsigned OpVTBits = OpVT.getSizeInBits();
3014     uint64_t InMask = OpVT.getIntegerVTBitMask();
3015     KnownZero |= APInt(OpVTBits, ~InMask, false);
3016     KnownOne |= APInt(OpVTBits, InMask, false);
3017     break;
3018   }
3019
3020 #if 0
3021   case EXTRACT_I1_ZEXT:
3022   case EXTRACT_I1_SEXT:
3023   case EXTRACT_I8_ZEXT:
3024   case EXTRACT_I8_SEXT:
3025   case MPY:
3026   case MPYU:
3027   case MPYH:
3028   case MPYHH:
3029   case SPUISD::SHLQUAD_L_BITS:
3030   case SPUISD::SHLQUAD_L_BYTES:
3031   case SPUISD::VEC_SHL:
3032   case SPUISD::VEC_SRL:
3033   case SPUISD::VEC_SRA:
3034   case SPUISD::VEC_ROTL:
3035   case SPUISD::VEC_ROTR:
3036   case SPUISD::ROTQUAD_RZ_BYTES:
3037   case SPUISD::ROTQUAD_RZ_BITS:
3038   case SPUISD::ROTBYTES_RIGHT_S:
3039   case SPUISD::ROTBYTES_LEFT:
3040   case SPUISD::ROTBYTES_LEFT_CHAINED:
3041   case SPUISD::SELECT_MASK:
3042   case SPUISD::SELB:
3043   case SPUISD::FPInterp:
3044   case SPUISD::FPRecipEst:
3045   case SPUISD::SEXT32TO64:
3046 #endif
3047   }
3048 }
3049
3050 // LowerAsmOperandForConstraint
3051 void
3052 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3053                                                 char ConstraintLetter,
3054                                                 std::vector<SDValue> &Ops,
3055                                                 SelectionDAG &DAG) const {
3056   // Default, for the time being, to the base class handler
3057   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3058 }
3059
3060 /// isLegalAddressImmediate - Return true if the integer value can be used
3061 /// as the offset of the target addressing mode.
3062 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3063                                                 const Type *Ty) const {
3064   // SPU's addresses are 256K:
3065   return (V > -(1 << 18) && V < (1 << 18) - 1);
3066 }
3067
3068 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3069   return false;
3070 }