lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "llvm/ADT/VectorExtras.h"
  18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT::ValueType mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT::ValueType        valtype;
  43     const int                   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << MVT::getValueTypeString(VT)
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDOperand &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! Predicate that returns true if the operand is an indirect target
 105   bool isIndirectOperand(const SDOperand &Op)
 106   {
 107     const unsigned Opc = Op.getOpcode();
 108     return (Opc == ISD::Register
 109             || Opc == SPUISD::LDRESULT);
 110   }
 111 }
 112
 113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 114   : TargetLowering(TM),
 115     SPUTM(TM)
 116 {
 117   // Fold away setcc operations if possible.
 118   setPow2DivIsCheap();
 119
 120   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 121   setUseUnderscoreSetJmp(true);
 122   setUseUnderscoreLongJmp(true);
 123
 124   // Set up the SPU's register classes:
 125   // NOTE: i8 register class is not registered because we cannot determine when
 126   // we need to zero or sign extend for custom-lowered loads and stores.
 127   // NOTE: Ignore the previous note. For now. :-)
 128   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 129   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 130   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 131   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 132   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 133   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 134   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 135
 136   // SPU has no sign or zero extended loads for i1, i8, i16:
 137   setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
 138   setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 139   setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 140   setTruncStoreAction(MVT::i8, MVT::i1, Custom);
 141   setTruncStoreAction(MVT::i16, MVT::i1, Custom);
 142   setTruncStoreAction(MVT::i32, MVT::i1, Custom);
 143   setTruncStoreAction(MVT::i64, MVT::i1, Custom);
 144   setTruncStoreAction(MVT::i128, MVT::i1, Custom);
 145
 146   setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
 147   setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
 148   setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 149   setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
 150   setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
 151   setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
 152   setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
 153   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
 154
 155   setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
 156   setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
 157   setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 158
 159   // SPU constant load actions are custom lowered:
 160   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 161   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
 162   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 163
 164   // SPU's loads and stores have to be custom lowered:
 165   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 166        ++sctype) {
 167     setOperationAction(ISD::LOAD, sctype, Custom);
 168     setOperationAction(ISD::STORE, sctype, Custom);
 169   }
 170
 171   // Custom lower BRCOND for i1, i8 to "promote" the result to
 172   // i32 and i16, respectively.
 173   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 174
 175   // Expand the jumptable branches
 176   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 177   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 178   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 179
 180   // SPU has no intrinsics for these particular operations:
 181   setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
 182   setOperationAction(ISD::MEMSET, MVT::Other, Expand);
 183   setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
 184
 185   // PowerPC has no SREM/UREM instructions
 186   setOperationAction(ISD::SREM, MVT::i32, Expand);
 187   setOperationAction(ISD::UREM, MVT::i32, Expand);
 188   setOperationAction(ISD::SREM, MVT::i64, Expand);
 189   setOperationAction(ISD::UREM, MVT::i64, Expand);
 190
 191   // We don't support sin/cos/sqrt/fmod
 192   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 193   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 194   setOperationAction(ISD::FREM , MVT::f64, Expand);
 195   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 196   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 197   setOperationAction(ISD::FREM , MVT::f32, Expand);
 198
 199   // If we're enabling GP optimizations, use hardware square root
 200   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 201   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 202
 203   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 204   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 205
 206   // SPU can do rotate right and left, so legalize it... but customize for i8
 207   // because instructions don't exist.
 208   setOperationAction(ISD::ROTR, MVT::i32,    Legal);
 209   setOperationAction(ISD::ROTR, MVT::i16,    Legal);
 210   setOperationAction(ISD::ROTR, MVT::i8,     Custom);
 211   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 212   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 213   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 214   // SPU has no native version of shift left/right for i8
 215   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 216   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 217   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 218
 219   // Custom lower i32 multiplications
 220   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 221
 222   // Need to custom handle (some) common i8 math ops
 223   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 224   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 225
 226   // SPU does not have BSWAP. It does have i32 support CTLZ.
 227   // CTPOP has to be custom lowered.
 228   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 229   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 230
 231   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 232   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 233   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 234   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 235
 236   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 237   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 238
 239   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 240
 241   // SPU does not have select or setcc
 242   setOperationAction(ISD::SELECT, MVT::i1,   Expand);
 243   setOperationAction(ISD::SELECT, MVT::i8,   Expand);
 244   setOperationAction(ISD::SELECT, MVT::i16,  Expand);
 245   setOperationAction(ISD::SELECT, MVT::i32,  Expand);
 246   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 247   setOperationAction(ISD::SELECT, MVT::f32,  Expand);
 248   setOperationAction(ISD::SELECT, MVT::f64,  Expand);
 249
 250   setOperationAction(ISD::SETCC, MVT::i1,   Expand);
 251   setOperationAction(ISD::SETCC, MVT::i8,   Expand);
 252   setOperationAction(ISD::SETCC, MVT::i16,  Expand);
 253   setOperationAction(ISD::SETCC, MVT::i32,  Expand);
 254   setOperationAction(ISD::SETCC, MVT::i64,  Expand);
 255   setOperationAction(ISD::SETCC, MVT::f32,  Expand);
 256   setOperationAction(ISD::SETCC, MVT::f64,  Expand);
 257
 258   // SPU has a legal FP -> signed INT instruction
 259   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 260   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 261   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 262   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 263
 264   // FDIV on SPU requires custom lowering
 265   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 266   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 267
 268   // SPU has [U|S]INT_TO_FP
 269   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 270   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 271   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 272   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 273   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 274   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 275   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 276   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 277
 278   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 279   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 280   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 281   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 282
 283   // We cannot sextinreg(i1).  Expand to shifts.
 284   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 285
 286   // Support label based line numbers.
 287   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
 288   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 289
 290   // We want to legalize GlobalAddress and ConstantPool nodes into the
 291   // appropriate instructions to materialize the address.
 292   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 293        ++sctype) {
 294     setOperationAction(ISD::GlobalAddress, sctype, Custom);
 295     setOperationAction(ISD::ConstantPool,  sctype, Custom);
 296     setOperationAction(ISD::JumpTable,     sctype, Custom);
 297   }
 298
 299   // RET must be custom lowered, to meet ABI requirements
 300   setOperationAction(ISD::RET,           MVT::Other, Custom);
 301
 302   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 303   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 304
 305   // Use the default implementation.
 306   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 307   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 308   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 309   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 310   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 311   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 312   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 313
 314   // Cell SPU has instructions for converting between i64 and fp.
 315   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 316   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 317
 318   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 319   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 320
 321   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 322   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 323
 324   // First set operation action for all vector types to expand. Then we
 325   // will selectively turn on ones that can be effectively codegen'd.
 326   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 327   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 328   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 329   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 330   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 331   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 332
 333   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 334        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
 335     // add/sub are legal for all supported vector VT's.
 336     setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
 337     setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
 338     // mul has to be custom lowered.
 339     setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
 340
 341     setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
 342     setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
 343     setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
 344     setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
 345     setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
 346     setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);
 347
 348     // These operations need to be expanded:
 349     setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
 350     setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
 351     setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
 352     setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
 353     setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
 354
 355     // Custom lower build_vector, constant pool spills, insert and
 356     // extract vector elements:
 357     setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
 358     setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
 359     setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
 360     setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
 361     setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
 362     setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
 363   }
 364
 365   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 366   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 367   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 368   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 369   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 370
 371   setSetCCResultType(MVT::i32);
 372   setShiftAmountType(MVT::i32);
 373   setSetCCResultContents(ZeroOrOneSetCCResult);
 374
 375   setStackPointerRegisterToSaveRestore(SPU::R1);
 376
 377   // We have target-specific dag combine patterns for the following nodes:
 378   setTargetDAGCombine(ISD::ADD);
 379
 380   computeRegisterProperties();
 381 }
 382
 383 const char *
 384 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 385 {
 386   if (node_names.empty()) {
 387     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 388     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 389     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 390     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 391     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 392     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 393     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 394     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 395     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 396     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 397     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 398     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 399     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 400     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
 401     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 402     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 403     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 404     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 405     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 406     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 407     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 408     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 409     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 410     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 411     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 412     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 413     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 414     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
 415       "SPUISD::ROTBYTES_RIGHT_Z";
 416     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 417       "SPUISD::ROTBYTES_RIGHT_S";
 418     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 419     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 420       "SPUISD::ROTBYTES_LEFT_CHAINED";
 421     node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
 422     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 423     node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
 424     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 425     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 426     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 427   }
 428
 429   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 430
 431   return ((i != node_names.end()) ? i->second : 0);
 432 }
 433
 434 //===----------------------------------------------------------------------===//
 435 // Calling convention code:
 436 //===----------------------------------------------------------------------===//
 437
 438 #include "SPUGenCallingConv.inc"
 439
 440 //===----------------------------------------------------------------------===//
 441 //  LowerOperation implementation
 442 //===----------------------------------------------------------------------===//
 443
 444 /// Aligned load common code for CellSPU
 445 /*!
 446   \param[in] Op The SelectionDAG load or store operand
 447   \param[in] DAG The selection DAG
 448   \param[in] ST CellSPU subtarget information structure
 449   \param[in,out] alignment Caller initializes this to the load or store node's
 450   value from getAlignment(), may be updated while generating the aligned load
 451   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 452   offset (divisible by 16, modulo 16 == 0)
 453   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 454   offset of the preferred slot (modulo 16 != 0)
 455   \param[in,out] VT Caller initializes this value type to the the load or store
 456   node's loaded or stored value type; may be updated if an i1-extended load or
 457   store.
 458   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 459   otherwise false. Can help to determine if the chunk needs to be rotated.
 460
 461  Both load and store lowering load a block of data aligned on a 16-byte
 462  boundary. This is the common aligned load code shared between both.
 463  */
 464 static SDOperand
 465 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 466             LSBaseSDNode *LSN,
 467             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 468             MVT::ValueType &VT, bool &was16aligned)
 469 {
 470   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 471   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 472   SDOperand basePtr = LSN->getBasePtr();
 473   SDOperand chain = LSN->getChain();
 474
 475   if (basePtr.getOpcode() == ISD::ADD) {
 476     SDOperand Op1 = basePtr.Val->getOperand(1);
 477
 478     if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
 479       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 480
 481       alignOffs = (int) CN->getValue();
 482       prefSlotOffs = (int) (alignOffs & 0xf);
 483
 484       // Adjust the rotation amount to ensure that the final result ends up in
 485       // the preferred slot:
 486       prefSlotOffs -= vtm->prefslot_byte;
 487       basePtr = basePtr.getOperand(0);
 488
 489       // Loading from memory, can we adjust alignment?
 490       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 491         SDOperand APtr = basePtr.getOperand(0);
 492         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 493           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 494           alignment = GSDN->getGlobal()->getAlignment();
 495         }
 496       }
 497     } else {
 498       alignOffs = 0;
 499       prefSlotOffs = -vtm->prefslot_byte;
 500     }
 501   } else {
 502     alignOffs = 0;
 503     prefSlotOffs = -vtm->prefslot_byte;
 504   }
 505
 506   if (alignment == 16) {
 507     // Realign the base pointer as a D-Form address:
 508     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 509       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 510                             basePtr,
 511                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 512     }
 513
 514     // Emit the vector load:
 515     was16aligned = true;
 516     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 517                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 518                        LSN->isVolatile(), 16);
 519   }
 520
 521   // Unaligned load or we're using the "large memory" model, which means that
 522   // we have to be very pessimistic:
 523   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 524     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
 525   }
 526
 527   // Add the offset
 528   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 529                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 530   was16aligned = false;
 531   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 532                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 533                      LSN->isVolatile(), 16);
 534 }
 535
 536 /// Custom lower loads for CellSPU
 537 /*!
 538  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 539  within a 16-byte block, we have to rotate to extract the requested element.
 540  */
 541 static SDOperand
 542 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 543   LoadSDNode *LN = cast<LoadSDNode>(Op);
 544   SDOperand the_chain = LN->getChain();
 545   MVT::ValueType VT = LN->getMemoryVT();
 546   MVT::ValueType OpVT = Op.Val->getValueType(0);
 547   ISD::LoadExtType ExtType = LN->getExtensionType();
 548   unsigned alignment = LN->getAlignment();
 549   SDOperand Ops[8];
 550
 551   switch (LN->getAddressingMode()) {
 552   case ISD::UNINDEXED: {
 553     int offset, rotamt;
 554     bool was16aligned;
 555     SDOperand result =
 556       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 557
 558     if (result.Val == 0)
 559       return result;
 560
 561     the_chain = result.getValue(1);
 562     // Rotate the chunk if necessary
 563     if (rotamt < 0)
 564       rotamt += 16;
 565     if (rotamt != 0 || !was16aligned) {
 566       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 567
 568       Ops[0] = the_chain;
 569       Ops[1] = result;
 570       if (was16aligned) {
 571         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 572       } else {
 573         MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 574         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 575         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 576                              DAG.getConstant(rotamt, PtrVT));
 577       }
 578
 579       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 580       the_chain = result.getValue(1);
 581     }
 582
 583     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 584       SDVTList scalarvts;
 585       MVT::ValueType vecVT = MVT::v16i8;
 586
 587       // Convert the loaded v16i8 vector to the appropriate vector type
 588       // specified by the operand:
 589       if (OpVT == VT) {
 590         if (VT != MVT::i1)
 591           vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
 592       } else
 593         vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
 594
 595       Ops[0] = the_chain;
 596       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 597       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 598       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 599       the_chain = result.getValue(1);
 600     } else {
 601       // Handle the sign and zero-extending loads for i1 and i8:
 602       unsigned NewOpC;
 603
 604       if (ExtType == ISD::SEXTLOAD) {
 605         NewOpC = (OpVT == MVT::i1
 606                   ? SPUISD::EXTRACT_I1_SEXT
 607                   : SPUISD::EXTRACT_I8_SEXT);
 608       } else {
 609         assert(ExtType == ISD::ZEXTLOAD);
 610         NewOpC = (OpVT == MVT::i1
 611                   ? SPUISD::EXTRACT_I1_ZEXT
 612                   : SPUISD::EXTRACT_I8_ZEXT);
 613       }
 614
 615       result = DAG.getNode(NewOpC, OpVT, result);
 616     }
 617
 618     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 619     SDOperand retops[2] = {
 620       result,
 621       the_chain
 622     };
 623
 624     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 625                          retops, sizeof(retops) / sizeof(retops[0]));
 626     return result;
 627   }
 628   case ISD::PRE_INC:
 629   case ISD::PRE_DEC:
 630   case ISD::POST_INC:
 631   case ISD::POST_DEC:
 632   case ISD::LAST_INDEXED_MODE:
 633     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 634             "UNINDEXED\n";
 635     cerr << (unsigned) LN->getAddressingMode() << "\n";
 636     abort();
 637     /*NOTREACHED*/
 638   }
 639
 640   return SDOperand();
 641 }
 642
 643 /// Custom lower stores for CellSPU
 644 /*!
 645  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 646  within a 16-byte block, we have to generate a shuffle to insert the
 647  requested element into its place, then store the resulting block.
 648  */
 649 static SDOperand
 650 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 651   StoreSDNode *SN = cast<StoreSDNode>(Op);
 652   SDOperand Value = SN->getValue();
 653   MVT::ValueType VT = Value.getValueType();
 654   MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 655   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 656   unsigned alignment = SN->getAlignment();
 657
 658   switch (SN->getAddressingMode()) {
 659   case ISD::UNINDEXED: {
 660     int chunk_offset, slot_offset;
 661     bool was16aligned;
 662
 663     // The vector type we really want to load from the 16-byte chunk, except
 664     // in the case of MVT::i1, which has to be v16i8.
 665     unsigned vecVT, stVecVT = MVT::v16i8;
 666
 667     if (StVT != MVT::i1)
 668       stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
 669     vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
 670
 671     SDOperand alignLoadVec =
 672       AlignedLoad(Op, DAG, ST, SN, alignment,
 673                   chunk_offset, slot_offset, VT, was16aligned);
 674
 675     if (alignLoadVec.Val == 0)
 676       return alignLoadVec;
 677
 678     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 679     SDOperand basePtr = LN->getBasePtr();
 680     SDOperand the_chain = alignLoadVec.getValue(1);
 681     SDOperand theValue = SN->getValue();
 682     SDOperand result;
 683
 684     if (StVT != VT
 685         && (theValue.getOpcode() == ISD::AssertZext
 686             || theValue.getOpcode() == ISD::AssertSext)) {
 687       // Drill down and get the value for zero- and sign-extended
 688       // quantities
 689       theValue = theValue.getOperand(0);
 690     }
 691
 692     chunk_offset &= 0xf;
 693
 694     SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 695     SDOperand insertEltPtr;
 696     SDOperand insertEltOp;
 697
 698     // If the base pointer is already a D-form address, then just create
 699     // a new D-form address with a slot offset and the orignal base pointer.
 700     // Otherwise generate a D-form address with the slot offset relative
 701     // to the stack pointer, which is always aligned.
 702     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 703     DEBUG(basePtr.Val->dump(&DAG));
 704     DEBUG(cerr << "\n");
 705
 706     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 707         (basePtr.getOpcode() == ISD::ADD
 708          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 709       insertEltPtr = basePtr;
 710     } else {
 711 #if 0
 712       // $sp is always aligned, so use it when necessary to avoid loading
 713       // an address
 714       SDOperand ptrP =
 715         basePtr.Val->hasOneUse() ? DAG.getRegister(SPU::R1, PtrVT) : basePtr;
 716       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, ptrP, insertEltOffs);
 717 #else
 718       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 719 #endif
 720     }
 721
 722     insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
 723     result = DAG.getNode(SPUISD::SHUFB, vecVT,
 724                          DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
 725                          alignLoadVec,
 726                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 727
 728     result = DAG.getStore(the_chain, result, basePtr,
 729                           LN->getSrcValue(), LN->getSrcValueOffset(),
 730                           LN->isVolatile(), LN->getAlignment());
 731
 732     return result;
 733     /*UNREACHED*/
 734   }
 735   case ISD::PRE_INC:
 736   case ISD::PRE_DEC:
 737   case ISD::POST_INC:
 738   case ISD::POST_DEC:
 739   case ISD::LAST_INDEXED_MODE:
 740     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 741             "UNINDEXED\n";
 742     cerr << (unsigned) SN->getAddressingMode() << "\n";
 743     abort();
 744     /*NOTREACHED*/
 745   }
 746
 747   return SDOperand();
 748 }
 749
 750 /// Generate the address of a constant pool entry.
 751 static SDOperand
 752 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 753   MVT::ValueType PtrVT = Op.getValueType();
 754   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 755   Constant *C = CP->getConstVal();
 756   SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 757   SDOperand Zero = DAG.getConstant(0, PtrVT);
 758   const TargetMachine &TM = DAG.getTarget();
 759
 760   if (TM.getRelocationModel() == Reloc::Static) {
 761     if (!ST->usingLargeMem()) {
 762       // Just return the SDOperand with the constant pool address in it.
 763       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 764     } else {
 765 #if 1
 766       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 767       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 768
 769       return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
 770 #else
 771       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, CPI, Zero);
 772 #endif
 773     }
 774   }
 775
 776   assert(0 &&
 777          "LowerConstantPool: Relocation model other than static not supported.");
 778   return SDOperand();
 779 }
 780
 781 static SDOperand
 782 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 783   MVT::ValueType PtrVT = Op.getValueType();
 784   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 785   SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 786   SDOperand Zero = DAG.getConstant(0, PtrVT);
 787   const TargetMachine &TM = DAG.getTarget();
 788
 789   if (TM.getRelocationModel() == Reloc::Static) {
 790     SDOperand JmpAForm = DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 791     return (!ST->usingLargeMem()
 792             ? JmpAForm
 793             : DAG.getNode(SPUISD::IndirectAddr, PtrVT, JmpAForm, Zero));
 794   }
 795
 796   assert(0 &&
 797          "LowerJumpTable: Relocation model other than static not supported.");
 798   return SDOperand();
 799 }
 800
 801 static SDOperand
 802 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 803   MVT::ValueType PtrVT = Op.getValueType();
 804   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 805   GlobalValue *GV = GSDN->getGlobal();
 806   SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 807   const TargetMachine &TM = DAG.getTarget();
 808   SDOperand Zero = DAG.getConstant(0, PtrVT);
 809
 810   if (TM.getRelocationModel() == Reloc::Static) {
 811     if (!ST->usingLargeMem()) {
 812       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 813     } else {
 814       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 815       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 816       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 817     }
 818   } else {
 819     cerr << "LowerGlobalAddress: Relocation model other than static not "
 820          << "supported.\n";
 821     abort();
 822     /*NOTREACHED*/
 823   }
 824
 825   return SDOperand();
 826 }
 827
 828 //! Custom lower i64 integer constants
 829 /*!
 830  This code inserts all of the necessary juggling that needs to occur to load
 831  a 64-bit constant into a register.
 832  */
 833 static SDOperand
 834 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
 835   unsigned VT = Op.getValueType();
 836   ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
 837
 838   if (VT == MVT::i64) {
 839     SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
 840     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 841                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 842
 843   } else {
 844     cerr << "LowerConstant: unhandled constant type "
 845          << MVT::getValueTypeString(VT)
 846          << "\n";
 847     abort();
 848     /*NOTREACHED*/
 849   }
 850
 851   return SDOperand();
 852 }
 853
 854 //! Custom lower single precision floating point constants
 855 /*!
 856   "float" immediates can be lowered as if they were unsigned 32-bit integers.
 857   The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
 858   target description.
 859  */
 860 static SDOperand
 861 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
 862   unsigned VT = Op.getValueType();
 863   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
 864
 865   assert((FP != 0) &&
 866          "LowerConstantFP: Node is not ConstantFPSDNode");
 867
 868   if (VT == MVT::f32) {
 869     float targetConst = FP->getValueAPF().convertToFloat();
 870     return DAG.getNode(SPUISD::SFPConstant, VT,
 871                        DAG.getTargetConstantFP(targetConst, VT));
 872   } else if (VT == MVT::f64) {
 873     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 874     return DAG.getNode(ISD::BIT_CONVERT, VT,
 875                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 876   }
 877
 878   return SDOperand();
 879 }
 880
 881 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 882 static SDOperand
 883 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
 884 {
 885   SDOperand Cond = Op.getOperand(1);
 886   MVT::ValueType CondVT = Cond.getValueType();
 887   MVT::ValueType CondNVT;
 888
 889   if (CondVT == MVT::i1 || CondVT == MVT::i8) {
 890     CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
 891     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 892                       Op.getOperand(0),
 893                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 894                       Op.getOperand(2));
 895   } else
 896     return SDOperand();                // Unchanged
 897 }
 898
 899 static SDOperand
 900 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 901 {
 902   MachineFunction &MF = DAG.getMachineFunction();
 903   MachineFrameInfo *MFI = MF.getFrameInfo();
 904   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 905   SmallVector<SDOperand, 8> ArgValues;
 906   SDOperand Root = Op.getOperand(0);
 907   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
 908
 909   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 910   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 911
 912   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 913   unsigned ArgRegIdx = 0;
 914   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 915
 916   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 917
 918   // Add DAG nodes to load the arguments or copy them out of registers.
 919   for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
 920     SDOperand ArgVal;
 921     bool needsLoad = false;
 922     MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
 923     unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
 924
 925     switch (ObjectVT) {
 926     default: {
 927       cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 928            << MVT::getValueTypeString(ObjectVT)
 929            << "\n";
 930       abort();
 931     }
 932     case MVT::i8:
 933       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 934         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
 935         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 936         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
 937         ++ArgRegIdx;
 938       } else {
 939         needsLoad = true;
 940       }
 941       break;
 942     case MVT::i16:
 943       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 944         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
 945         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 946         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
 947         ++ArgRegIdx;
 948       } else {
 949         needsLoad = true;
 950       }
 951       break;
 952     case MVT::i32:
 953       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 954         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 955         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 956         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
 957         ++ArgRegIdx;
 958       } else {
 959         needsLoad = true;
 960       }
 961       break;
 962     case MVT::i64:
 963       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 964         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
 965         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 966         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
 967         ++ArgRegIdx;
 968       } else {
 969         needsLoad = true;
 970       }
 971       break;
 972     case MVT::f32:
 973       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 974         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
 975         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 976         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
 977         ++ArgRegIdx;
 978       } else {
 979         needsLoad = true;
 980       }
 981       break;
 982     case MVT::f64:
 983       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 984         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
 985         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 986         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
 987         ++ArgRegIdx;
 988       } else {
 989         needsLoad = true;
 990       }
 991       break;
 992     case MVT::v2f64:
 993     case MVT::v4f32:
 994     case MVT::v4i32:
 995     case MVT::v8i16:
 996     case MVT::v16i8:
 997       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 998         unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
 999         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1000         ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1001         ++ArgRegIdx;
1002       } else {
1003         needsLoad = true;
1004       }
1005       break;
1006     }
1007
1008     // We need to load the argument to a virtual register if we determined above
1009     // that we ran out of physical registers of the appropriate type
1010     if (needsLoad) {
1011       // If the argument is actually used, emit a load from the right stack
1012       // slot.
1013       if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1014         int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1015         SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1016         ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1017       } else {
1018         // Don't emit a dead load.
1019         ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1020       }
1021
1022       ArgOffset += StackSlotSize;
1023     }
1024
1025     ArgValues.push_back(ArgVal);
1026   }
1027
1028   // If the function takes variable number of arguments, make a frame index for
1029   // the start of the first vararg value... for expansion of llvm.va_start.
1030   if (isVarArg) {
1031     VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1032                                                ArgOffset);
1033     SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1034     // If this function is vararg, store any remaining integer argument regs to
1035     // their spots on the stack so that they may be loaded by deferencing the
1036     // result of va_next.
1037     SmallVector<SDOperand, 8> MemOps;
1038     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1039       unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1040       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1041       SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1042       SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1043       MemOps.push_back(Store);
1044       // Increment the address by four for the next argument to store
1045       SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1046       FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1047     }
1048     if (!MemOps.empty())
1049       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1050   }
1051
1052   ArgValues.push_back(Root);
1053
1054   // Return the new list of results.
1055   std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1056                                     Op.Val->value_end());
1057   return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1058 }
1059
1060 /// isLSAAddress - Return the immediate to use if the specified
1061 /// value is representable as a LSA address.
1062 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1063   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1064   if (!C) return 0;
1065
1066   int Addr = C->getValue();
1067   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1068       (Addr << 14 >> 14) != Addr)
1069     return 0;  // Top 14 bits have to be sext of immediate.
1070
1071   return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1072 }
1073
1074 static
1075 SDOperand
1076 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1077   SDOperand Chain = Op.getOperand(0);
1078 #if 0
1079   bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1080   bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1081 #endif
1082   SDOperand Callee    = Op.getOperand(4);
1083   unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1084   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1085   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1086   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1087
1088   // Handy pointer type
1089   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1090
1091   // Accumulate how many bytes are to be pushed on the stack, including the
1092   // linkage area, and parameter passing area.  According to the SPU ABI,
1093   // we minimally need space for [LR] and [SP]
1094   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1095
1096   // Set up a copy of the stack pointer for use loading and storing any
1097   // arguments that may not fit in the registers available for argument
1098   // passing.
1099   SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1100
1101   // Figure out which arguments are going to go in registers, and which in
1102   // memory.
1103   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1104   unsigned ArgRegIdx = 0;
1105
1106   // Keep track of registers passing arguments
1107   std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1108   // And the arguments passed on the stack
1109   SmallVector<SDOperand, 8> MemOpChains;
1110
1111   for (unsigned i = 0; i != NumOps; ++i) {
1112     SDOperand Arg = Op.getOperand(5+2*i);
1113
1114     // PtrOff will be used to store the current argument to the stack if a
1115     // register cannot be found for it.
1116     SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1117     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1118
1119     switch (Arg.getValueType()) {
1120     default: assert(0 && "Unexpected ValueType for argument!");
1121     case MVT::i32:
1122     case MVT::i64:
1123     case MVT::i128:
1124       if (ArgRegIdx != NumArgRegs) {
1125         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1126       } else {
1127         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1128         ArgOffset += StackSlotSize;
1129       }
1130       break;
1131     case MVT::f32:
1132     case MVT::f64:
1133       if (ArgRegIdx != NumArgRegs) {
1134         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1135       } else {
1136         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1137         ArgOffset += StackSlotSize;
1138       }
1139       break;
1140     case MVT::v4f32:
1141     case MVT::v4i32:
1142     case MVT::v8i16:
1143     case MVT::v16i8:
1144       if (ArgRegIdx != NumArgRegs) {
1145         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1146       } else {
1147         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1148         ArgOffset += StackSlotSize;
1149       }
1150       break;
1151     }
1152   }
1153
1154   // Update number of stack bytes actually used, insert a call sequence start
1155   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1156   Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1157
1158   if (!MemOpChains.empty()) {
1159     // Adjust the stack pointer for the stack arguments.
1160     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1161                         &MemOpChains[0], MemOpChains.size());
1162   }
1163
1164   // Build a sequence of copy-to-reg nodes chained together with token chain
1165   // and flag operands which copy the outgoing args into the appropriate regs.
1166   SDOperand InFlag;
1167   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1168     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1169                              InFlag);
1170     InFlag = Chain.getValue(1);
1171   }
1172
1173   std::vector<MVT::ValueType> NodeTys;
1174   NodeTys.push_back(MVT::Other);   // Returns a chain
1175   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1176
1177   SmallVector<SDOperand, 8> Ops;
1178   unsigned CallOpc = SPUISD::CALL;
1179
1180   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1181   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1182   // node so that legalize doesn't hack it.
1183   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1184     GlobalValue *GV = G->getGlobal();
1185     unsigned CalleeVT = Callee.getValueType();
1186     SDOperand Zero = DAG.getConstant(0, PtrVT);
1187     SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1188
1189     if (!ST->usingLargeMem()) {
1190       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1191       // style calls, otherwise, external symbols are BRASL calls. This assumes
1192       // that declared/defined symbols are in the same compilation unit and can
1193       // be reached through PC-relative jumps.
1194       //
1195       // NOTE:
1196       // This may be an unsafe assumption for JIT and really large compilation
1197       // units.
1198       if (GV->isDeclaration()) {
1199         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1200       } else {
1201         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1202       }
1203     } else {
1204       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1205       // address pairs:
1206       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1207     }
1208   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1209     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1210   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1211     // If this is an absolute destination address that appears to be a legal
1212     // local store address, use the munged value.
1213     Callee = SDOperand(Dest, 0);
1214   }
1215
1216   Ops.push_back(Chain);
1217   Ops.push_back(Callee);
1218
1219   // Add argument registers to the end of the list so that they are known live
1220   // into the call.
1221   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1222     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1223                                   RegsToPass[i].second.getValueType()));
1224
1225   if (InFlag.Val)
1226     Ops.push_back(InFlag);
1227   Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1228   InFlag = Chain.getValue(1);
1229
1230   Chain = DAG.getCALLSEQ_END(Chain,
1231                              DAG.getConstant(NumStackBytes, PtrVT),
1232                              DAG.getConstant(0, PtrVT),
1233                              InFlag);
1234   if (Op.Val->getValueType(0) != MVT::Other)
1235     InFlag = Chain.getValue(1);
1236
1237   SDOperand ResultVals[3];
1238   unsigned NumResults = 0;
1239   NodeTys.clear();
1240
1241   // If the call has results, copy the values out of the ret val registers.
1242   switch (Op.Val->getValueType(0)) {
1243   default: assert(0 && "Unexpected ret value!");
1244   case MVT::Other: break;
1245   case MVT::i32:
1246     if (Op.Val->getValueType(1) == MVT::i32) {
1247       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1248       ResultVals[0] = Chain.getValue(0);
1249       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1250                                  Chain.getValue(2)).getValue(1);
1251       ResultVals[1] = Chain.getValue(0);
1252       NumResults = 2;
1253       NodeTys.push_back(MVT::i32);
1254     } else {
1255       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1256       ResultVals[0] = Chain.getValue(0);
1257       NumResults = 1;
1258     }
1259     NodeTys.push_back(MVT::i32);
1260     break;
1261   case MVT::i64:
1262     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1263     ResultVals[0] = Chain.getValue(0);
1264     NumResults = 1;
1265     NodeTys.push_back(MVT::i64);
1266     break;
1267   case MVT::f32:
1268   case MVT::f64:
1269     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1270                                InFlag).getValue(1);
1271     ResultVals[0] = Chain.getValue(0);
1272     NumResults = 1;
1273     NodeTys.push_back(Op.Val->getValueType(0));
1274     break;
1275   case MVT::v2f64:
1276   case MVT::v4f32:
1277   case MVT::v4i32:
1278   case MVT::v8i16:
1279   case MVT::v16i8:
1280     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1281                                    InFlag).getValue(1);
1282     ResultVals[0] = Chain.getValue(0);
1283     NumResults = 1;
1284     NodeTys.push_back(Op.Val->getValueType(0));
1285     break;
1286   }
1287
1288   NodeTys.push_back(MVT::Other);
1289
1290   // If the function returns void, just return the chain.
1291   if (NumResults == 0)
1292     return Chain;
1293
1294   // Otherwise, merge everything together with a MERGE_VALUES node.
1295   ResultVals[NumResults++] = Chain;
1296   SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1297                               ResultVals, NumResults);
1298   return Res.getValue(Op.ResNo);
1299 }
1300
1301 static SDOperand
1302 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1303   SmallVector<CCValAssign, 16> RVLocs;
1304   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1305   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1306   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1307   CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1308
1309   // If this is the first return lowered for this function, add the regs to the
1310   // liveout set for the function.
1311   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1312     for (unsigned i = 0; i != RVLocs.size(); ++i)
1313       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1314   }
1315
1316   SDOperand Chain = Op.getOperand(0);
1317   SDOperand Flag;
1318
1319   // Copy the result values into the output registers.
1320   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1321     CCValAssign &VA = RVLocs[i];
1322     assert(VA.isRegLoc() && "Can only return in registers!");
1323     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1324     Flag = Chain.getValue(1);
1325   }
1326
1327   if (Flag.Val)
1328     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1329   else
1330     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1331 }
1332
1333
1334 //===----------------------------------------------------------------------===//
1335 // Vector related lowering:
1336 //===----------------------------------------------------------------------===//
1337
1338 static ConstantSDNode *
1339 getVecImm(SDNode *N) {
1340   SDOperand OpVal(0, 0);
1341
1342   // Check to see if this buildvec has a single non-undef value in its elements.
1343   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1344     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1345     if (OpVal.Val == 0)
1346       OpVal = N->getOperand(i);
1347     else if (OpVal != N->getOperand(i))
1348       return 0;
1349   }
1350
1351   if (OpVal.Val != 0) {
1352     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1353       return CN;
1354     }
1355   }
1356
1357   return 0; // All UNDEF: use implicit def.; not Constant node
1358 }
1359
1360 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1361 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1362 /// constant
1363 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1364                               MVT::ValueType ValueType) {
1365   if (ConstantSDNode *CN = getVecImm(N)) {
1366     uint64_t Value = CN->getValue();
1367     if (Value <= 0x3ffff)
1368       return DAG.getConstant(Value, ValueType);
1369   }
1370
1371   return SDOperand();
1372 }
1373
1374 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1375 /// and the value fits into a signed 16-bit constant, and if so, return the
1376 /// constant
1377 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1378                               MVT::ValueType ValueType) {
1379   if (ConstantSDNode *CN = getVecImm(N)) {
1380     if (ValueType == MVT::i32) {
1381       int Value = (int) CN->getValue();
1382       int SExtValue = ((Value & 0xffff) << 16) >> 16;
1383
1384       if (Value == SExtValue)
1385         return DAG.getConstant(Value, ValueType);
1386     } else if (ValueType == MVT::i16) {
1387       short Value = (short) CN->getValue();
1388       int SExtValue = ((int) Value << 16) >> 16;
1389
1390       if (Value == (short) SExtValue)
1391         return DAG.getConstant(Value, ValueType);
1392     } else if (ValueType == MVT::i64) {
1393       int64_t Value = CN->getValue();
1394       int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1395
1396       if (Value == SExtValue)
1397         return DAG.getConstant(Value, ValueType);
1398     }
1399   }
1400
1401   return SDOperand();
1402 }
1403
1404 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1405 /// and the value fits into a signed 10-bit constant, and if so, return the
1406 /// constant
1407 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1408                               MVT::ValueType ValueType) {
1409   if (ConstantSDNode *CN = getVecImm(N)) {
1410     int Value = (int) CN->getValue();
1411     if ((ValueType == MVT::i32 && isS10Constant(Value))
1412         || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1413       return DAG.getConstant(Value, ValueType);
1414   }
1415
1416   return SDOperand();
1417 }
1418
1419 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1420 /// and the value fits into a signed 8-bit constant, and if so, return the
1421 /// constant.
1422 ///
1423 /// @note: The incoming vector is v16i8 because that's the only way we can load
1424 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1425 /// same value.
1426 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1427                              MVT::ValueType ValueType) {
1428   if (ConstantSDNode *CN = getVecImm(N)) {
1429     int Value = (int) CN->getValue();
1430     if (ValueType == MVT::i16
1431         && Value <= 0xffff                 /* truncated from uint64_t */
1432         && ((short) Value >> 8) == ((short) Value & 0xff))
1433       return DAG.getConstant(Value & 0xff, ValueType);
1434     else if (ValueType == MVT::i8
1435              && (Value & 0xff) == Value)
1436       return DAG.getConstant(Value, ValueType);
1437   }
1438
1439   return SDOperand();
1440 }
1441
1442 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1443 /// and the value fits into a signed 16-bit constant, and if so, return the
1444 /// constant
1445 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1446                                MVT::ValueType ValueType) {
1447   if (ConstantSDNode *CN = getVecImm(N)) {
1448     uint64_t Value = CN->getValue();
1449     if ((ValueType == MVT::i32
1450           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1451         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1452       return DAG.getConstant(Value >> 16, ValueType);
1453   }
1454
1455   return SDOperand();
1456 }
1457
1458 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1459 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1460   if (ConstantSDNode *CN = getVecImm(N)) {
1461     return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1462   }
1463
1464   return SDOperand();
1465 }
1466
1467 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1468 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1469   if (ConstantSDNode *CN = getVecImm(N)) {
1470     return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1471   }
1472
1473   return SDOperand();
1474 }
1475
1476 // If this is a vector of constants or undefs, get the bits.  A bit in
1477 // UndefBits is set if the corresponding element of the vector is an
1478 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1479 // zero.   Return true if this is not an array of constants, false if it is.
1480 //
1481 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1482                                        uint64_t UndefBits[2]) {
1483   // Start with zero'd results.
1484   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1485
1486   unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1487   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1488     SDOperand OpVal = BV->getOperand(i);
1489
1490     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1491     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1492
1493     uint64_t EltBits = 0;
1494     if (OpVal.getOpcode() == ISD::UNDEF) {
1495       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1496       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1497       continue;
1498     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1499       EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1500     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1501       const APFloat &apf = CN->getValueAPF();
1502       EltBits = (CN->getValueType(0) == MVT::f32
1503                  ? FloatToBits(apf.convertToFloat())
1504                  : DoubleToBits(apf.convertToDouble()));
1505     } else {
1506       // Nonconstant element.
1507       return true;
1508     }
1509
1510     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1511   }
1512
1513   //printf("%llx %llx  %llx %llx\n",
1514   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1515   return false;
1516 }
1517
/// isConstantSplat - Decide whether a 128-bit constant is a repetition of a
/// single value whose width is selected by MinSplatBits.
///
/// \param Bits128      the constant, as two 64-bit words; undefined bits are
///                     expected to be zero here.
/// \param Undef128     mask of the undefined bits of Bits128.
/// \param MinSplatBits width of the repeated value that is being asked for:
///                     below 16 selects a byte, below 32 a halfword, below 64
///                     a word, anything else a doubleword.
/// \param SplatBits    [out] the repeated value (all repetitions merged).
/// \param SplatUndef   [out] bits that are undefined in every repetition.
/// \param SplatSize    [out] width of the repeated value in bytes (1,2,4,8).
///
/// The constant is halved repeatedly (128 -> 64 -> 32 -> 16 -> 8 bits).  At
/// each step the two halves have to agree, where an undefined bit agrees with
/// anything.  Halving stops at the requested width.  For example,
/// "0x01010101010101..." yields SplatBits = 0x01 / SplatSize = 1 for
/// MinSplatBits = 8, and SplatBits = 0x01010101 / SplatSize = 4 for
/// MinSplatBits = 32.
///
/// Returns false, leaving the outputs untouched, if the halves disagree at
/// any step down to the requested width.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Merge the halves at every level.  Defined bits are OR'ed together (undef
  // bits are zero, so they do not disturb the result); a bit stays undefined
  // only if it is undefined in both halves.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // The two 64-bit words must match, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  if (MinSplatBits >= 64) {
    // Report the merged doubleword: taking word 0 alone would lose the
    // defined bits of word 1 whenever (part of) word 0 is undef.
    SplatBits = Bits64;
    SplatUndef = Undef64;
    SplatSize = 8;
    return true;
  }

  // The two 32-bit halves must match, ignoring undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  if (MinSplatBits >= 32) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // The two 16-bit halves must match, ignoring undefs.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  if (MinSplatBits >= 16) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // The two bytes must match, ignoring undefs.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
1581
1582 // If this is a case we can't handle, return null and let the default
1583 // expansion code take care of it.  If we CAN select this case, and if it
1584 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1585 // this case more efficiently than a constant pool load, lower it to the
1586 // sequence of ops that should be used.
1587 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1588   MVT::ValueType VT = Op.getValueType();
1589   // If this is a vector of constants or undefs, get the bits.  A bit in
1590   // UndefBits is set if the corresponding element of the vector is an
1591   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1592   // zero.
1593   uint64_t VectorBits[2];
1594   uint64_t UndefBits[2];
1595   uint64_t SplatBits, SplatUndef;
1596   int SplatSize;
1597   if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1598       || !isConstantSplat(VectorBits, UndefBits,
1599                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1600                           SplatBits, SplatUndef, SplatSize))
1601     return SDOperand();   // Not a constant vector, not a splat.
1602
1603   switch (VT) {
1604   default:
1605   case MVT::v4f32: {
1606     uint32_t Value32 = SplatBits;
1607     assert(SplatSize == 4
1608            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1609     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1610     SDOperand T = DAG.getConstant(Value32, MVT::i32);
1611     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1612                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1613     break;
1614   }
1615   case MVT::v2f64: {
1616     uint64_t f64val = SplatBits;
1617     assert(SplatSize == 8
1618            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1619     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1620     SDOperand T = DAG.getConstant(f64val, MVT::i64);
1621     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1622                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1623     break;
1624   }
1625   case MVT::v16i8: {
1626    // 8-bit constants have to be expanded to 16-bits
1627    unsigned short Value16 = SplatBits | (SplatBits << 8);
1628    SDOperand Ops[8];
1629    for (int i = 0; i < 8; ++i)
1630      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1631    return DAG.getNode(ISD::BIT_CONVERT, VT,
1632                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1633   }
1634   case MVT::v8i16: {
1635     unsigned short Value16;
1636     if (SplatSize == 2)
1637       Value16 = (unsigned short) (SplatBits & 0xffff);
1638     else
1639       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1640     SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1641     SDOperand Ops[8];
1642     for (int i = 0; i < 8; ++i) Ops[i] = T;
1643     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1644   }
1645   case MVT::v4i32: {
1646     unsigned int Value = SplatBits;
1647     SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1648     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1649   }
1650   case MVT::v2i64: {
1651     uint64_t val = SplatBits;
1652     uint32_t upper = uint32_t(val >> 32);
1653     uint32_t lower = uint32_t(val);
1654
1655     if (val != 0) {
1656       SDOperand LO32;
1657       SDOperand HI32;
1658       SmallVector<SDOperand, 16> ShufBytes;
1659       SDOperand Result;
1660       bool upper_special, lower_special;
1661
1662       // NOTE: This code creates common-case shuffle masks that can be easily
1663       // detected as common expressions. It is not attempting to create highly
1664       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1665
1666       // Detect if the upper or lower half is a special shuffle mask pattern:
1667       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1668       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1669
1670       // Create lower vector if not a special pattern
1671       if (!lower_special) {
1672         SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1673         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1674                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1675                                        LO32C, LO32C, LO32C, LO32C));
1676       }
1677
1678       // Create upper vector if not a special pattern
1679       if (!upper_special) {
1680         SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1681         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1682                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1683                                        HI32C, HI32C, HI32C, HI32C));
1684       }
1685
1686       // If either upper or lower are special, then the two input operands are
1687       // the same (basically, one of them is a "don't care")
1688       if (lower_special)
1689         LO32 = HI32;
1690       if (upper_special)
1691         HI32 = LO32;
1692       if (lower_special && upper_special) {
1693         // Unhappy situation... both upper and lower are special, so punt with
1694         // a target constant:
1695         SDOperand Zero = DAG.getConstant(0, MVT::i32);
1696         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1697                                   Zero, Zero);
1698       }
1699
1700       for (int i = 0; i < 4; ++i) {
1701         for (int j = 0; j < 4; ++j) {
1702           SDOperand V;
1703           bool process_upper, process_lower;
1704           uint64_t val = 0;
1705
1706           process_upper = (upper_special && (i & 1) == 0);
1707           process_lower = (lower_special && (i & 1) == 1);
1708
1709           if (process_upper || process_lower) {
1710             if ((process_upper && upper == 0)
1711                 || (process_lower && lower == 0))
1712               val = 0x80;
1713             else if ((process_upper && upper == 0xffffffff)
1714                      || (process_lower && lower == 0xffffffff))
1715               val = 0xc0;
1716             else if ((process_upper && upper == 0x80000000)
1717                      || (process_lower && lower == 0x80000000))
1718               val = (j == 0 ? 0xe0 : 0x80);
1719           } else
1720             val = i * 4 + j + ((i & 1) * 16);
1721
1722           ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1723         }
1724       }
1725
1726       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1727                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1728                                      &ShufBytes[0], ShufBytes.size()));
1729     } else {
1730       // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1731       SDOperand Zero = DAG.getConstant(0, MVT::i32);
1732       return DAG.getNode(ISD::BIT_CONVERT, VT,
1733                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1734                                      Zero, Zero, Zero, Zero));
1735     }
1736   }
1737   }
1738
1739   return SDOperand();
1740 }
1741
1742 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1743 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1744 /// permutation vector, V3, is monotonically increasing with one "exception"
1745 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1746 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1747 /// In either case, the net result is going to eventually invoke SHUFB to
1748 /// permute/shuffle the bytes from V1 and V2.
1749 /// \note
1750 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1751 /// control word for byte/halfword/word insertion. This takes care of a single
1752 /// element move from V2 into V1.
1753 /// \note
1754 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1755 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1756   SDOperand V1 = Op.getOperand(0);
1757   SDOperand V2 = Op.getOperand(1);
1758   SDOperand PermMask = Op.getOperand(2);
1759
1760   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1761
1762   // If we have a single element being moved from V1 to V2, this can be handled
1763   // using the C*[DX] compute mask instructions, but the vector elements have
1764   // to be monotonically increasing with one exception element.
1765   MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1766   unsigned EltsFromV2 = 0;
1767   unsigned V2Elt = 0;
1768   unsigned V2EltIdx0 = 0;
1769   unsigned CurrElt = 0;
1770   bool monotonic = true;
1771   if (EltVT == MVT::i8)
1772     V2EltIdx0 = 16;
1773   else if (EltVT == MVT::i16)
1774     V2EltIdx0 = 8;
1775   else if (EltVT == MVT::i32)
1776     V2EltIdx0 = 4;
1777   else
1778     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1779
1780   for (unsigned i = 0, e = PermMask.getNumOperands();
1781        EltsFromV2 <= 1 && monotonic && i != e;
1782        ++i) {
1783     unsigned SrcElt;
1784     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1785       SrcElt = 0;
1786     else
1787       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1788
1789     if (SrcElt >= V2EltIdx0) {
1790       ++EltsFromV2;
1791       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1792     } else if (CurrElt != SrcElt) {
1793       monotonic = false;
1794     }
1795
1796     ++CurrElt;
1797   }
1798
1799   if (EltsFromV2 == 1 && monotonic) {
1800     // Compute mask and shuffle
1801     MachineFunction &MF = DAG.getMachineFunction();
1802     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1803     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1804     MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1805     // Initialize temporary register to 0
1806     SDOperand InitTempReg =
1807       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1808     // Copy register's contents as index in INSERT_MASK:
1809     SDOperand ShufMaskOp =
1810       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1811                   DAG.getTargetConstant(V2Elt, MVT::i32),
1812                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1813     // Use shuffle mask in SHUFB synthetic instruction:
1814     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1815   } else {
1816     // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1817     unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1818
1819     SmallVector<SDOperand, 16> ResultMask;
1820     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1821       unsigned SrcElt;
1822       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1823         SrcElt = 0;
1824       else
1825         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1826
1827       for (unsigned j = 0; j != BytesPerElement; ++j) {
1828         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1829                                              MVT::i8));
1830       }
1831     }
1832
1833     SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1834                                       &ResultMask[0], ResultMask.size());
1835     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1836   }
1837 }
1838
1839 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1840   SDOperand Op0 = Op.getOperand(0);                     // Op0 = the scalar
1841
1842   if (Op0.Val->getOpcode() == ISD::Constant) {
1843     // For a constant, build the appropriate constant vector, which will
1844     // eventually simplify to a vector register load.
1845
1846     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1847     SmallVector<SDOperand, 16> ConstVecValues;
1848     MVT::ValueType VT;
1849     size_t n_copies;
1850
1851     // Create a constant vector:
1852     switch (Op.getValueType()) {
1853     default: assert(0 && "Unexpected constant value type in "
1854                          "LowerSCALAR_TO_VECTOR");
1855     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1856     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1857     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1858     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1859     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1860     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1861     }
1862
1863     SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1864     for (size_t j = 0; j < n_copies; ++j)
1865       ConstVecValues.push_back(CValue);
1866
1867     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1868                        &ConstVecValues[0], ConstVecValues.size());
1869   } else {
1870     // Otherwise, copy the value from one register to another:
1871     switch (Op0.getValueType()) {
1872     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1873     case MVT::i8:
1874     case MVT::i16:
1875     case MVT::i32:
1876     case MVT::i64:
1877     case MVT::f32:
1878     case MVT::f64:
1879       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1880     }
1881   }
1882
1883   return SDOperand();
1884 }
1885
1886 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1887   switch (Op.getValueType()) {
1888   case MVT::v4i32: {
1889     SDOperand rA = Op.getOperand(0);
1890     SDOperand rB = Op.getOperand(1);
1891     SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1892     SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1893     SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1894     SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1895
1896     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1897     break;
1898   }
1899
1900   // Multiply two v8i16 vectors (pipeline friendly version):
1901   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1902   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1903   // c) Use SELB to select upper and lower halves from the intermediate results
1904   //
1905   // NOTE: We really want to move the FSMBI to earlier to actually get the
1906   // dual-issue. This code does manage to do this, even if it's a little on
1907   // the wacky side
1908   case MVT::v8i16: {
1909     MachineFunction &MF = DAG.getMachineFunction();
1910     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1911     SDOperand Chain = Op.getOperand(0);
1912     SDOperand rA = Op.getOperand(0);
1913     SDOperand rB = Op.getOperand(1);
1914     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1915     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1916
1917     SDOperand FSMBOp =
1918       DAG.getCopyToReg(Chain, FSMBIreg,
1919                        DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1920                                    DAG.getConstant(0xcccc, MVT::i32)));
1921
1922     SDOperand HHProd =
1923       DAG.getCopyToReg(FSMBOp, HiProdReg,
1924                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1925
1926     SDOperand HHProd_v4i32 =
1927       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1928                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1929
1930     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1931                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1932                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1933                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1934                                                HHProd_v4i32,
1935                                                DAG.getConstant(16, MVT::i16))),
1936                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1937   }
1938
1939   // This M00sE is N@stI! (apologies to Monty Python)
1940   //
1941   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1942   // is to break it all apart, sign extend, and reassemble the various
1943   // intermediate products.
1944   case MVT::v16i8: {
1945     MachineFunction &MF = DAG.getMachineFunction();
1946     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1947     SDOperand Chain = Op.getOperand(0);
1948     SDOperand rA = Op.getOperand(0);
1949     SDOperand rB = Op.getOperand(1);
1950     SDOperand c8 = DAG.getConstant(8, MVT::i8);
1951     SDOperand c16 = DAG.getConstant(16, MVT::i8);
1952
1953     unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1954     unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1955     unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1956
1957     SDOperand LLProd =
1958       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1959                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1960                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1961
1962     SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1963
1964     SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1965
1966     SDOperand LHProd =
1967       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1968                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1969
1970     SDOperand FSMBdef_2222 =
1971       DAG.getCopyToReg(Chain, FSMBreg_2222,
1972                        DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1973                                    DAG.getConstant(0x2222, MVT::i32)));
1974
1975     SDOperand FSMBuse_2222 =
1976       DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1977
1978     SDOperand LoProd_1 =
1979       DAG.getCopyToReg(Chain, LoProd_reg,
1980                        DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1981                                    FSMBuse_2222));
1982
1983     SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1984
1985     SDOperand LoProd =
1986       DAG.getNode(ISD::AND, MVT::v4i32,
1987                   DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1988                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1989                               LoProdMask, LoProdMask,
1990                               LoProdMask, LoProdMask));
1991
1992     SDOperand rAH =
1993       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1994                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1995
1996     SDOperand rBH =
1997       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1998                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1999
2000     SDOperand HLProd =
2001       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2002                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2003                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2004
2005     SDOperand HHProd_1 =
2006       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2007                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2008                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2009                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2010                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2011
2012     SDOperand HHProd =
2013       DAG.getCopyToReg(Chain, HiProd_reg,
2014                        DAG.getNode(SPUISD::SELB, MVT::v8i16,
2015                                    HLProd,
2016                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2017                                    FSMBuse_2222));
2018
2019     SDOperand HiProd =
2020       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2021                   DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2022
2023     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2024                        DAG.getNode(ISD::OR, MVT::v4i32,
2025                                    LoProd, HiProd));
2026   }
2027
2028   default:
2029     cerr << "CellSPU: Unknown vector multiplication, got "
2030          << MVT::getValueTypeString(Op.getValueType())
2031          << "\n";
2032     abort();
2033     /*NOTREACHED*/
2034   }
2035
2036   return SDOperand();
2037 }
2038
2039 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2040   MachineFunction &MF = DAG.getMachineFunction();
2041   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2042
2043   SDOperand A = Op.getOperand(0);
2044   SDOperand B = Op.getOperand(1);
2045   unsigned VT = Op.getValueType();
2046
2047   unsigned VRegBR, VRegC;
2048
2049   if (VT == MVT::f32) {
2050     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2051     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2052   } else {
2053     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2054     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2055   }
2056   // TODO: make sure we're feeding FPInterp the right arguments
2057   // Right now: fi B, frest(B)
2058
2059   // Computes BRcpl =
2060   // (Floating Interpolate (FP Reciprocal Estimate B))
2061   SDOperand BRcpl =
2062       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2063                        DAG.getNode(SPUISD::FPInterp, VT, B,
2064                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2065
2066   // Computes A * BRcpl and stores in a temporary register
2067   SDOperand AxBRcpl =
2068       DAG.getCopyToReg(BRcpl, VRegC,
2069                  DAG.getNode(ISD::FMUL, VT, A,
2070                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2071   // What's the Chain variable do? It's magic!
2072   // TODO: set Chain = Op(0).getEntryNode()
2073
2074   return DAG.getNode(ISD::FADD, VT,
2075                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2076                 DAG.getNode(ISD::FMUL, VT,
2077                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2078                         DAG.getNode(ISD::FSUB, VT, A,
2079                             DAG.getNode(ISD::FMUL, VT, B,
2080                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2081 }
2082
2083 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2084   unsigned VT = Op.getValueType();
2085   SDOperand N = Op.getOperand(0);
2086   SDOperand Elt = Op.getOperand(1);
2087   SDOperand ShufMask[16];
2088   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2089
2090   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2091
2092   int EltNo = (int) C->getValue();
2093
2094   // sanity checks:
2095   if (VT == MVT::i8 && EltNo >= 16)
2096     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2097   else if (VT == MVT::i16 && EltNo >= 8)
2098     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2099   else if (VT == MVT::i32 && EltNo >= 4)
2100     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2101   else if (VT == MVT::i64 && EltNo >= 2)
2102     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2103
2104   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2105     // i32 and i64: Element 0 is the preferred slot
2106     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2107   }
2108
2109   // Need to generate shuffle mask and extract:
2110   int prefslot_begin = -1, prefslot_end = -1;
2111   int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2112
2113   switch (VT) {
2114   case MVT::i8: {
2115     prefslot_begin = prefslot_end = 3;
2116     break;
2117   }
2118   case MVT::i16: {
2119     prefslot_begin = 2; prefslot_end = 3;
2120     break;
2121   }
2122   case MVT::i32: {
2123     prefslot_begin = 0; prefslot_end = 3;
2124     break;
2125   }
2126   case MVT::i64: {
2127     prefslot_begin = 0; prefslot_end = 7;
2128     break;
2129   }
2130   }
2131
2132   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2133          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2134
2135   for (int i = 0; i < 16; ++i) {
2136     // zero fill uppper part of preferred slot, don't care about the
2137     // other slots:
2138     unsigned int mask_val;
2139
2140     if (i <= prefslot_end) {
2141       mask_val =
2142         ((i < prefslot_begin)
2143          ? 0x80
2144          : elt_byte + (i - prefslot_begin));
2145
2146       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2147     } else
2148       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2149   }
2150
2151   SDOperand ShufMaskVec =
2152     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2153                 &ShufMask[0],
2154                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2155
2156   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2157                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2158                                  N, N, ShufMaskVec));
2159
2160 }
2161
2162 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2163   SDOperand VecOp = Op.getOperand(0);
2164   SDOperand ValOp = Op.getOperand(1);
2165   SDOperand IdxOp = Op.getOperand(2);
2166   MVT::ValueType VT = Op.getValueType();
2167
2168   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2169   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2170
2171   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2172   // Use $2 because it's always 16-byte aligned and it's available:
2173   SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2174
2175   SDOperand result =
2176     DAG.getNode(SPUISD::SHUFB, VT,
2177                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2178                 VecOp,
2179                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2180                             DAG.getNode(ISD::ADD, PtrVT,
2181                                         PtrBase,
2182                                         DAG.getConstant(CN->getValue(),
2183                                                         PtrVT))));
2184
2185   return result;
2186 }
2187
2188 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2189   SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand
2190
2191   assert(Op.getValueType() == MVT::i8);
2192   switch (Opc) {
2193   default:
2194     assert(0 && "Unhandled i8 math operator");
2195     /*NOTREACHED*/
2196     break;
2197   case ISD::SUB: {
2198     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2199     // the result:
2200     SDOperand N1 = Op.getOperand(1);
2201     N0 = (N0.getOpcode() != ISD::Constant
2202           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2203           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2204     N1 = (N1.getOpcode() != ISD::Constant
2205           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2206           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2207     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2208                        DAG.getNode(Opc, MVT::i16, N0, N1));
2209   }
2210   case ISD::ROTR:
2211   case ISD::ROTL: {
2212     SDOperand N1 = Op.getOperand(1);
2213     unsigned N1Opc;
2214     N0 = (N0.getOpcode() != ISD::Constant
2215           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2216           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2217     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2218     N1 = (N1.getOpcode() != ISD::Constant
2219           ? DAG.getNode(N1Opc, MVT::i16, N1)
2220           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2221     SDOperand ExpandArg =
2222       DAG.getNode(ISD::OR, MVT::i16, N0,
2223                   DAG.getNode(ISD::SHL, MVT::i16,
2224                               N0, DAG.getConstant(8, MVT::i16)));
2225     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2226                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2227   }
2228   case ISD::SRL:
2229   case ISD::SHL: {
2230     SDOperand N1 = Op.getOperand(1);
2231     unsigned N1Opc;
2232     N0 = (N0.getOpcode() != ISD::Constant
2233           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2234           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2235     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2236     N1 = (N1.getOpcode() != ISD::Constant
2237           ? DAG.getNode(N1Opc, MVT::i16, N1)
2238           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2239     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2240                        DAG.getNode(Opc, MVT::i16, N0, N1));
2241   }
2242   case ISD::SRA: {
2243     SDOperand N1 = Op.getOperand(1);
2244     unsigned N1Opc;
2245     N0 = (N0.getOpcode() != ISD::Constant
2246           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2247           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2248     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2249     N1 = (N1.getOpcode() != ISD::Constant
2250           ? DAG.getNode(N1Opc, MVT::i16, N1)
2251           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2252     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2253                        DAG.getNode(Opc, MVT::i16, N0, N1));
2254   }
2255   case ISD::MUL: {
2256     SDOperand N1 = Op.getOperand(1);
2257     unsigned N1Opc;
2258     N0 = (N0.getOpcode() != ISD::Constant
2259           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2260           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2261     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2262     N1 = (N1.getOpcode() != ISD::Constant
2263           ? DAG.getNode(N1Opc, MVT::i16, N1)
2264           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2265     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2266                        DAG.getNode(Opc, MVT::i16, N0, N1));
2267     break;
2268   }
2269   }
2270
2271   return SDOperand();
2272 }
2273
2274 //! Lower byte immediate operations for v16i8 vectors:
2275 static SDOperand
2276 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2277   SDOperand ConstVec;
2278   SDOperand Arg;
2279   MVT::ValueType VT = Op.getValueType();
2280
2281   ConstVec = Op.getOperand(0);
2282   Arg = Op.getOperand(1);
2283   if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2284     if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2285       ConstVec = ConstVec.getOperand(0);
2286     } else {
2287       ConstVec = Op.getOperand(1);
2288       Arg = Op.getOperand(0);
2289       if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2290         ConstVec = ConstVec.getOperand(0);
2291       }
2292     }
2293   }
2294
2295   if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2296     uint64_t VectorBits[2];
2297     uint64_t UndefBits[2];
2298     uint64_t SplatBits, SplatUndef;
2299     int SplatSize;
2300
2301     if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2302         && isConstantSplat(VectorBits, UndefBits,
2303                            MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2304                            SplatBits, SplatUndef, SplatSize)) {
2305       SDOperand tcVec[16];
2306       SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2307       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2308
2309       // Turn the BUILD_VECTOR into a set of target constants:
2310       for (size_t i = 0; i < tcVecSize; ++i)
2311         tcVec[i] = tc;
2312
2313       return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2314                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2315     }
2316   }
2317
2318   return SDOperand();
2319 }
2320
2321 //! Lower i32 multiplication
2322 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2323                           unsigned Opc) {
2324   switch (VT) {
2325   default:
2326     cerr << "CellSPU: Unknown LowerMUL value type, got "
2327          << MVT::getValueTypeString(Op.getValueType())
2328          << "\n";
2329     abort();
2330     /*NOTREACHED*/
2331
2332   case MVT::i32: {
2333     SDOperand rA = Op.getOperand(0);
2334     SDOperand rB = Op.getOperand(1);
2335
2336     return DAG.getNode(ISD::ADD, MVT::i32,
2337                        DAG.getNode(ISD::ADD, MVT::i32,
2338                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2339                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2340                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2341   }
2342   }
2343
2344   return SDOperand();
2345 }
2346
2347 //! Custom lowering for CTPOP (count population)
2348 /*!
2349   Custom lowering code that counts the number ones in the input
2350   operand. SPU has such an instruction, but it counts the number of
2351   ones per byte, which then have to be accumulated.
2352 */
2353 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2354   unsigned VT = Op.getValueType();
2355   unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2356
2357   switch (VT) {
2358   case MVT::i8: {
2359     SDOperand N = Op.getOperand(0);
2360     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2361
2362     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2363     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2364
2365     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2366   }
2367
2368   case MVT::i16: {
2369     MachineFunction &MF = DAG.getMachineFunction();
2370     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2371
2372     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2373
2374     SDOperand N = Op.getOperand(0);
2375     SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2376     SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2377     SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2378
2379     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2380     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2381
2382     // CNTB_result becomes the chain to which all of the virtual registers
2383     // CNTB_reg, SUM1_reg become associated:
2384     SDOperand CNTB_result =
2385       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2386
2387     SDOperand CNTB_rescopy =
2388       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2389
2390     SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2391
2392     return DAG.getNode(ISD::AND, MVT::i16,
2393                        DAG.getNode(ISD::ADD, MVT::i16,
2394                                    DAG.getNode(ISD::SRL, MVT::i16,
2395                                                Tmp1, Shift1),
2396                                    Tmp1),
2397                        Mask0);
2398   }
2399
2400   case MVT::i32: {
2401     MachineFunction &MF = DAG.getMachineFunction();
2402     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2403
2404     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2405     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2406
2407     SDOperand N = Op.getOperand(0);
2408     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2409     SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2410     SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2411     SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2412
2413     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2414     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2415
2416     // CNTB_result becomes the chain to which all of the virtual registers
2417     // CNTB_reg, SUM1_reg become associated:
2418     SDOperand CNTB_result =
2419       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2420
2421     SDOperand CNTB_rescopy =
2422       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2423
2424     SDOperand Comp1 =
2425       DAG.getNode(ISD::SRL, MVT::i32,
2426                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2427
2428     SDOperand Sum1 =
2429       DAG.getNode(ISD::ADD, MVT::i32,
2430                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2431
2432     SDOperand Sum1_rescopy =
2433       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2434
2435     SDOperand Comp2 =
2436       DAG.getNode(ISD::SRL, MVT::i32,
2437                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2438                   Shift2);
2439     SDOperand Sum2 =
2440       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2441                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2442
2443     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2444   }
2445
2446   case MVT::i64:
2447     break;
2448   }
2449
2450   return SDOperand();
2451 }
2452
2453 /// LowerOperation - Provide custom lowering hooks for some operations.
2454 ///
2455 SDOperand
2456 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2457 {
2458   switch (Op.getOpcode()) {
2459   default: {
2460     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2461     cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2462     cerr << "*Op.Val:\n";
2463     Op.Val->dump();
2464     abort();
2465   }
2466   case ISD::LOAD:
2467   case ISD::SEXTLOAD:
2468   case ISD::ZEXTLOAD:
2469     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2470   case ISD::STORE:
2471     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2472   case ISD::ConstantPool:
2473     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2474   case ISD::GlobalAddress:
2475     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2476   case ISD::JumpTable:
2477     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2478   case ISD::Constant:
2479     return LowerConstant(Op, DAG);
2480   case ISD::ConstantFP:
2481     return LowerConstantFP(Op, DAG);
2482   case ISD::BRCOND:
2483     return LowerBRCOND(Op, DAG);
2484   case ISD::FORMAL_ARGUMENTS:
2485     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2486   case ISD::CALL:
2487     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2488   case ISD::RET:
2489     return LowerRET(Op, DAG, getTargetMachine());
2490
2491   // i8 math ops:
2492   case ISD::SUB:
2493   case ISD::ROTR:
2494   case ISD::ROTL:
2495   case ISD::SRL:
2496   case ISD::SHL:
2497   case ISD::SRA:
2498     return LowerI8Math(Op, DAG, Op.getOpcode());
2499
2500   // Vector-related lowering.
2501   case ISD::BUILD_VECTOR:
2502     return LowerBUILD_VECTOR(Op, DAG);
2503   case ISD::SCALAR_TO_VECTOR:
2504     return LowerSCALAR_TO_VECTOR(Op, DAG);
2505   case ISD::VECTOR_SHUFFLE:
2506     return LowerVECTOR_SHUFFLE(Op, DAG);
2507   case ISD::EXTRACT_VECTOR_ELT:
2508     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2509   case ISD::INSERT_VECTOR_ELT:
2510     return LowerINSERT_VECTOR_ELT(Op, DAG);
2511
2512   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2513   case ISD::AND:
2514   case ISD::OR:
2515   case ISD::XOR:
2516     return LowerByteImmed(Op, DAG);
2517
2518   // Vector and i8 multiply:
2519   case ISD::MUL:
2520     if (MVT::isVector(Op.getValueType()))
2521       return LowerVectorMUL(Op, DAG);
2522     else if (Op.getValueType() == MVT::i8)
2523       return LowerI8Math(Op, DAG, Op.getOpcode());
2524     else
2525       return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2526
2527   case ISD::FDIV:
2528     if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2529       return LowerFDIVf32(Op, DAG);
2530 //    else if (Op.getValueType() == MVT::f64)
2531 //      return LowerFDIVf64(Op, DAG);
2532     else
2533       assert(0 && "Calling FDIV on unsupported MVT");
2534
2535   case ISD::CTPOP:
2536     return LowerCTPOP(Op, DAG);
2537   }
2538
2539   return SDOperand();
2540 }
2541
2542 //===----------------------------------------------------------------------===//
2543 // Target Optimization Hooks
2544 //===----------------------------------------------------------------------===//
2545
2546 SDOperand
2547 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2548 {
2549 #if 0
2550   TargetMachine &TM = getTargetMachine();
2551 #endif
2552   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2553   SelectionDAG &DAG = DCI.DAG;
2554   SDOperand N0 = N->getOperand(0);      // everything has at least one operand
2555
2556   switch (N->getOpcode()) {
2557   default: break;
2558   case SPUISD::IndirectAddr: {
2559     if (!ST->usingLargeMem() && N0.getOpcode() == SPUISD::AFormAddr) {
2560       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2561       if (CN->getValue() == 0) {
2562         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2563         // (SPUaform <addr>, 0)
2564
2565         DEBUG(cerr << "Replace: ");
2566         DEBUG(N->dump(&DAG));
2567         DEBUG(cerr << "\nWith:    ");
2568         DEBUG(N0.Val->dump(&DAG));
2569         DEBUG(cerr << "\n");
2570
2571         return N0;
2572       }
2573     }
2574   }
2575   case ISD::ADD: {
2576     SDOperand Op0 = N->getOperand(0);
2577     SDOperand Op1 = N->getOperand(1);
2578
2579     if ((Op1.getOpcode() == ISD::Constant
2580          || Op1.getOpcode() == ISD::TargetConstant)
2581         && Op0.getOpcode() == SPUISD::IndirectAddr) {
2582       SDOperand Op01 = Op0.getOperand(1);
2583       if (Op01.getOpcode() == ISD::Constant
2584           || Op01.getOpcode() == ISD::TargetConstant) {
2585         // (add <const>, (SPUindirect <arg>, <const>)) ->
2586         // (SPUindirect <arg>, <const + const>)
2587         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2588         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2589         SDOperand combinedConst =
2590           DAG.getConstant(CN0->getValue() + CN1->getValue(),
2591                           Op0.getValueType());
2592
2593         DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2594                    << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2595         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2596                    << CN0->getValue() + CN1->getValue() << ")\n");
2597         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2598                            Op0.getOperand(0), combinedConst);
2599       }
2600     } else if ((Op0.getOpcode() == ISD::Constant
2601                 || Op0.getOpcode() == ISD::TargetConstant)
2602                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2603       SDOperand Op11 = Op1.getOperand(1);
2604       if (Op11.getOpcode() == ISD::Constant
2605           || Op11.getOpcode() == ISD::TargetConstant) {
2606         // (add (SPUindirect <arg>, <const>), <const>) ->
2607         // (SPUindirect <arg>, <const + const>)
2608         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2609         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2610         SDOperand combinedConst =
2611           DAG.getConstant(CN0->getValue() + CN1->getValue(),
2612                           Op0.getValueType());
2613
2614         DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2615                    << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2616         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2617                    << CN0->getValue() + CN1->getValue() << ")\n");
2618
2619         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2620                            Op1.getOperand(0), combinedConst);
2621       }
2622     }
2623   }
2624   }
2625   // Otherwise, return unchanged.
2626   return SDOperand();
2627 }
2628
2629 //===----------------------------------------------------------------------===//
2630 // Inline Assembly Support
2631 //===----------------------------------------------------------------------===//
2632
2633 /// getConstraintType - Given a constraint letter, return the type of
2634 /// constraint it is for this target.
2635 SPUTargetLowering::ConstraintType
2636 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2637   if (ConstraintLetter.size() == 1) {
2638     switch (ConstraintLetter[0]) {
2639     default: break;
2640     case 'b':
2641     case 'r':
2642     case 'f':
2643     case 'v':
2644     case 'y':
2645       return C_RegisterClass;
2646     }
2647   }
2648   return TargetLowering::getConstraintType(ConstraintLetter);
2649 }
2650
2651 std::pair<unsigned, const TargetRegisterClass*>
2652 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2653                                                 MVT::ValueType VT) const
2654 {
2655   if (Constraint.size() == 1) {
2656     // GCC RS6000 Constraint Letters
2657     switch (Constraint[0]) {
2658     case 'b':   // R1-R31
2659     case 'r':   // R0-R31
2660       if (VT == MVT::i64)
2661         return std::make_pair(0U, SPU::R64CRegisterClass);
2662       return std::make_pair(0U, SPU::R32CRegisterClass);
2663     case 'f':
2664       if (VT == MVT::f32)
2665         return std::make_pair(0U, SPU::R32FPRegisterClass);
2666       else if (VT == MVT::f64)
2667         return std::make_pair(0U, SPU::R64FPRegisterClass);
2668       break;
2669     case 'v':
2670       return std::make_pair(0U, SPU::GPRCRegisterClass);
2671     }
2672   }
2673
2674   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2675 }
2676
2677 void
2678 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2679                                                   APInt Mask,
2680                                                   APInt &KnownZero,
2681                                                   APInt &KnownOne,
2682                                                   const SelectionDAG &DAG,
2683                                                   unsigned Depth ) const {
2684   KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2685 }
2686
2687 // LowerAsmOperandForConstraint
2688 void
2689 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2690                                                 char ConstraintLetter,
2691                                                 std::vector<SDOperand> &Ops,
2692                                                 SelectionDAG &DAG) {
2693   // Default, for the time being, to the base class handler
2694   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2695 }
2696
2697 /// isLegalAddressImmediate - Return true if the integer value can be used
2698 /// as the offset of the target addressing mode.
2699 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2700   // SPU's addresses are 256K:
2701   return (V > -(1 << 18) && V < (1 << 18) - 1);
2702 }
2703
2704 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2705   return false;
2706 }