lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "llvm/ADT/VectorExtras.h"
  18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT::ValueType mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT::ValueType        valtype;
  43     const int                   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << MVT::getValueTypeString(VT)
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDOperand &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103 }
 104
 105 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 106   : TargetLowering(TM),
 107     SPUTM(TM)
 108 {
 109   // Fold away setcc operations if possible.
 110   setPow2DivIsCheap();
 111
 112   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 113   setUseUnderscoreSetJmp(true);
 114   setUseUnderscoreLongJmp(true);
 115
 116   // Set up the SPU's register classes:
 117   // NOTE: i8 register class is not registered because we cannot determine when
 118   // we need to zero or sign extend for custom-lowered loads and stores.
 119   // NOTE: Ignore the previous note. For now. :-)
 120   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 121   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 122   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 123   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 124   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 125   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 126   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 127
 128   // SPU has no sign or zero extended loads for i1, i8, i16:
 129   setLoadXAction(ISD::EXTLOAD,  MVT::i1, Custom);
 130   setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 131   setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 132   setTruncStoreAction(MVT::i8, MVT::i1, Custom);
 133   setTruncStoreAction(MVT::i16, MVT::i1, Custom);
 134   setTruncStoreAction(MVT::i32, MVT::i1, Custom);
 135   setTruncStoreAction(MVT::i64, MVT::i1, Custom);
 136   setTruncStoreAction(MVT::i128, MVT::i1, Custom);
 137
 138   setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
 139   setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
 140   setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 141   setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
 142   setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
 143   setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
 144   setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
 145   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
 146
 147   setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
 148   setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
 149   setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 150
 151   // SPU constant load actions are custom lowered:
 152   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 153   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
 154   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 155
 156   // SPU's loads and stores have to be custom lowered:
 157   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 158        ++sctype) {
 159     setOperationAction(ISD::LOAD, sctype, Custom);
 160     setOperationAction(ISD::STORE, sctype, Custom);
 161   }
 162
 163   // SPU supports BRCOND, although DAGCombine will convert BRCONDs
 164   // into BR_CCs. BR_CC instructions are custom selected in
 165   // SPUDAGToDAGISel.
 166   setOperationAction(ISD::BRCOND, MVT::Other, Legal);
 167
 168   // Expand the jumptable branches
 169   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 170   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 171   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 172
 173   // SPU has no intrinsics for these particular operations:
 174   setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
 175   setOperationAction(ISD::MEMSET, MVT::Other, Expand);
 176   setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
 177
 178   // PowerPC has no SREM/UREM instructions
 179   setOperationAction(ISD::SREM, MVT::i32, Expand);
 180   setOperationAction(ISD::UREM, MVT::i32, Expand);
 181   setOperationAction(ISD::SREM, MVT::i64, Expand);
 182   setOperationAction(ISD::UREM, MVT::i64, Expand);
 183
 184   // We don't support sin/cos/sqrt/fmod
 185   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 186   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 187   setOperationAction(ISD::FREM , MVT::f64, Expand);
 188   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 189   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 190   setOperationAction(ISD::FREM , MVT::f32, Expand);
 191
 192   // If we're enabling GP optimizations, use hardware square root
 193   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 194   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 195
 196   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 197   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 198
 199   // SPU can do rotate right and left, so legalize it... but customize for i8
 200   // because instructions don't exist.
 201   setOperationAction(ISD::ROTR, MVT::i32,    Legal);
 202   setOperationAction(ISD::ROTR, MVT::i16,    Legal);
 203   setOperationAction(ISD::ROTR, MVT::i8,     Custom);
 204   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 205   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 206   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 207   // SPU has no native version of shift left/right for i8
 208   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 209   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 210   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 211
 212   // Custom lower i32 multiplications
 213   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 214
 215   // Need to custom handle (some) common i8 math ops
 216   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 217   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 218
 219   // SPU does not have BSWAP. It does have i32 support CTLZ.
 220   // CTPOP has to be custom lowered.
 221   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 222   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 223
 224   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 225   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 226   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 227   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 228
 229   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 230   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 231
 232   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 233
 234   // SPU does not have select or setcc
 235   setOperationAction(ISD::SELECT, MVT::i1,   Expand);
 236   setOperationAction(ISD::SELECT, MVT::i8,   Expand);
 237   setOperationAction(ISD::SELECT, MVT::i16,  Expand);
 238   setOperationAction(ISD::SELECT, MVT::i32,  Expand);
 239   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 240   setOperationAction(ISD::SELECT, MVT::f32,  Expand);
 241   setOperationAction(ISD::SELECT, MVT::f64,  Expand);
 242
 243   setOperationAction(ISD::SETCC, MVT::i1,   Expand);
 244   setOperationAction(ISD::SETCC, MVT::i8,   Expand);
 245   setOperationAction(ISD::SETCC, MVT::i16,  Expand);
 246   setOperationAction(ISD::SETCC, MVT::i32,  Expand);
 247   setOperationAction(ISD::SETCC, MVT::i64,  Expand);
 248   setOperationAction(ISD::SETCC, MVT::f32,  Expand);
 249   setOperationAction(ISD::SETCC, MVT::f64,  Expand);
 250
 251   // SPU has a legal FP -> signed INT instruction
 252   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 253   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 254   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 255   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 256
 257   // FDIV on SPU requires custom lowering
 258   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 259   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 260
 261   // SPU has [U|S]INT_TO_FP
 262   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 263   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 264   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 265   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 266   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 267   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 268   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 269   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 270
 271   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 272   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 273   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 274   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 275
 276   // We cannot sextinreg(i1).  Expand to shifts.
 277   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 278
 279   // Support label based line numbers.
 280   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
 281   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 282
 283   // We want to legalize GlobalAddress and ConstantPool nodes into the
 284   // appropriate instructions to materialize the address.
 285   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 286   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 287   setOperationAction(ISD::ConstantPool,  MVT::f32, Custom);
 288   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 289   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 290   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 291   setOperationAction(ISD::ConstantPool,  MVT::f64, Custom);
 292   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 293
 294   // RET must be custom lowered, to meet ABI requirements
 295   setOperationAction(ISD::RET,           MVT::Other, Custom);
 296
 297   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 298   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 299
 300   // Use the default implementation.
 301   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 302   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 303   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 304   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 305   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 306   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 307   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 308
 309   // Cell SPU has instructions for converting between i64 and fp.
 310   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 311   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 312
 313   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 314   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 315
 316   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 317   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 318
 319   // First set operation action for all vector types to expand. Then we
 320   // will selectively turn on ones that can be effectively codegen'd.
 321   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 322   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 323   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 324   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 325   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 326   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 327
 328   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 329        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
 330     // add/sub are legal for all supported vector VT's.
 331     setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
 332     setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
 333     // mul has to be custom lowered.
 334     setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
 335
 336     setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
 337     setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
 338     setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
 339     setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
 340     setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
 341     setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);
 342
 343     // These operations need to be expanded:
 344     setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
 345     setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
 346     setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
 347     setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
 348     setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
 349
 350     // Custom lower build_vector, constant pool spills, insert and
 351     // extract vector elements:
 352     setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
 353     setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
 354     setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
 355     setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
 356     setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
 357     setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
 358   }
 359
 360   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 361   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 362   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 363   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 364   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 365
 366   setSetCCResultType(MVT::i32);
 367   setShiftAmountType(MVT::i32);
 368   setSetCCResultContents(ZeroOrOneSetCCResult);
 369
 370   setStackPointerRegisterToSaveRestore(SPU::R1);
 371
 372   // We have target-specific dag combine patterns for the following nodes:
 373   // e.g., setTargetDAGCombine(ISD::SUB);
 374
 375   computeRegisterProperties();
 376 }
 377
 378 const char *
 379 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 380 {
 381   if (node_names.empty()) {
 382     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 383     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 384     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 385     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 386     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 387     node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
 388     node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
 389     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 390     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 391     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 392     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 393     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 394     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 395     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 396     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
 397     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 398     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 399     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 400     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 401     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 402     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 403     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 404     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 405     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 406     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 407     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 408     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 409     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 410     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
 411       "SPUISD::ROTBYTES_RIGHT_Z";
 412     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 413       "SPUISD::ROTBYTES_RIGHT_S";
 414     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 415     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 416       "SPUISD::ROTBYTES_LEFT_CHAINED";
 417     node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
 418     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 419     node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
 420     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 421     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 422     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 423   }
 424
 425   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 426
 427   return ((i != node_names.end()) ? i->second : 0);
 428 }
 429
 430 //===----------------------------------------------------------------------===//
 431 // Calling convention code:
 432 //===----------------------------------------------------------------------===//
 433
 434 #include "SPUGenCallingConv.inc"
 435
 436 //===----------------------------------------------------------------------===//
 437 //  LowerOperation implementation
 438 //===----------------------------------------------------------------------===//
 439
 440 /// Aligned load common code for CellSPU
 441 /*!
 442   \param[in] Op The SelectionDAG load or store operand
 443   \param[in] DAG The selection DAG
 444   \param[in] ST CellSPU subtarget information structure
 445   \param[in,out] alignment Caller initializes this to the load or store node's
 446   value from getAlignment(), may be updated while generating the aligned load
 447   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 448   offset (divisible by 16, modulo 16 == 0)
 449   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 450   offset of the preferred slot (modulo 16 != 0)
 451   \param[in,out] VT Caller initializes this value type to the the load or store
 452   node's loaded or stored value type; may be updated if an i1-extended load or
 453   store.
 454   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 455   otherwise false. Can help to determine if the chunk needs to be rotated.
 456
 457  Both load and store lowering load a block of data aligned on a 16-byte
 458  boundary. This is the common aligned load code shared between both.
 459  */
 460 static SDOperand
 461 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 462             LSBaseSDNode *LSN,
 463             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 464             MVT::ValueType &VT, bool &was16aligned)
 465 {
 466   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 467   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 468   SDOperand basePtr = LSN->getBasePtr();
 469   SDOperand chain = LSN->getChain();
 470
 471   if (basePtr.getOpcode() == ISD::ADD) {
 472     SDOperand Op1 = basePtr.Val->getOperand(1);
 473
 474     if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
 475       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));
 476
 477       alignOffs = (int) CN->getValue();
 478       prefSlotOffs = (int) (alignOffs & 0xf);
 479
 480       // Adjust the rotation amount to ensure that the final result ends up in
 481       // the preferred slot:
 482       prefSlotOffs -= vtm->prefslot_byte;
 483       basePtr = basePtr.getOperand(0);
 484
 485       // Modify alignment, since the ADD is likely from getElementPtr:
 486       switch (basePtr.getOpcode()) {
 487       case ISD::GlobalAddress:
 488       case ISD::TargetGlobalAddress: {
 489         GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
 490         const GlobalValue *GV = GN->getGlobal();
 491         alignment = GV->getAlignment();
 492         break;
 493       }
 494       }
 495     } else {
 496       alignOffs = 0;
 497       prefSlotOffs = -vtm->prefslot_byte;
 498     }
 499   } else {
 500     alignOffs = 0;
 501     prefSlotOffs = -vtm->prefslot_byte;
 502   }
 503
 504   if (alignment == 16) {
 505     // Realign the base pointer as a D-Form address:
 506     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 507       if (isMemoryOperand(basePtr)) {
 508         SDOperand Zero = DAG.getConstant(0, PtrVT);
 509         unsigned Opc = (!ST->usingLargeMem()
 510                         ? SPUISD::AFormAddr
 511                         : SPUISD::XFormAddr);
 512         basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
 513       }
 514       basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
 515                           basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
 516     }
 517
 518     // Emit the vector load:
 519     was16aligned = true;
 520     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 521                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 522                        LSN->isVolatile(), 16);
 523   }
 524
 525   // Unaligned load or we're using the "large memory" model, which means that
 526   // we have to be very pessimistic:
 527   if (isMemoryOperand(basePtr)) {
 528     basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
 529   }
 530
 531   // Add the offset
 532   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
 533   was16aligned = false;
 534   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 535                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 536                      LSN->isVolatile(), 16);
 537 }
 538
 539 /// Custom lower loads for CellSPU
 540 /*!
 541  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 542  within a 16-byte block, we have to rotate to extract the requested element.
 543  */
 544 static SDOperand
 545 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 546   LoadSDNode *LN = cast<LoadSDNode>(Op);
 547   SDOperand the_chain = LN->getChain();
 548   MVT::ValueType VT = LN->getLoadedVT();
 549   MVT::ValueType OpVT = Op.Val->getValueType(0);
 550   ISD::LoadExtType ExtType = LN->getExtensionType();
 551   unsigned alignment = LN->getAlignment();
 552   SDOperand Ops[8];
 553
 554   // For an extending load of an i1 variable, just call it i8 (or whatever we
 555   // were passed) and make it zero-extended:
 556   if (VT == MVT::i1) {
 557     VT = OpVT;
 558     ExtType = ISD::ZEXTLOAD;
 559   }
 560
 561   switch (LN->getAddressingMode()) {
 562   case ISD::UNINDEXED: {
 563     int offset, rotamt;
 564     bool was16aligned;
 565     SDOperand result =
 566       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 567
 568     if (result.Val == 0)
 569       return result;
 570
 571     the_chain = result.getValue(1);
 572     // Rotate the chunk if necessary
 573     if (rotamt < 0)
 574       rotamt += 16;
 575     if (rotamt != 0 || !was16aligned) {
 576       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 577
 578       if (was16aligned) {
 579         Ops[0] = the_chain;
 580         Ops[1] = result;
 581         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 582       } else {
 583         MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 584         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 585         Ops[0] = the_chain;
 586         Ops[1] = result;
 587         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 588                              DAG.getConstant(rotamt, PtrVT));
 589       }
 590
 591       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 592       the_chain = result.getValue(1);
 593     }
 594
 595     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 596       SDVTList scalarvts;
 597       MVT::ValueType vecVT = MVT::v16i8;
 598
 599       // Convert the loaded v16i8 vector to the appropriate vector type
 600       // specified by the operand:
 601       if (OpVT == VT) {
 602         if (VT != MVT::i1)
 603           vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
 604       } else
 605         vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
 606
 607       Ops[0] = the_chain;
 608       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 609       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 610       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 611       the_chain = result.getValue(1);
 612     } else {
 613       // Handle the sign and zero-extending loads for i1 and i8:
 614       unsigned NewOpC;
 615
 616       if (ExtType == ISD::SEXTLOAD) {
 617         NewOpC = (OpVT == MVT::i1
 618                   ? SPUISD::EXTRACT_I1_SEXT
 619                   : SPUISD::EXTRACT_I8_SEXT);
 620       } else {
 621         assert(ExtType == ISD::ZEXTLOAD);
 622         NewOpC = (OpVT == MVT::i1
 623                   ? SPUISD::EXTRACT_I1_ZEXT
 624                   : SPUISD::EXTRACT_I8_ZEXT);
 625       }
 626
 627       result = DAG.getNode(NewOpC, OpVT, result);
 628     }
 629
 630     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 631     SDOperand retops[2] = { result, the_chain };
 632
 633     result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
 634     return result;
 635   }
 636   case ISD::PRE_INC:
 637   case ISD::PRE_DEC:
 638   case ISD::POST_INC:
 639   case ISD::POST_DEC:
 640   case ISD::LAST_INDEXED_MODE:
 641     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 642             "UNINDEXED\n";
 643     cerr << (unsigned) LN->getAddressingMode() << "\n";
 644     abort();
 645     /*NOTREACHED*/
 646   }
 647
 648   return SDOperand();
 649 }
 650
 651 /// Custom lower stores for CellSPU
 652 /*!
 653  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 654  within a 16-byte block, we have to generate a shuffle to insert the
 655  requested element into its place, then store the resulting block.
 656  */
 657 static SDOperand
 658 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 659   StoreSDNode *SN = cast<StoreSDNode>(Op);
 660   SDOperand Value = SN->getValue();
 661   MVT::ValueType VT = Value.getValueType();
 662   MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
 663   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 664   unsigned alignment = SN->getAlignment();
 665
 666   switch (SN->getAddressingMode()) {
 667   case ISD::UNINDEXED: {
 668     int chunk_offset, slot_offset;
 669     bool was16aligned;
 670
 671     // The vector type we really want to load from the 16-byte chunk, except
 672     // in the case of MVT::i1, which has to be v16i8.
 673     unsigned vecVT, stVecVT = MVT::v16i8;
 674
 675     if (StVT != MVT::i1)
 676       stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
 677     vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
 678
 679     SDOperand alignLoadVec =
 680       AlignedLoad(Op, DAG, ST, SN, alignment,
 681                   chunk_offset, slot_offset, VT, was16aligned);
 682
 683     if (alignLoadVec.Val == 0)
 684       return alignLoadVec;
 685
 686     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 687     SDOperand basePtr = LN->getBasePtr();
 688     SDOperand the_chain = alignLoadVec.getValue(1);
 689     SDOperand theValue = SN->getValue();
 690     SDOperand result;
 691
 692     if (StVT != VT
 693         && (theValue.getOpcode() == ISD::AssertZext
 694             || theValue.getOpcode() == ISD::AssertSext)) {
 695       // Drill down and get the value for zero- and sign-extended
 696       // quantities
 697       theValue = theValue.getOperand(0);
 698     }
 699
 700     chunk_offset &= 0xf;
 701
 702     SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 703     SDOperand insertEltPtr;
 704     SDOperand insertEltOp;
 705
 706     // If the base pointer is already a D-form address, then just create
 707     // a new D-form address with a slot offset and the orignal base pointer.
 708     // Otherwise generate a D-form address with the slot offset relative
 709     // to the stack pointer, which is always aligned.
 710     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 711     DEBUG(basePtr.Val->dump(&DAG));
 712     DEBUG(cerr << "\n");
 713
 714     if (basePtr.getOpcode() == SPUISD::DFormAddr) {
 715       insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
 716                                  basePtr.getOperand(0),
 717                                  insertEltOffs);
 718     } else if (basePtr.getOpcode() == SPUISD::XFormAddr ||
 719                (basePtr.getOpcode() == ISD::ADD
 720                 && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) {
 721       insertEltPtr = basePtr;
 722     } else {
 723       insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
 724                                  DAG.getRegister(SPU::R1, PtrVT),
 725                                  insertEltOffs);
 726     }
 727
 728     insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
 729     result = DAG.getNode(SPUISD::SHUFB, vecVT,
 730                          DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
 731                          alignLoadVec,
 732                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 733
 734     result = DAG.getStore(the_chain, result, basePtr,
 735                           LN->getSrcValue(), LN->getSrcValueOffset(),
 736                           LN->isVolatile(), LN->getAlignment());
 737
 738     return result;
 739     /*UNREACHED*/
 740   }
 741   case ISD::PRE_INC:
 742   case ISD::PRE_DEC:
 743   case ISD::POST_INC:
 744   case ISD::POST_DEC:
 745   case ISD::LAST_INDEXED_MODE:
 746     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 747             "UNINDEXED\n";
 748     cerr << (unsigned) SN->getAddressingMode() << "\n";
 749     abort();
 750     /*NOTREACHED*/
 751   }
 752
 753   return SDOperand();
 754 }
 755
 756 /// Generate the address of a constant pool entry.
 757 static SDOperand
 758 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 759   MVT::ValueType PtrVT = Op.getValueType();
 760   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 761   Constant *C = CP->getConstVal();
 762   SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 763   SDOperand Zero = DAG.getConstant(0, PtrVT);
 764   const TargetMachine &TM = DAG.getTarget();
 765
 766   if (TM.getRelocationModel() == Reloc::Static) {
 767     if (!ST->usingLargeMem()) {
 768       // Just return the SDOperand with the constant pool address in it.
 769       return CPI;
 770     } else {
 771 #if 1
 772       // Generate hi/lo address pair
 773       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 774       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 775
 776       return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
 777 #else
 778       return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
 779 #endif
 780     }
 781   }
 782
 783   assert(0 &&
 784          "LowerConstantPool: Relocation model other than static not supported.");
 785   return SDOperand();
 786 }
 787
 788 static SDOperand
 789 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 790   MVT::ValueType PtrVT = Op.getValueType();
 791   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 792   SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 793   SDOperand Zero = DAG.getConstant(0, PtrVT);
 794   const TargetMachine &TM = DAG.getTarget();
 795
 796   if (TM.getRelocationModel() == Reloc::Static) {
 797     return (!ST->usingLargeMem()
 798             ? JTI
 799             : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
 800   }
 801
 802   assert(0 &&
 803          "LowerJumpTable: Relocation model other than static not supported.");
 804   return SDOperand();
 805 }
 806
 807 static SDOperand
 808 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 809   MVT::ValueType PtrVT = Op.getValueType();
 810   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 811   GlobalValue *GV = GSDN->getGlobal();
 812   SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 813   const TargetMachine &TM = DAG.getTarget();
 814   SDOperand Zero = DAG.getConstant(0, PtrVT);
 815
 816   if (TM.getRelocationModel() == Reloc::Static) {
 817     return (!ST->usingLargeMem()
 818             ? GA
 819             : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
 820   } else {
 821     cerr << "LowerGlobalAddress: Relocation model other than static not "
 822          << "supported.\n";
 823     abort();
 824     /*NOTREACHED*/
 825   }
 826
 827   return SDOperand();
 828 }
 829
 830 //! Custom lower i64 integer constants
 831 /*!
 832  This code inserts all of the necessary juggling that needs to occur to load
 833  a 64-bit constant into a register.
 834  */
 835 static SDOperand
 836 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
 837   unsigned VT = Op.getValueType();
 838   ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
 839
 840   if (VT == MVT::i64) {
 841     SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
 842     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 843                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 844
 845   } else {
 846     cerr << "LowerConstant: unhandled constant type "
 847          << MVT::getValueTypeString(VT)
 848          << "\n";
 849     abort();
 850     /*NOTREACHED*/
 851   }
 852
 853   return SDOperand();
 854 }
 855
 856 //! Custom lower single precision floating point constants
 857 /*!
 858   "float" immediates can be lowered as if they were unsigned 32-bit integers.
 859   The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
 860   target description.
 861  */
 862 static SDOperand
 863 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
 864   unsigned VT = Op.getValueType();
 865   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
 866
 867   assert((FP != 0) &&
 868          "LowerConstantFP: Node is not ConstantFPSDNode");
 869
 870   if (VT == MVT::f32) {
 871     float targetConst = FP->getValueAPF().convertToFloat();
 872     return DAG.getNode(SPUISD::SFPConstant, VT,
 873                        DAG.getTargetConstantFP(targetConst, VT));
 874   } else if (VT == MVT::f64) {
 875     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 876     return DAG.getNode(ISD::BIT_CONVERT, VT,
 877                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 878   }
 879
 880   return SDOperand();
 881 }
 882
 883 static SDOperand
 884 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 885 {
 886   MachineFunction &MF = DAG.getMachineFunction();
 887   MachineFrameInfo *MFI = MF.getFrameInfo();
 888   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 889   SmallVector<SDOperand, 8> ArgValues;
 890   SDOperand Root = Op.getOperand(0);
 891   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
 892
 893   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 894   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 895
 896   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 897   unsigned ArgRegIdx = 0;
 898   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 899
 900   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 901
 902   // Add DAG nodes to load the arguments or copy them out of registers.
 903   for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
 904     SDOperand ArgVal;
 905     bool needsLoad = false;
 906     MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
 907     unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
 908
 909     switch (ObjectVT) {
 910     default: {
 911       cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 912            << MVT::getValueTypeString(ObjectVT)
 913            << "\n";
 914       abort();
 915     }
 916     case MVT::i8:
 917       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 918         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
 919         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 920         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
 921         ++ArgRegIdx;
 922       } else {
 923         needsLoad = true;
 924       }
 925       break;
 926     case MVT::i16:
 927       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 928         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
 929         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 930         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
 931         ++ArgRegIdx;
 932       } else {
 933         needsLoad = true;
 934       }
 935       break;
 936     case MVT::i32:
 937       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 938         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 939         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 940         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
 941         ++ArgRegIdx;
 942       } else {
 943         needsLoad = true;
 944       }
 945       break;
 946     case MVT::i64:
 947       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 948         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
 949         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 950         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
 951         ++ArgRegIdx;
 952       } else {
 953         needsLoad = true;
 954       }
 955       break;
 956     case MVT::f32:
 957       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 958         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
 959         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 960         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
 961         ++ArgRegIdx;
 962       } else {
 963         needsLoad = true;
 964       }
 965       break;
 966     case MVT::f64:
 967       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 968         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
 969         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 970         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
 971         ++ArgRegIdx;
 972       } else {
 973         needsLoad = true;
 974       }
 975       break;
 976     case MVT::v2f64:
 977     case MVT::v4f32:
 978     case MVT::v4i32:
 979     case MVT::v8i16:
 980     case MVT::v16i8:
 981       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 982         unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
 983         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 984         ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
 985         ++ArgRegIdx;
 986       } else {
 987         needsLoad = true;
 988       }
 989       break;
 990     }
 991
 992     // We need to load the argument to a virtual register if we determined above
 993     // that we ran out of physical registers of the appropriate type
 994     if (needsLoad) {
 995       // If the argument is actually used, emit a load from the right stack
 996       // slot.
 997       if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
 998         int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
 999         SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1000         ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1001       } else {
1002         // Don't emit a dead load.
1003         ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1004       }
1005
1006       ArgOffset += StackSlotSize;
1007     }
1008
1009     ArgValues.push_back(ArgVal);
1010   }
1011
1012   // If the function takes variable number of arguments, make a frame index for
1013   // the start of the first vararg value... for expansion of llvm.va_start.
1014   if (isVarArg) {
1015     VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1016                                                ArgOffset);
1017     SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1018     // If this function is vararg, store any remaining integer argument regs to
1019     // their spots on the stack so that they may be loaded by deferencing the
1020     // result of va_next.
1021     SmallVector<SDOperand, 8> MemOps;
1022     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1023       unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1024       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1025       SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1026       SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1027       MemOps.push_back(Store);
1028       // Increment the address by four for the next argument to store
1029       SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1030       FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1031     }
1032     if (!MemOps.empty())
1033       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1034   }
1035
1036   ArgValues.push_back(Root);
1037
1038   // Return the new list of results.
1039   std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1040                                     Op.Val->value_end());
1041   return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1042 }
1043
1044 /// isLSAAddress - Return the immediate to use if the specified
1045 /// value is representable as a LSA address.
1046 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1047   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1048   if (!C) return 0;
1049
1050   int Addr = C->getValue();
1051   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1052       (Addr << 14 >> 14) != Addr)
1053     return 0;  // Top 14 bits have to be sext of immediate.
1054
1055   return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1056 }
1057
1058 static
1059 SDOperand
1060 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1061   SDOperand Chain = Op.getOperand(0);
1062 #if 0
1063   bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1064   bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1065 #endif
1066   SDOperand Callee    = Op.getOperand(4);
1067   unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1068   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1069   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1070   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1071
1072   // Handy pointer type
1073   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1074
1075   // Accumulate how many bytes are to be pushed on the stack, including the
1076   // linkage area, and parameter passing area.  According to the SPU ABI,
1077   // we minimally need space for [LR] and [SP]
1078   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1079
1080   // Set up a copy of the stack pointer for use loading and storing any
1081   // arguments that may not fit in the registers available for argument
1082   // passing.
1083   SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1084
1085   // Figure out which arguments are going to go in registers, and which in
1086   // memory.
1087   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1088   unsigned ArgRegIdx = 0;
1089
1090   // Keep track of registers passing arguments
1091   std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1092   // And the arguments passed on the stack
1093   SmallVector<SDOperand, 8> MemOpChains;
1094
1095   for (unsigned i = 0; i != NumOps; ++i) {
1096     SDOperand Arg = Op.getOperand(5+2*i);
1097
1098     // PtrOff will be used to store the current argument to the stack if a
1099     // register cannot be found for it.
1100     SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1101     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1102
1103     switch (Arg.getValueType()) {
1104     default: assert(0 && "Unexpected ValueType for argument!");
1105     case MVT::i32:
1106     case MVT::i64:
1107     case MVT::i128:
1108       if (ArgRegIdx != NumArgRegs) {
1109         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1110       } else {
1111         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1112         ArgOffset += StackSlotSize;
1113       }
1114       break;
1115     case MVT::f32:
1116     case MVT::f64:
1117       if (ArgRegIdx != NumArgRegs) {
1118         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1119       } else {
1120         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1121         ArgOffset += StackSlotSize;
1122       }
1123       break;
1124     case MVT::v4f32:
1125     case MVT::v4i32:
1126     case MVT::v8i16:
1127     case MVT::v16i8:
1128       if (ArgRegIdx != NumArgRegs) {
1129         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1130       } else {
1131         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1132         ArgOffset += StackSlotSize;
1133       }
1134       break;
1135     }
1136   }
1137
1138   // Update number of stack bytes actually used, insert a call sequence start
1139   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1140   Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1141
1142   if (!MemOpChains.empty()) {
1143     // Adjust the stack pointer for the stack arguments.
1144     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1145                         &MemOpChains[0], MemOpChains.size());
1146   }
1147
1148   // Build a sequence of copy-to-reg nodes chained together with token chain
1149   // and flag operands which copy the outgoing args into the appropriate regs.
1150   SDOperand InFlag;
1151   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1152     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1153                              InFlag);
1154     InFlag = Chain.getValue(1);
1155   }
1156
1157   std::vector<MVT::ValueType> NodeTys;
1158   NodeTys.push_back(MVT::Other);   // Returns a chain
1159   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1160
1161   SmallVector<SDOperand, 8> Ops;
1162   unsigned CallOpc = SPUISD::CALL;
1163
1164   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1165   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1166   // node so that legalize doesn't hack it.
1167   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1168     GlobalValue *GV = G->getGlobal();
1169     unsigned CalleeVT = Callee.getValueType();
1170     SDOperand Zero = DAG.getConstant(0, PtrVT);
1171     SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1172
1173     if (!ST->usingLargeMem()) {
1174       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1175       // style calls, otherwise, external symbols are BRASL calls. This assumes
1176       // that declared/defined symbols are in the same compilation unit and can
1177       // be reached through PC-relative jumps.
1178       //
1179       // NOTE:
1180       // This may be an unsafe assumption for JIT and really large compilation
1181       // units.
1182       if (GV->isDeclaration()) {
1183         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1184       } else {
1185         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1186       }
1187     } else {
1188       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1189       // address pairs:
1190       Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
1191     }
1192   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1193     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1194   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1195     // If this is an absolute destination address that appears to be a legal
1196     // local store address, use the munged value.
1197     Callee = SDOperand(Dest, 0);
1198   }
1199
1200   Ops.push_back(Chain);
1201   Ops.push_back(Callee);
1202
1203   // Add argument registers to the end of the list so that they are known live
1204   // into the call.
1205   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1206     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1207                                   RegsToPass[i].second.getValueType()));
1208
1209   if (InFlag.Val)
1210     Ops.push_back(InFlag);
1211   Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1212   InFlag = Chain.getValue(1);
1213
1214   SDOperand ResultVals[3];
1215   unsigned NumResults = 0;
1216   NodeTys.clear();
1217
1218   // If the call has results, copy the values out of the ret val registers.
1219   switch (Op.Val->getValueType(0)) {
1220   default: assert(0 && "Unexpected ret value!");
1221   case MVT::Other: break;
1222   case MVT::i32:
1223     if (Op.Val->getValueType(1) == MVT::i32) {
1224       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1225       ResultVals[0] = Chain.getValue(0);
1226       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1227                                  Chain.getValue(2)).getValue(1);
1228       ResultVals[1] = Chain.getValue(0);
1229       NumResults = 2;
1230       NodeTys.push_back(MVT::i32);
1231     } else {
1232       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1233       ResultVals[0] = Chain.getValue(0);
1234       NumResults = 1;
1235     }
1236     NodeTys.push_back(MVT::i32);
1237     break;
1238   case MVT::i64:
1239     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1240     ResultVals[0] = Chain.getValue(0);
1241     NumResults = 1;
1242     NodeTys.push_back(MVT::i64);
1243     break;
1244   case MVT::f32:
1245   case MVT::f64:
1246     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1247                                InFlag).getValue(1);
1248     ResultVals[0] = Chain.getValue(0);
1249     NumResults = 1;
1250     NodeTys.push_back(Op.Val->getValueType(0));
1251     break;
1252   case MVT::v2f64:
1253   case MVT::v4f32:
1254   case MVT::v4i32:
1255   case MVT::v8i16:
1256   case MVT::v16i8:
1257     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1258                                    InFlag).getValue(1);
1259     ResultVals[0] = Chain.getValue(0);
1260     NumResults = 1;
1261     NodeTys.push_back(Op.Val->getValueType(0));
1262     break;
1263   }
1264
1265   Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1266                       DAG.getConstant(NumStackBytes, PtrVT));
1267   NodeTys.push_back(MVT::Other);
1268
1269   // If the function returns void, just return the chain.
1270   if (NumResults == 0)
1271     return Chain;
1272
1273   // Otherwise, merge everything together with a MERGE_VALUES node.
1274   ResultVals[NumResults++] = Chain;
1275   SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1276                               ResultVals, NumResults);
1277   return Res.getValue(Op.ResNo);
1278 }
1279
1280 static SDOperand
1281 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1282   SmallVector<CCValAssign, 16> RVLocs;
1283   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1284   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1285   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1286   CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1287
1288   // If this is the first return lowered for this function, add the regs to the
1289   // liveout set for the function.
1290   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1291     for (unsigned i = 0; i != RVLocs.size(); ++i)
1292       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1293   }
1294
1295   SDOperand Chain = Op.getOperand(0);
1296   SDOperand Flag;
1297
1298   // Copy the result values into the output registers.
1299   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1300     CCValAssign &VA = RVLocs[i];
1301     assert(VA.isRegLoc() && "Can only return in registers!");
1302     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1303     Flag = Chain.getValue(1);
1304   }
1305
1306   if (Flag.Val)
1307     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1308   else
1309     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1310 }
1311
1312
1313 //===----------------------------------------------------------------------===//
1314 // Vector related lowering:
1315 //===----------------------------------------------------------------------===//
1316
1317 static ConstantSDNode *
1318 getVecImm(SDNode *N) {
1319   SDOperand OpVal(0, 0);
1320
1321   // Check to see if this buildvec has a single non-undef value in its elements.
1322   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1323     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1324     if (OpVal.Val == 0)
1325       OpVal = N->getOperand(i);
1326     else if (OpVal != N->getOperand(i))
1327       return 0;
1328   }
1329
1330   if (OpVal.Val != 0) {
1331     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1332       return CN;
1333     }
1334   }
1335
1336   return 0; // All UNDEF: use implicit def.; not Constant node
1337 }
1338
1339 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1340 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1341 /// constant
1342 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343                               MVT::ValueType ValueType) {
1344   if (ConstantSDNode *CN = getVecImm(N)) {
1345     uint64_t Value = CN->getValue();
1346     if (Value <= 0x3ffff)
1347       return DAG.getConstant(Value, ValueType);
1348   }
1349
1350   return SDOperand();
1351 }
1352
1353 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1354 /// and the value fits into a signed 16-bit constant, and if so, return the
1355 /// constant
1356 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1357                               MVT::ValueType ValueType) {
1358   if (ConstantSDNode *CN = getVecImm(N)) {
1359     if (ValueType == MVT::i32) {
1360       int Value = (int) CN->getValue();
1361       int SExtValue = ((Value & 0xffff) << 16) >> 16;
1362
1363       if (Value == SExtValue)
1364         return DAG.getConstant(Value, ValueType);
1365     } else if (ValueType == MVT::i16) {
1366       short Value = (short) CN->getValue();
1367       int SExtValue = ((int) Value << 16) >> 16;
1368
1369       if (Value == (short) SExtValue)
1370         return DAG.getConstant(Value, ValueType);
1371     } else if (ValueType == MVT::i64) {
1372       int64_t Value = CN->getValue();
1373       int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1374
1375       if (Value == SExtValue)
1376         return DAG.getConstant(Value, ValueType);
1377     }
1378   }
1379
1380   return SDOperand();
1381 }
1382
1383 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1384 /// and the value fits into a signed 10-bit constant, and if so, return the
1385 /// constant
1386 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1387                               MVT::ValueType ValueType) {
1388   if (ConstantSDNode *CN = getVecImm(N)) {
1389     int Value = (int) CN->getValue();
1390     if ((ValueType == MVT::i32 && isS10Constant(Value))
1391         || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1392       return DAG.getConstant(Value, ValueType);
1393   }
1394
1395   return SDOperand();
1396 }
1397
1398 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1399 /// and the value fits into a signed 8-bit constant, and if so, return the
1400 /// constant.
1401 ///
1402 /// @note: The incoming vector is v16i8 because that's the only way we can load
1403 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1404 /// same value.
1405 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1406                              MVT::ValueType ValueType) {
1407   if (ConstantSDNode *CN = getVecImm(N)) {
1408     int Value = (int) CN->getValue();
1409     if (ValueType == MVT::i16
1410         && Value <= 0xffff                 /* truncated from uint64_t */
1411         && ((short) Value >> 8) == ((short) Value & 0xff))
1412       return DAG.getConstant(Value & 0xff, ValueType);
1413     else if (ValueType == MVT::i8
1414              && (Value & 0xff) == Value)
1415       return DAG.getConstant(Value, ValueType);
1416   }
1417
1418   return SDOperand();
1419 }
1420
1421 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1422 /// and the value fits into a signed 16-bit constant, and if so, return the
1423 /// constant
1424 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1425                                MVT::ValueType ValueType) {
1426   if (ConstantSDNode *CN = getVecImm(N)) {
1427     uint64_t Value = CN->getValue();
1428     if ((ValueType == MVT::i32
1429           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1430         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1431       return DAG.getConstant(Value >> 16, ValueType);
1432   }
1433
1434   return SDOperand();
1435 }
1436
1437 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1438 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1439   if (ConstantSDNode *CN = getVecImm(N)) {
1440     return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1441   }
1442
1443   return SDOperand();
1444 }
1445
1446 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1447 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1448   if (ConstantSDNode *CN = getVecImm(N)) {
1449     return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1450   }
1451
1452   return SDOperand();
1453 }
1454
1455 // If this is a vector of constants or undefs, get the bits.  A bit in
1456 // UndefBits is set if the corresponding element of the vector is an
1457 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1458 // zero.   Return true if this is not an array of constants, false if it is.
1459 //
1460 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1461                                        uint64_t UndefBits[2]) {
1462   // Start with zero'd results.
1463   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1464
1465   unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1466   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1467     SDOperand OpVal = BV->getOperand(i);
1468
1469     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1470     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1471
1472     uint64_t EltBits = 0;
1473     if (OpVal.getOpcode() == ISD::UNDEF) {
1474       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1475       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1476       continue;
1477     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1478       EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1479     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1480       const APFloat &apf = CN->getValueAPF();
1481       EltBits = (CN->getValueType(0) == MVT::f32
1482                  ? FloatToBits(apf.convertToFloat())
1483                  : DoubleToBits(apf.convertToDouble()));
1484     } else {
1485       // Nonconstant element.
1486       return true;
1487     }
1488
1489     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1490   }
1491
1492   //printf("%llx %llx  %llx %llx\n",
1493   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1494   return false;
1495 }
1496
/// isConstantSplat - Decide whether a 128-bit constant, given as value bits
/// plus a mask of undefined bits, is a repetition (splat) of one value at the
/// granularity selected by MinSplatBits.  Undefined bits never prevent two
/// pieces from matching.
///
/// The reported splat size depends only on MinSplatBits:
///   MinSplatBits >= 64  -> 8 bytes
///   MinSplatBits >= 32  -> 4 bytes
///   MinSplatBits >= 16  -> 2 bytes
///   otherwise           -> 1 byte
/// and the function succeeds only if the constant really is a splat at that
/// size.  For example, "0x01010101010101..." with MinSplatBits == 8 yields
/// SplatBits = 0x01 and SplatSize = 1 byte.
///
/// \param Bits128      the two 64-bit words of constant bits.
/// \param Undef128     the matching words; a set bit marks an undefined bit.
/// \param MinSplatBits requested splat granularity in bits (see table above).
/// \param SplatBits    [out] the splat value, merged from all matching pieces.
/// \param SplatUndef   [out] bits of the splat value that are undefined in
///                     every piece.
/// \param SplatSize    [out] splat size in bytes.
/// \return true if a splat was found; on false the outputs are left untouched.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // The two 64-bit words must agree wherever both are defined.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  // Merge the words: a bit is known if either word defines it, and undefined
  // only if both words leave it undefined.
  const uint64_t Bits64  = Bits128[0] | Bits128[1];
  const uint64_t Undef64 = Undef128[0] & Undef128[1];

  if (MinSplatBits >= 64) {
    // Report the merged value.  Using word 0 alone would return 0 when word 0
    // is entirely undef and only word 1 carries the constant.
    SplatBits = Bits64;
    SplatUndef = Undef64;
    SplatSize = 8;
    return true;
  }

  // The top 32 bits must be the same as the lower 32 bits, ignoring undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  const uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  const uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  if (MinSplatBits >= 32) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // The top 16 bits must be the same as the lower 16 bits, ignoring undefs.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  const uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  const uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if (MinSplatBits >= 16) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // The top 8 bits must be the same as the lower 8 bits, ignoring undefs.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  // We have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
1560
1561 // If this is a case we can't handle, return null and let the default
1562 // expansion code take care of it.  If we CAN select this case, and if it
1563 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1564 // this case more efficiently than a constant pool load, lower it to the
1565 // sequence of ops that should be used.
1566 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1567   MVT::ValueType VT = Op.getValueType();
1568   // If this is a vector of constants or undefs, get the bits.  A bit in
1569   // UndefBits is set if the corresponding element of the vector is an
1570   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1571   // zero.
1572   uint64_t VectorBits[2];
1573   uint64_t UndefBits[2];
1574   uint64_t SplatBits, SplatUndef;
1575   int SplatSize;
1576   if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1577       || !isConstantSplat(VectorBits, UndefBits,
1578                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1579                           SplatBits, SplatUndef, SplatSize))
1580     return SDOperand();   // Not a constant vector, not a splat.
1581
1582   switch (VT) {
1583   default:
1584   case MVT::v4f32: {
1585     uint32_t Value32 = SplatBits;
1586     assert(SplatSize == 4
1587            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1588     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1589     SDOperand T = DAG.getConstant(Value32, MVT::i32);
1590     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1591                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1592     break;
1593   }
1594   case MVT::v2f64: {
1595     uint64_t f64val = SplatBits;
1596     assert(SplatSize == 8
1597            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1598     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1599     SDOperand T = DAG.getConstant(f64val, MVT::i64);
1600     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1601                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1602     break;
1603   }
1604   case MVT::v16i8: {
1605    // 8-bit constants have to be expanded to 16-bits
1606    unsigned short Value16 = SplatBits | (SplatBits << 8);
1607    SDOperand Ops[8];
1608    for (int i = 0; i < 8; ++i)
1609      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1610    return DAG.getNode(ISD::BIT_CONVERT, VT,
1611                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1612   }
1613   case MVT::v8i16: {
1614     unsigned short Value16;
1615     if (SplatSize == 2)
1616       Value16 = (unsigned short) (SplatBits & 0xffff);
1617     else
1618       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1619     SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1620     SDOperand Ops[8];
1621     for (int i = 0; i < 8; ++i) Ops[i] = T;
1622     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1623   }
1624   case MVT::v4i32: {
1625     unsigned int Value = SplatBits;
1626     SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1627     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1628   }
1629   case MVT::v2i64: {
1630     uint64_t val = SplatBits;
1631     uint32_t upper = uint32_t(val >> 32);
1632     uint32_t lower = uint32_t(val);
1633
1634     if (val != 0) {
1635       SDOperand LO32;
1636       SDOperand HI32;
1637       SmallVector<SDOperand, 16> ShufBytes;
1638       SDOperand Result;
1639       bool upper_special, lower_special;
1640
1641       // NOTE: This code creates common-case shuffle masks that can be easily
1642       // detected as common expressions. It is not attempting to create highly
1643       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1644
1645       // Detect if the upper or lower half is a special shuffle mask pattern:
1646       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1647       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1648
1649       // Create lower vector if not a special pattern
1650       if (!lower_special) {
1651         SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1652         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1653                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1654                                        LO32C, LO32C, LO32C, LO32C));
1655       }
1656
1657       // Create upper vector if not a special pattern
1658       if (!upper_special) {
1659         SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1660         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1661                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662                                        HI32C, HI32C, HI32C, HI32C));
1663       }
1664
1665       // If either upper or lower are special, then the two input operands are
1666       // the same (basically, one of them is a "don't care")
1667       if (lower_special)
1668         LO32 = HI32;
1669       if (upper_special)
1670         HI32 = LO32;
1671       if (lower_special && upper_special) {
1672         // Unhappy situation... both upper and lower are special, so punt with
1673         // a target constant:
1674         SDOperand Zero = DAG.getConstant(0, MVT::i32);
1675         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1676                                   Zero, Zero);
1677       }
1678
1679       for (int i = 0; i < 4; ++i) {
1680         for (int j = 0; j < 4; ++j) {
1681           SDOperand V;
1682           bool process_upper, process_lower;
1683           uint64_t val = 0;
1684
1685           process_upper = (upper_special && (i & 1) == 0);
1686           process_lower = (lower_special && (i & 1) == 1);
1687
1688           if (process_upper || process_lower) {
1689             if ((process_upper && upper == 0)
1690                 || (process_lower && lower == 0))
1691               val = 0x80;
1692             else if ((process_upper && upper == 0xffffffff)
1693                      || (process_lower && lower == 0xffffffff))
1694               val = 0xc0;
1695             else if ((process_upper && upper == 0x80000000)
1696                      || (process_lower && lower == 0x80000000))
1697               val = (j == 0 ? 0xe0 : 0x80);
1698           } else
1699             val = i * 4 + j + ((i & 1) * 16);
1700
1701           ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1702         }
1703       }
1704
1705       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1706                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1707                                      &ShufBytes[0], ShufBytes.size()));
1708     } else {
1709       // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1710       SDOperand Zero = DAG.getConstant(0, MVT::i32);
1711       return DAG.getNode(ISD::BIT_CONVERT, VT,
1712                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1713                                      Zero, Zero, Zero, Zero));
1714     }
1715   }
1716   }
1717
1718   return SDOperand();
1719 }
1720
1721 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1722 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1723 /// permutation vector, V3, is monotonically increasing with one "exception"
1724 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1725 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1726 /// In either case, the net result is going to eventually invoke SHUFB to
1727 /// permute/shuffle the bytes from V1 and V2.
1728 /// \note
1729 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1730 /// control word for byte/halfword/word insertion. This takes care of a single
1731 /// element move from V2 into V1.
1732 /// \note
1733 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1734 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1735   SDOperand V1 = Op.getOperand(0);
1736   SDOperand V2 = Op.getOperand(1);
1737   SDOperand PermMask = Op.getOperand(2);
1738
1739   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1740
1741   // If we have a single element being moved from V1 to V2, this can be handled
1742   // using the C*[DX] compute mask instructions, but the vector elements have
1743   // to be monotonically increasing with one exception element.
1744   MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1745   unsigned EltsFromV2 = 0;
1746   unsigned V2Elt = 0;
1747   unsigned V2EltIdx0 = 0;
1748   unsigned CurrElt = 0;
1749   bool monotonic = true;
1750   if (EltVT == MVT::i8)
1751     V2EltIdx0 = 16;
1752   else if (EltVT == MVT::i16)
1753     V2EltIdx0 = 8;
1754   else if (EltVT == MVT::i32)
1755     V2EltIdx0 = 4;
1756   else
1757     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1758
1759   for (unsigned i = 0, e = PermMask.getNumOperands();
1760        EltsFromV2 <= 1 && monotonic && i != e;
1761        ++i) {
1762     unsigned SrcElt;
1763     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1764       SrcElt = 0;
1765     else
1766       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1767
1768     if (SrcElt >= V2EltIdx0) {
1769       ++EltsFromV2;
1770       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1771     } else if (CurrElt != SrcElt) {
1772       monotonic = false;
1773     }
1774
1775     ++CurrElt;
1776   }
1777
1778   if (EltsFromV2 == 1 && monotonic) {
1779     // Compute mask and shuffle
1780     MachineFunction &MF = DAG.getMachineFunction();
1781     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1782     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1783     MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1784     // Initialize temporary register to 0
1785     SDOperand InitTempReg =
1786       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1787     // Copy register's contents as index in INSERT_MASK:
1788     SDOperand ShufMaskOp =
1789       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1790                   DAG.getTargetConstant(V2Elt, MVT::i32),
1791                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1792     // Use shuffle mask in SHUFB synthetic instruction:
1793     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1794   } else {
1795     // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1796     unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1797
1798     SmallVector<SDOperand, 16> ResultMask;
1799     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1800       unsigned SrcElt;
1801       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1802         SrcElt = 0;
1803       else
1804         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1805
1806       for (unsigned j = 0; j != BytesPerElement; ++j) {
1807         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1808                                              MVT::i8));
1809       }
1810     }
1811
1812     SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1813                                       &ResultMask[0], ResultMask.size());
1814     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1815   }
1816 }
1817
1818 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1819   SDOperand Op0 = Op.getOperand(0);                     // Op0 = the scalar
1820
1821   if (Op0.Val->getOpcode() == ISD::Constant) {
1822     // For a constant, build the appropriate constant vector, which will
1823     // eventually simplify to a vector register load.
1824
1825     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1826     SmallVector<SDOperand, 16> ConstVecValues;
1827     MVT::ValueType VT;
1828     size_t n_copies;
1829
1830     // Create a constant vector:
1831     switch (Op.getValueType()) {
1832     default: assert(0 && "Unexpected constant value type in "
1833                          "LowerSCALAR_TO_VECTOR");
1834     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1835     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1836     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1837     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1838     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1839     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1840     }
1841
1842     SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1843     for (size_t j = 0; j < n_copies; ++j)
1844       ConstVecValues.push_back(CValue);
1845
1846     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1847                        &ConstVecValues[0], ConstVecValues.size());
1848   } else {
1849     // Otherwise, copy the value from one register to another:
1850     switch (Op0.getValueType()) {
1851     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1852     case MVT::i8:
1853     case MVT::i16:
1854     case MVT::i32:
1855     case MVT::i64:
1856     case MVT::f32:
1857     case MVT::f64:
1858       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1859     }
1860   }
1861
1862   return SDOperand();
1863 }
1864
1865 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1866   switch (Op.getValueType()) {
1867   case MVT::v4i32: {
1868     SDOperand rA = Op.getOperand(0);
1869     SDOperand rB = Op.getOperand(1);
1870     SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1871     SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1872     SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1873     SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1874
1875     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1876     break;
1877   }
1878
1879   // Multiply two v8i16 vectors (pipeline friendly version):
1880   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1881   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1882   // c) Use SELB to select upper and lower halves from the intermediate results
1883   //
1884   // NOTE: We really want to move the FSMBI to earlier to actually get the
1885   // dual-issue. This code does manage to do this, even if it's a little on
1886   // the wacky side
1887   case MVT::v8i16: {
1888     MachineFunction &MF = DAG.getMachineFunction();
1889     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1890     SDOperand Chain = Op.getOperand(0);
1891     SDOperand rA = Op.getOperand(0);
1892     SDOperand rB = Op.getOperand(1);
1893     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1894     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1895
1896     SDOperand FSMBOp =
1897       DAG.getCopyToReg(Chain, FSMBIreg,
1898                        DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1899                                    DAG.getConstant(0xcccc, MVT::i32)));
1900
1901     SDOperand HHProd =
1902       DAG.getCopyToReg(FSMBOp, HiProdReg,
1903                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1904
1905     SDOperand HHProd_v4i32 =
1906       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1907                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1908
1909     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1910                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1911                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1912                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1913                                                HHProd_v4i32,
1914                                                DAG.getConstant(16, MVT::i16))),
1915                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1916   }
1917
1918   // This M00sE is N@stI! (apologies to Monty Python)
1919   //
1920   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1921   // is to break it all apart, sign extend, and reassemble the various
1922   // intermediate products.
1923   case MVT::v16i8: {
1924     MachineFunction &MF = DAG.getMachineFunction();
1925     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1926     SDOperand Chain = Op.getOperand(0);
1927     SDOperand rA = Op.getOperand(0);
1928     SDOperand rB = Op.getOperand(1);
1929     SDOperand c8 = DAG.getConstant(8, MVT::i8);
1930     SDOperand c16 = DAG.getConstant(16, MVT::i8);
1931
1932     unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1933     unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1934     unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1935
1936     SDOperand LLProd =
1937       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1938                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1939                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1940
1941     SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1942
1943     SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1944
1945     SDOperand LHProd =
1946       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1947                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1948
1949     SDOperand FSMBdef_2222 =
1950       DAG.getCopyToReg(Chain, FSMBreg_2222,
1951                        DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1952                                    DAG.getConstant(0x2222, MVT::i32)));
1953
1954     SDOperand FSMBuse_2222 =
1955       DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1956
1957     SDOperand LoProd_1 =
1958       DAG.getCopyToReg(Chain, LoProd_reg,
1959                        DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1960                                    FSMBuse_2222));
1961
1962     SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1963
1964     SDOperand LoProd =
1965       DAG.getNode(ISD::AND, MVT::v4i32,
1966                   DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1967                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1968                               LoProdMask, LoProdMask,
1969                               LoProdMask, LoProdMask));
1970
1971     SDOperand rAH =
1972       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1973                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1974
1975     SDOperand rBH =
1976       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1977                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1978
1979     SDOperand HLProd =
1980       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1981                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1982                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1983
1984     SDOperand HHProd_1 =
1985       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1986                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1987                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1988                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1989                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1990
1991     SDOperand HHProd =
1992       DAG.getCopyToReg(Chain, HiProd_reg,
1993                        DAG.getNode(SPUISD::SELB, MVT::v8i16,
1994                                    HLProd,
1995                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1996                                    FSMBuse_2222));
1997
1998     SDOperand HiProd =
1999       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2000                   DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2001
2002     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2003                        DAG.getNode(ISD::OR, MVT::v4i32,
2004                                    LoProd, HiProd));
2005   }
2006
2007   default:
2008     cerr << "CellSPU: Unknown vector multiplication, got "
2009          << MVT::getValueTypeString(Op.getValueType())
2010          << "\n";
2011     abort();
2012     /*NOTREACHED*/
2013   }
2014
2015   return SDOperand();
2016 }
2017
2018 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2019   MachineFunction &MF = DAG.getMachineFunction();
2020   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2021
2022   SDOperand A = Op.getOperand(0);
2023   SDOperand B = Op.getOperand(1);
2024   unsigned VT = Op.getValueType();
2025
2026   unsigned VRegBR, VRegC;
2027
2028   if (VT == MVT::f32) {
2029     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2030     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2031   } else {
2032     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2033     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2034   }
2035   // TODO: make sure we're feeding FPInterp the right arguments
2036   // Right now: fi B, frest(B)
2037
2038   // Computes BRcpl =
2039   // (Floating Interpolate (FP Reciprocal Estimate B))
2040   SDOperand BRcpl =
2041       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2042                        DAG.getNode(SPUISD::FPInterp, VT, B,
2043                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2044
2045   // Computes A * BRcpl and stores in a temporary register
2046   SDOperand AxBRcpl =
2047       DAG.getCopyToReg(BRcpl, VRegC,
2048                  DAG.getNode(ISD::FMUL, VT, A,
2049                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2050   // What's the Chain variable do? It's magic!
2051   // TODO: set Chain = Op(0).getEntryNode()
2052
2053   return DAG.getNode(ISD::FADD, VT,
2054                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2055                 DAG.getNode(ISD::FMUL, VT,
2056                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2057                         DAG.getNode(ISD::FSUB, VT, A,
2058                             DAG.getNode(ISD::FMUL, VT, B,
2059                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2060 }
2061
2062 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2063   unsigned VT = Op.getValueType();
2064   SDOperand N = Op.getOperand(0);
2065   SDOperand Elt = Op.getOperand(1);
2066   SDOperand ShufMask[16];
2067   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2068
2069   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2070
2071   int EltNo = (int) C->getValue();
2072
2073   // sanity checks:
2074   if (VT == MVT::i8 && EltNo >= 16)
2075     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2076   else if (VT == MVT::i16 && EltNo >= 8)
2077     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2078   else if (VT == MVT::i32 && EltNo >= 4)
2079     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2080   else if (VT == MVT::i64 && EltNo >= 2)
2081     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2082
2083   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2084     // i32 and i64: Element 0 is the preferred slot
2085     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2086   }
2087
2088   // Need to generate shuffle mask and extract:
2089   int prefslot_begin = -1, prefslot_end = -1;
2090   int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2091
2092   switch (VT) {
2093   case MVT::i8: {
2094     prefslot_begin = prefslot_end = 3;
2095     break;
2096   }
2097   case MVT::i16: {
2098     prefslot_begin = 2; prefslot_end = 3;
2099     break;
2100   }
2101   case MVT::i32: {
2102     prefslot_begin = 0; prefslot_end = 3;
2103     break;
2104   }
2105   case MVT::i64: {
2106     prefslot_begin = 0; prefslot_end = 7;
2107     break;
2108   }
2109   }
2110
2111   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2112          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2113
2114   for (int i = 0; i < 16; ++i) {
2115     // zero fill uppper part of preferred slot, don't care about the
2116     // other slots:
2117     unsigned int mask_val;
2118
2119     if (i <= prefslot_end) {
2120       mask_val =
2121         ((i < prefslot_begin)
2122          ? 0x80
2123          : elt_byte + (i - prefslot_begin));
2124
2125       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2126     } else
2127       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2128   }
2129
2130   SDOperand ShufMaskVec =
2131     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2132                 &ShufMask[0],
2133                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2134
2135   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2136                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2137                                  N, N, ShufMaskVec));
2138
2139 }
2140
2141 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2142   SDOperand VecOp = Op.getOperand(0);
2143   SDOperand ValOp = Op.getOperand(1);
2144   SDOperand IdxOp = Op.getOperand(2);
2145   MVT::ValueType VT = Op.getValueType();
2146
2147   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2148   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2149
2150   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2151   // Use $2 because it's always 16-byte aligned and it's available:
2152   SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2153
2154   SDOperand result =
2155     DAG.getNode(SPUISD::SHUFB, VT,
2156                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2157                 VecOp,
2158                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2159                             DAG.getNode(ISD::ADD, PtrVT,
2160                                         PtrBase,
2161                                         DAG.getConstant(CN->getValue(),
2162                                                         PtrVT))));
2163
2164   return result;
2165 }
2166
2167 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2168   SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand
2169
2170   assert(Op.getValueType() == MVT::i8);
2171   switch (Opc) {
2172   default:
2173     assert(0 && "Unhandled i8 math operator");
2174     /*NOTREACHED*/
2175     break;
2176   case ISD::SUB: {
2177     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2178     // the result:
2179     SDOperand N1 = Op.getOperand(1);
2180     N0 = (N0.getOpcode() != ISD::Constant
2181           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2182           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2183     N1 = (N1.getOpcode() != ISD::Constant
2184           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2185           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2186     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2187                        DAG.getNode(Opc, MVT::i16, N0, N1));
2188   }
2189   case ISD::ROTR:
2190   case ISD::ROTL: {
2191     SDOperand N1 = Op.getOperand(1);
2192     unsigned N1Opc;
2193     N0 = (N0.getOpcode() != ISD::Constant
2194           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2195           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2196     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2197     N1 = (N1.getOpcode() != ISD::Constant
2198           ? DAG.getNode(N1Opc, MVT::i16, N1)
2199           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2200     SDOperand ExpandArg =
2201       DAG.getNode(ISD::OR, MVT::i16, N0,
2202                   DAG.getNode(ISD::SHL, MVT::i16,
2203                               N0, DAG.getConstant(8, MVT::i16)));
2204     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2205                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2206   }
2207   case ISD::SRL:
2208   case ISD::SHL: {
2209     SDOperand N1 = Op.getOperand(1);
2210     unsigned N1Opc;
2211     N0 = (N0.getOpcode() != ISD::Constant
2212           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2213           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2214     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2215     N1 = (N1.getOpcode() != ISD::Constant
2216           ? DAG.getNode(N1Opc, MVT::i16, N1)
2217           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2218     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2219                        DAG.getNode(Opc, MVT::i16, N0, N1));
2220   }
2221   case ISD::SRA: {
2222     SDOperand N1 = Op.getOperand(1);
2223     unsigned N1Opc;
2224     N0 = (N0.getOpcode() != ISD::Constant
2225           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2226           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2227     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2228     N1 = (N1.getOpcode() != ISD::Constant
2229           ? DAG.getNode(N1Opc, MVT::i16, N1)
2230           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2231     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2232                        DAG.getNode(Opc, MVT::i16, N0, N1));
2233   }
2234   case ISD::MUL: {
2235     SDOperand N1 = Op.getOperand(1);
2236     unsigned N1Opc;
2237     N0 = (N0.getOpcode() != ISD::Constant
2238           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2239           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2240     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2241     N1 = (N1.getOpcode() != ISD::Constant
2242           ? DAG.getNode(N1Opc, MVT::i16, N1)
2243           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2244     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2245                        DAG.getNode(Opc, MVT::i16, N0, N1));
2246     break;
2247   }
2248   }
2249
2250   return SDOperand();
2251 }
2252
2253 //! Lower byte immediate operations for v16i8 vectors:
2254 static SDOperand
2255 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2256   SDOperand ConstVec;
2257   SDOperand Arg;
2258   MVT::ValueType VT = Op.getValueType();
2259
2260   ConstVec = Op.getOperand(0);
2261   Arg = Op.getOperand(1);
2262   if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2263     if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2264       ConstVec = ConstVec.getOperand(0);
2265     } else {
2266       ConstVec = Op.getOperand(1);
2267       Arg = Op.getOperand(0);
2268       if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2269         ConstVec = ConstVec.getOperand(0);
2270       }
2271     }
2272   }
2273
2274   if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2275     uint64_t VectorBits[2];
2276     uint64_t UndefBits[2];
2277     uint64_t SplatBits, SplatUndef;
2278     int SplatSize;
2279
2280     if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2281         && isConstantSplat(VectorBits, UndefBits,
2282                            MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2283                            SplatBits, SplatUndef, SplatSize)) {
2284       SDOperand tcVec[16];
2285       SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2286       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2287
2288       // Turn the BUILD_VECTOR into a set of target constants:
2289       for (size_t i = 0; i < tcVecSize; ++i)
2290         tcVec[i] = tc;
2291
2292       return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2293                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2294     }
2295   }
2296
2297   return SDOperand();
2298 }
2299
2300 //! Lower i32 multiplication
2301 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2302                           unsigned Opc) {
2303   switch (VT) {
2304   default:
2305     cerr << "CellSPU: Unknown LowerMUL value type, got "
2306          << MVT::getValueTypeString(Op.getValueType())
2307          << "\n";
2308     abort();
2309     /*NOTREACHED*/
2310
2311   case MVT::i32: {
2312     SDOperand rA = Op.getOperand(0);
2313     SDOperand rB = Op.getOperand(1);
2314
2315     return DAG.getNode(ISD::ADD, MVT::i32,
2316                        DAG.getNode(ISD::ADD, MVT::i32,
2317                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2318                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2319                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2320   }
2321   }
2322
2323   return SDOperand();
2324 }
2325
2326 //! Custom lowering for CTPOP (count population)
2327 /*!
2328   Custom lowering code that counts the number ones in the input
2329   operand. SPU has such an instruction, but it counts the number of
2330   ones per byte, which then have to be accumulated.
2331 */
2332 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2333   unsigned VT = Op.getValueType();
2334   unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2335
2336   switch (VT) {
2337   case MVT::i8: {
2338     SDOperand N = Op.getOperand(0);
2339     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2340
2341     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2342     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2343
2344     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2345   }
2346
2347   case MVT::i16: {
2348     MachineFunction &MF = DAG.getMachineFunction();
2349     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2350
2351     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2352
2353     SDOperand N = Op.getOperand(0);
2354     SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2355     SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2356     SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2357
2358     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2359     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2360
2361     // CNTB_result becomes the chain to which all of the virtual registers
2362     // CNTB_reg, SUM1_reg become associated:
2363     SDOperand CNTB_result =
2364       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2365
2366     SDOperand CNTB_rescopy =
2367       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2368
2369     SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2370
2371     return DAG.getNode(ISD::AND, MVT::i16,
2372                        DAG.getNode(ISD::ADD, MVT::i16,
2373                                    DAG.getNode(ISD::SRL, MVT::i16,
2374                                                Tmp1, Shift1),
2375                                    Tmp1),
2376                        Mask0);
2377   }
2378
2379   case MVT::i32: {
2380     MachineFunction &MF = DAG.getMachineFunction();
2381     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2382
2383     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2384     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2385
2386     SDOperand N = Op.getOperand(0);
2387     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2388     SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2389     SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2390     SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2391
2392     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2393     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2394
2395     // CNTB_result becomes the chain to which all of the virtual registers
2396     // CNTB_reg, SUM1_reg become associated:
2397     SDOperand CNTB_result =
2398       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2399
2400     SDOperand CNTB_rescopy =
2401       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2402
2403     SDOperand Comp1 =
2404       DAG.getNode(ISD::SRL, MVT::i32,
2405                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2406
2407     SDOperand Sum1 =
2408       DAG.getNode(ISD::ADD, MVT::i32,
2409                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2410
2411     SDOperand Sum1_rescopy =
2412       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2413
2414     SDOperand Comp2 =
2415       DAG.getNode(ISD::SRL, MVT::i32,
2416                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2417                   Shift2);
2418     SDOperand Sum2 =
2419       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2420                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2421
2422     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2423   }
2424
2425   case MVT::i64:
2426     break;
2427   }
2428
2429   return SDOperand();
2430 }
2431
2432 /// LowerOperation - Provide custom lowering hooks for some operations.
2433 ///
2434 SDOperand
2435 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2436 {
2437   switch (Op.getOpcode()) {
2438   default: {
2439     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2440     cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2441     cerr << "*Op.Val:\n";
2442     Op.Val->dump();
2443     abort();
2444   }
2445   case ISD::LOAD:
2446   case ISD::SEXTLOAD:
2447   case ISD::ZEXTLOAD:
2448     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2449   case ISD::STORE:
2450     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2451   case ISD::ConstantPool:
2452     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2453   case ISD::GlobalAddress:
2454     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2455   case ISD::JumpTable:
2456     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2457   case ISD::Constant:
2458     return LowerConstant(Op, DAG);
2459   case ISD::ConstantFP:
2460     return LowerConstantFP(Op, DAG);
2461   case ISD::FORMAL_ARGUMENTS:
2462       return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2463   case ISD::CALL:
2464     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2465   case ISD::RET:
2466     return LowerRET(Op, DAG, getTargetMachine());
2467
2468   // i8 math ops:
2469   case ISD::SUB:
2470   case ISD::ROTR:
2471   case ISD::ROTL:
2472   case ISD::SRL:
2473   case ISD::SHL:
2474   case ISD::SRA:
2475     return LowerI8Math(Op, DAG, Op.getOpcode());
2476
2477   // Vector-related lowering.
2478   case ISD::BUILD_VECTOR:
2479     return LowerBUILD_VECTOR(Op, DAG);
2480   case ISD::SCALAR_TO_VECTOR:
2481     return LowerSCALAR_TO_VECTOR(Op, DAG);
2482   case ISD::VECTOR_SHUFFLE:
2483     return LowerVECTOR_SHUFFLE(Op, DAG);
2484   case ISD::EXTRACT_VECTOR_ELT:
2485     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2486   case ISD::INSERT_VECTOR_ELT:
2487     return LowerINSERT_VECTOR_ELT(Op, DAG);
2488
2489   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2490   case ISD::AND:
2491   case ISD::OR:
2492   case ISD::XOR:
2493     return LowerByteImmed(Op, DAG);
2494
2495   // Vector and i8 multiply:
2496   case ISD::MUL:
2497     if (MVT::isVector(Op.getValueType()))
2498       return LowerVectorMUL(Op, DAG);
2499     else if (Op.getValueType() == MVT::i8)
2500       return LowerI8Math(Op, DAG, Op.getOpcode());
2501     else
2502       return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2503
2504   case ISD::FDIV:
2505     if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2506       return LowerFDIVf32(Op, DAG);
2507 //    else if (Op.getValueType() == MVT::f64)
2508 //      return LowerFDIVf64(Op, DAG);
2509     else
2510       assert(0 && "Calling FDIV on unsupported MVT");
2511
2512   case ISD::CTPOP:
2513     return LowerCTPOP(Op, DAG);
2514   }
2515
2516   return SDOperand();
2517 }
2518
2519 //===----------------------------------------------------------------------===//
2520 //  Other Lowering Code
2521 //===----------------------------------------------------------------------===//
2522
2523 MachineBasicBlock *
2524 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2525                                            MachineBasicBlock *BB)
2526 {
2527   return BB;
2528 }
2529
2530 //===----------------------------------------------------------------------===//
2531 // Target Optimization Hooks
2532 //===----------------------------------------------------------------------===//
2533
2534 SDOperand
2535 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2536 {
2537 #if 0
2538   TargetMachine &TM = getTargetMachine();
2539   SelectionDAG &DAG = DCI.DAG;
2540 #endif
2541   SDOperand N0 = N->getOperand(0);      // everything has at least one operand
2542
2543   switch (N->getOpcode()) {
2544   default: break;
2545
2546   // Look for obvious optimizations for shift left:
2547   // a) Replace 0 << V with 0
2548   // b) Replace V << 0 with V
2549   //
2550   // N.B: llvm will generate an undef node if the shift amount is greater than
2551   // 15 (e.g.: V << 16), which will naturally trigger an assert.
2552   case SPU::SHLIr32:
2553   case SPU::SHLHIr16:
2554   case SPU::SHLQBIIvec:
2555   case SPU::ROTHIr16:
2556   case SPU::ROTHIr16_i32:
2557   case SPU::ROTIr32:
2558   case SPU::ROTIr32_i16:
2559   case SPU::ROTQBYIvec:
2560   case SPU::ROTQBYBIvec:
2561   case SPU::ROTQBIIvec:
2562   case SPU::ROTHMIr16:
2563   case SPU::ROTMIr32:
2564   case SPU::ROTQMBYIvec: {
2565     if (N0.getOpcode() == ISD::Constant) {
2566       if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2567         if (C->getValue() == 0)         // 0 << V -> 0.
2568           return N0;
2569       }
2570     }
2571     SDOperand N1 = N->getOperand(1);
2572     if (N1.getOpcode() == ISD::Constant) {
2573       if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2574         if (C->getValue() == 0)         // V << 0 -> V
2575           return N1;
2576       }
2577     }
2578     break;
2579   }
2580   }
2581
2582   return SDOperand();
2583 }
2584
2585 //===----------------------------------------------------------------------===//
2586 // Inline Assembly Support
2587 //===----------------------------------------------------------------------===//
2588
2589 /// getConstraintType - Given a constraint letter, return the type of
2590 /// constraint it is for this target.
2591 SPUTargetLowering::ConstraintType
2592 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2593   if (ConstraintLetter.size() == 1) {
2594     switch (ConstraintLetter[0]) {
2595     default: break;
2596     case 'b':
2597     case 'r':
2598     case 'f':
2599     case 'v':
2600     case 'y':
2601       return C_RegisterClass;
2602     }
2603   }
2604   return TargetLowering::getConstraintType(ConstraintLetter);
2605 }
2606
2607 std::pair<unsigned, const TargetRegisterClass*>
2608 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2609                                                 MVT::ValueType VT) const
2610 {
2611   if (Constraint.size() == 1) {
2612     // GCC RS6000 Constraint Letters
2613     switch (Constraint[0]) {
2614     case 'b':   // R1-R31
2615     case 'r':   // R0-R31
2616       if (VT == MVT::i64)
2617         return std::make_pair(0U, SPU::R64CRegisterClass);
2618       return std::make_pair(0U, SPU::R32CRegisterClass);
2619     case 'f':
2620       if (VT == MVT::f32)
2621         return std::make_pair(0U, SPU::R32FPRegisterClass);
2622       else if (VT == MVT::f64)
2623         return std::make_pair(0U, SPU::R64FPRegisterClass);
2624       break;
2625     case 'v':
2626       return std::make_pair(0U, SPU::GPRCRegisterClass);
2627     }
2628   }
2629
2630   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2631 }
2632
2633 void
2634 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2635                                                   uint64_t Mask,
2636                                                   uint64_t &KnownZero,
2637                                                   uint64_t &KnownOne,
2638                                                   const SelectionDAG &DAG,
2639                                                   unsigned Depth ) const {
2640   KnownZero = 0;
2641   KnownOne = 0;
2642 }
2643
2644 // LowerAsmOperandForConstraint
2645 void
2646 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2647                                                 char ConstraintLetter,
2648                                                 std::vector<SDOperand> &Ops,
2649                                                 SelectionDAG &DAG) {
2650   // Default, for the time being, to the base class handler
2651   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2652 }
2653
2654 /// isLegalAddressImmediate - Return true if the integer value can be used
2655 /// as the offset of the target addressing mode.
2656 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2657   // SPU's addresses are 256K:
2658   return (V > -(1 << 18) && V < (1 << 18) - 1);
2659 }
2660
2661 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2662   return false;
2663 }