//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    EVT valtype;
    int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
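
  // Example: a scalar i16 occupies bytes 2-3 of its 16-byte quadword
  // (prefslot_byte == 2), an i8 occupies byte 3, and 32-bit and wider
  // scalars start at byte 0; LowerLOAD below rotates quadwords by these
  // offsets to move a datum into its preferred slot.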

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getEVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc(),
                      DAG.GetOrdering(InChain.getNode()));

    return CallInfo.first;
  }
}
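
// Illustrative use of ExpandLibCall (not a call site in this file): lowering
// a 64-bit FP division would invoke
//   SDValue Dummy;
//   SDValue Res = ExpandLibCall(RTLIB::DIV_F64, Op, DAG, false, Dummy, TLI);
// which emits a call to "__fast_divdf3", the name registered for
// RTLIB::DIV_F64 in the SPUTargetLowering constructor below.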

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i128, Expand);
  setOperationAction(ISD::UREM, MVT::i128, Expand);
  setOperationAction(ISD::SDIV, MVT::i128, Expand);
  setOperationAction(ISD::UDIV, MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
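
  // With the Expand actions above, the legalizer turns integer division
  // into compiler-runtime calls (e.g. an i32 SDIV becomes a call to
  // __divsi3), and the DIVREM forms decompose into separate divide and
  // remainder operations.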

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32 at some point):
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8, Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ, MVT::i8, Expand);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ, MVT::i128, Expand);

  setOperationAction(ISD::CTLZ, MVT::i8, Promote);
  setOperationAction(ISD::CTLZ, MVT::i16, Promote);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ, MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is also legal for all supported vector VT's.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND, VT, Legal);
    setOperationAction(ISD::OR, VT, Legal);
    setOperationAction(ISD::XOR, VT, Legal);
    setOperationAction(ISD::LOAD, VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
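
  // ZeroOrNegativeOneBooleanContent means a "true" comparison result is
  // all ones: an i32 SETCC that succeeds yields 0xFFFFFFFF, which is
  // exactly the per-bit mask form that SELB consumes.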

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
           VT.getSimpleVT().SimpleTy :
           MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);
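
        // For example, an i32 load (prefslot_byte == 0) at constant offset
        // 0x12 yields rotamt = (0x12 & 0xf) - 0 = 2: rotating the quadword
        // left by two bytes brings the datum into the preferred slot.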

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
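// Sketch of the emitted sequence (illustrative, for an i32 store into an
// aligned 16-byte block):
//   %1 v16i8,ch = load  <16-byte block containing the store address>
//   %2          = shuffle_mask(insert offset)     ; SPUISD::SHUFFLE_MASK
//   %3 v16i8    = shufb(scalar_to_vector(value), %1, %2)
//   ch          = store %3                        ; write the block back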
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits())),
        stVecVT = EVT::getVectorVT(*DAG.getContext(),
                                   StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      errs() << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      errs() << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
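
// In small-memory mode the entire address fits into the 18-bit immediate
// field of an a-form instruction, so a single AFormAddr node suffices;
// large-memory mode splits the address into Hi/Lo halves combined through
// IndirectAddr, analogous to the hi/lo address pairs of other RISC targets.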

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
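
// For example, the f64 constant 1.0 has the bit pattern 0x3FF0000000000000;
// it is splatted into a v2i64, bitcast to v2f64, and the preferred slot is
// extracted, so the constant materializes without a constant-pool load.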

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFormalArguments Unhandled argument type: "
            << ObjectVT.getEVTString();
        llvm_report_error(Msg.str());
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
                                                 true, false);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||   // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;   // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
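
// For example, a callee at absolute address 0x100 passes both checks (word
// aligned and representable as a sign-extended 18-bit immediate), so
// isLSAAddress returns the constant 0x40, the word index that the branch
// encodes.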

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = Outs[i].Val;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls; otherwise, external symbols become BRASL calls. This
      // assumes that declared/defined symbols are in the same compilation
      // unit and can be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}

SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Outs[i].Val, Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
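
// For example, a v4i32 splat of 0x1ffff matches here, since it fits in 18
// unsigned bits and can be materialized with a single ILA instruction; a
// splat of 0x40000 does not, and falls through to the matchers below.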

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                  /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default: {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
        << VT.getEVTString();
    llvm_report_error(Msg.str());
    /*NOTREACHED*/
  }
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}

//! Lower a v2i64 constant splat
SDValue
SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }
1674 SmallVector<SDValue, 16> ShufBytes;
1677 // Create lower vector if not a special pattern
1678 if (!lower_special) {
1679 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1680 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1681 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1682 LO32C, LO32C, LO32C, LO32C));
1685 // Create upper vector if not a special pattern
1686 if (!upper_special) {
1687 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1688 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1689 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1690 HI32C, HI32C, HI32C, HI32C));
1693 // If either upper or lower are special, then the two input operands are
1694 // the same (basically, one of them is a "don't care")
1700 for (int i = 0; i < 4; ++i) {
1702 for (int j = 0; j < 4; ++j) {
1704 bool process_upper, process_lower;
1705 val <<= 8;
1706 process_upper = (upper_special && (i & 1) == 0);
1707 process_lower = (lower_special && (i & 1) == 1);
1709 if (process_upper || process_lower) {
1710 if ((process_upper && upper == 0)
1711 || (process_lower && lower == 0))
1712 val |= 0x80;
1713 else if ((process_upper && upper == 0xffffffff)
1714 || (process_lower && lower == 0xffffffff))
1715 val |= 0xc0;
1716 else if ((process_upper && upper == 0x80000000)
1717 || (process_lower && lower == 0x80000000))
1718 val |= (j == 0 ? 0xe0 : 0x80);
1719 } else
1720 val |= i * 4 + j + ((i & 1) * 16);
1721 }
1723 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1724 }
1726 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1727 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1728 &ShufBytes[0], ShufBytes.size()));
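//! Illustration: how shufb interprets the special mask bytes used above
/*!
  A minimal host-side sketch of shufb's per-byte selection rule, assuming
  the Cell SPU ISA semantics that the 0x80/0xc0/0xe0 constants above rely
  on. The helper and its names are illustrative, not part of this target:

  \code
  #include <stdint.h>

  // 'ab' is the 32-byte concatenation of the two input registers $rA:$rB.
  static uint8_t shufbSelectByte(uint8_t control, const uint8_t ab[32]) {
    if ((control & 0xc0) == 0x80) return 0x00; // 10xxxxxx -> zero byte
    if ((control & 0xe0) == 0xc0) return 0xff; // 110xxxxx -> all-ones byte
    if ((control & 0xe0) == 0xe0) return 0x80; // 111xxxxx -> 0x80
    return ab[control & 0x1f];                 // otherwise: select a byte
  }
  \endcode
*/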
1732 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1733 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1734 /// permutation vector, V3, is monotonically increasing with one "exception"
1735 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1736 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1737 /// In either case, the net result is going to eventually invoke SHUFB to
1738 /// permute/shuffle the bytes from V1 and V2.
1740 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1741 /// generate the control word for byte/halfword/word insertion. This takes care
1742 /// of a single element move from V2 into V1.
1744 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1745 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1746 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1747 SDValue V1 = Op.getOperand(0);
1748 SDValue V2 = Op.getOperand(1);
1749 DebugLoc dl = Op.getDebugLoc();
1751 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1753 // If we have a single element being moved from V1 to V2, this can be handled
1754 // using the C*[DX] compute mask instructions, but the vector elements have
1755 // to be monotonically increasing with one exception element.
1756 EVT VecVT = V1.getValueType();
1757 EVT EltVT = VecVT.getVectorElementType();
1758 unsigned EltsFromV2 = 0;
1759 unsigned V2Elt = 0;
1760 unsigned V2EltIdx0 = 0;
1761 unsigned CurrElt = 0;
1762 unsigned MaxElts = VecVT.getVectorNumElements();
1763 unsigned PrevElt = 0;
1764 unsigned V0Elt = 0;
1765 bool monotonic = true;
1766 bool rotate = true;
1768 if (EltVT == MVT::i8) {
1769 V2EltIdx0 = 16;
1770 } else if (EltVT == MVT::i16) {
1771 V2EltIdx0 = 8;
1772 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1773 V2EltIdx0 = 4;
1774 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1775 V2EltIdx0 = 2;
1776 } else
1777 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1779 for (unsigned i = 0; i != MaxElts; ++i) {
1780 if (SVN->getMaskElt(i) < 0)
1781 continue;
1783 unsigned SrcElt = SVN->getMaskElt(i);
1785 if (monotonic) {
1786 if (SrcElt >= V2EltIdx0) {
1787 if (++EltsFromV2 == 1) {
1788 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1789 }
1790 } else if (CurrElt != SrcElt) {
1791 monotonic = false;
1792 }
1793 ++CurrElt;
1794 }
1796 if (rotate) {
1798 if (PrevElt > 0 && SrcElt < MaxElts) {
1799 if ((PrevElt == SrcElt - 1)
1800 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1801 PrevElt = SrcElt;
1802 if (SrcElt == 0)
1803 V0Elt = i;
1804 } else {
1805 rotate = false;
1806 }
1807 } else if (PrevElt == 0) {
1808 // First time through; keep track of the previous element
1809 PrevElt = SrcElt;
1810 } else {
1811 // This isn't a rotation; it takes elements from vector 2
1812 rotate = false;
1813 }
1814 }
1815 }
1817 if (EltsFromV2 == 1 && monotonic) {
1818 // Compute mask and shuffle
1819 MachineFunction &MF = DAG.getMachineFunction();
1820 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1821 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1822 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1823 // Initialize temporary register to 0
1824 SDValue InitTempReg =
1825 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1826 // Copy register's contents as index in SHUFFLE_MASK:
1827 SDValue ShufMaskOp =
1828 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1829 DAG.getTargetConstant(V2Elt, MVT::i32),
1830 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1831 // Use shuffle mask in SHUFB synthetic instruction:
1832 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1833 ShufMaskOp);
1834 } else if (rotate) {
1835 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1837 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1838 V1, DAG.getConstant(rotamt, MVT::i16));
1840 // Convert the SHUFFLE_VECTOR mask's input element units to the
1841 // actual bytes.
1842 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1844 SmallVector<SDValue, 16> ResultMask;
1845 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1846 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1848 for (unsigned j = 0; j < BytesPerElement; ++j)
1849 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1852 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1853 &ResultMask[0], ResultMask.size());
1854 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1855 }
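//! Illustration: the rotation case recognized above
/*!
  For example, a <4 x i32> shuffle with mask <1, 2, 3, 0> scans as a pure
  rotation: V0Elt ends up 3, so rotamt = (4 - 3) * 32/8 = 4 bytes, and the
  node becomes (SPUrotbytes_left V1, 4), i.e. a single rotqbyi. A host-side
  sketch of the same byte rotation (illustrative only):

  \code
  #include <stdint.h>
  #include <string.h>

  static void rotateQuadLeft(uint8_t v[16], unsigned rotamt) {
    uint8_t tmp[16];
    for (unsigned i = 0; i < 16; ++i)
      tmp[i] = v[(i + rotamt) & 15];   // byte i takes byte (i + rotamt) mod 16
    memcpy(v, tmp, 16);
  }
  \endcode
*/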
1858 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1859 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1860 DebugLoc dl = Op.getDebugLoc();
1862 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1863 // For a constant, build the appropriate constant vector, which will
1864 // eventually simplify to a vector register load.
1866 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1867 SmallVector<SDValue, 16> ConstVecValues;
1868 EVT VT;
1869 size_t n_copies;
1871 // Create a constant vector:
1872 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1873 default: llvm_unreachable("Unexpected constant value type in "
1874 "LowerSCALAR_TO_VECTOR");
1875 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1876 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1877 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1878 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1879 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1880 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1883 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1884 for (size_t j = 0; j < n_copies; ++j)
1885 ConstVecValues.push_back(CValue);
1887 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1888 &ConstVecValues[0], ConstVecValues.size());
1890 // Otherwise, copy the value from one register to another:
1891 switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1892 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1899 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1906 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1907 EVT VT = Op.getValueType();
1908 SDValue N = Op.getOperand(0);
1909 SDValue Elt = Op.getOperand(1);
1910 DebugLoc dl = Op.getDebugLoc();
1911 SDValue retval;
1913 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1914 // Constant argument:
1915 int EltNo = (int) C->getZExtValue();
1918 if (VT == MVT::i8 && EltNo >= 16)
1919 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1920 else if (VT == MVT::i16 && EltNo >= 8)
1921 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1922 else if (VT == MVT::i32 && EltNo >= 4)
1923 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1924 else if (VT == MVT::i64 && EltNo >= 2)
1925 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1927 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1928 // i32 and i64: Element 0 is the preferred slot
1929 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1932 // Need to generate shuffle mask and extract:
1933 int prefslot_begin = -1, prefslot_end = -1;
1934 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1936 switch (VT.getSimpleVT().SimpleTy) {
1937 default:
1938 assert(false && "Invalid value type!");
1939 case MVT::i8: {
1940 prefslot_begin = prefslot_end = 3;
1941 break;
1942 }
1943 case MVT::i16: {
1944 prefslot_begin = 2; prefslot_end = 3;
1945 break;
1946 }
1947 case MVT::i32:
1948 case MVT::f32: {
1949 prefslot_begin = 0; prefslot_end = 3;
1950 break;
1951 }
1952 case MVT::i64:
1953 case MVT::f64: {
1954 prefslot_begin = 0; prefslot_end = 7;
1955 break;
1956 }
1957 }
1959 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1960 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1962 unsigned int ShufBytes[16] = {
1963 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1965 for (int i = 0; i < 16; ++i) {
1966 // zero fill upper part of preferred slot, don't care about the
1967 // rest (it is undefined anyway)
1968 unsigned int mask_val;
1969 if (i <= prefslot_end) {
1970 mask_val =
1971 ((i < prefslot_begin)
1972 ? 0x80
1973 : elt_byte + (i - prefslot_begin));
1975 ShufBytes[i] = mask_val;
1976 } else
1977 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1980 SDValue ShufMask[4];
1981 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1982 unsigned bidx = i * 4;
1983 unsigned int bits = ((ShufBytes[bidx] << 24) |
1984 (ShufBytes[bidx+1] << 16) |
1985 (ShufBytes[bidx+2] << 8) |
1986 ShufBytes[bidx+3]);
1987 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1990 SDValue ShufMaskVec =
1991 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1992 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1994 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1995 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1996 N, N, ShufMaskVec));
1998 // Variable index: Rotate the requested element into slot 0, then replicate
1999 // slot 0 across the vector
2000 EVT VecVT = N.getValueType();
2001 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2002 llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2006 // Make life easier by making sure the index is zero-extended to i32
2007 if (Elt.getValueType() != MVT::i32)
2008 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2010 // Scale the index to a bit/byte shift quantity
2011 APInt scaleFactor =
2012 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2013 unsigned scaleShift = scaleFactor.logBase2();
2014 SDValue vecShift;
2016 if (scaleShift > 0) {
2017 // Scale the shift factor:
2018 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2019 DAG.getConstant(scaleShift, MVT::i32));
2020 }
2022 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2024 // Replicate the bytes starting at byte 0 across the entire vector (for
2025 // consistency with the notion of a unified register set)
2026 SDValue replicate;
2028 switch (VT.getSimpleVT().SimpleTy) {
2029 default:
2030 llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
2031 " type");
2032 /*NOTREACHED*/
2033 case MVT::i8: {
2034 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2035 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2036 factor, factor, factor, factor);
2037 break;
2038 }
2039 case MVT::i16: {
2040 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2041 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2042 factor, factor, factor, factor);
2043 break;
2044 }
2045 case MVT::i32:
2046 case MVT::f32: {
2047 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2048 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2049 factor, factor, factor, factor);
2050 break;
2051 }
2052 case MVT::i64:
2053 case MVT::f64: {
2054 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2055 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2056 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2057 loFactor, hiFactor, loFactor, hiFactor);
2058 break;
2059 }
2060 }
2062 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2063 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2064 vecShift, vecShift, replicate));
2065 }
2067 return retval;
2068 }
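//! Illustration: scaling a variable extraction index to a byte shift
/*!
  For the variable-index path above: with <8 x i16>, scaleFactor is
  16/8 = 2, so scaleShift is 1 and the element index becomes a byte offset
  with a single shift. A sketch of the arithmetic (the helper is
  illustrative only):

  \code
  #include <assert.h>

  static unsigned elementIndexToByteOffset(unsigned eltIdx,
                                           unsigned numElts /* 16,8,4,2 */) {
    unsigned bytesPerElt = 16 / numElts;       // a quadword is 16 bytes
    assert((bytesPerElt & (bytesPerElt - 1)) == 0 && "power of two");
    unsigned scaleShift = 0;
    while ((1u << scaleShift) < bytesPerElt)   // logBase2, as above
      ++scaleShift;
    return eltIdx << scaleShift;               // byte shift for SHLQUAD
  }
  \endcode
*/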
2070 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2071 SDValue VecOp = Op.getOperand(0);
2072 SDValue ValOp = Op.getOperand(1);
2073 SDValue IdxOp = Op.getOperand(2);
2074 DebugLoc dl = Op.getDebugLoc();
2075 EVT VT = Op.getValueType();
2077 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
2078 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2080 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2081 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2082 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2083 DAG.getRegister(SPU::R1, PtrVT),
2084 DAG.getConstant(CN->getSExtValue(), PtrVT));
2085 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2087 SDValue result =
2088 DAG.getNode(SPUISD::SHUFB, dl, VT,
2089 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2090 VecOp,
2091 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2093 return result;
2094 }
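//! Illustration: the control word that CWD-style insertion relies on
/*!
  SHUFFLE_MASK is selected to one of the C*D instructions; for a word
  insertion at byte offset 'ofs', the control is an identity pattern over
  the target vector with the value's preferred-slot bytes spliced in. A
  host-side sketch assuming the usual C*D semantics (names illustrative):

  \code
  #include <stdint.h>

  static void makeWordInsertControl(unsigned ofs, uint8_t control[16]) {
    for (unsigned i = 0; i < 16; ++i)
      control[i] = 0x10 + i;        // pass through $rB (the target vector)
    for (unsigned i = 0; i < 4; ++i)
      control[(ofs & 12) + i] = i;  // take $rA's preferred-slot word
  }
  \endcode
*/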
2096 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2097 const TargetLowering &TLI)
2099 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2100 DebugLoc dl = Op.getDebugLoc();
2101 EVT ShiftVT = TLI.getShiftAmountTy();
2103 assert(Op.getValueType() == MVT::i8);
2104 switch (Opc) {
2105 default:
2106 llvm_unreachable("Unhandled i8 math operator");
2107 /*NOTREACHED*/
2108 break;
2109 case ISD::ADD: {
2110 // 8-bit addition: Promote the arguments up to 16 bits and truncate
2111 // down afterward:
2112 SDValue N1 = Op.getOperand(1);
2113 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2114 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2115 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2116 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2117 }
2120 case ISD::SUB: {
2121 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2122 // down afterward:
2123 SDValue N1 = Op.getOperand(1);
2124 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2125 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2126 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2127 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2128 }
2129 case ISD::ROTR:
2130 case ISD::ROTL: {
2131 SDValue N1 = Op.getOperand(1);
2132 EVT N1VT = N1.getValueType();
2134 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2135 if (!N1VT.bitsEq(ShiftVT)) {
2136 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2137 ? ISD::ZERO_EXTEND
2138 : ISD::TRUNCATE;
2139 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2142 // Replicate lower 8 bits into upper 8:
2143 SDValue ExpandArg =
2144 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2145 DAG.getNode(ISD::SHL, dl, MVT::i16,
2146 N0, DAG.getConstant(8, MVT::i32)));
2148 // Truncate back down to i8
2149 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2150 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2151 }
2152 case ISD::SRL:
2153 case ISD::SHL: {
2154 SDValue N1 = Op.getOperand(1);
2155 EVT N1VT = N1.getValueType();
2157 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2158 if (!N1VT.bitsEq(ShiftVT)) {
2159 unsigned N1Opc = ISD::ZERO_EXTEND;
2161 if (N1.getValueType().bitsGT(ShiftVT))
2162 N1Opc = ISD::TRUNCATE;
2164 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2167 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2168 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2169 }
2170 case ISD::SRA: {
2171 SDValue N1 = Op.getOperand(1);
2172 EVT N1VT = N1.getValueType();
2174 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2175 if (!N1VT.bitsEq(ShiftVT)) {
2176 unsigned N1Opc = ISD::SIGN_EXTEND;
2178 if (N1VT.bitsGT(ShiftVT))
2179 N1Opc = ISD::TRUNCATE;
2180 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2183 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2184 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2185 }
2186 case ISD::MUL: {
2187 SDValue N1 = Op.getOperand(1);
2189 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2190 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2191 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2192 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2193 }
2194 }
2196 return SDValue();
2197 }
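//! Illustration: the promote/operate/truncate pattern used for i8 math
/*!
  Every case above is the DAG form of the same scalar idiom: widen both i8
  operands to i16, do the 16-bit operation the SPU actually has, and
  truncate back. In C terms (illustrative only):

  \code
  #include <stdint.h>

  static int8_t addViaI16(int8_t a, int8_t b) {
    return (int8_t)((int16_t)a + (int16_t)b);   // sext, add, truncate
  }

  static uint8_t shlViaI16(uint8_t a, unsigned amt) {
    return (uint8_t)((uint16_t)a << amt);       // zext, shift, truncate
  }
  \endcode
*/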
2200 //! Lower byte immediate operations for v16i8 vectors:
2201 static SDValue
2202 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2203 SDValue ConstVec;
2204 SDValue Arg;
2205 EVT VT = Op.getValueType();
2206 DebugLoc dl = Op.getDebugLoc();
2208 ConstVec = Op.getOperand(0);
2209 Arg = Op.getOperand(1);
2210 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2211 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2212 ConstVec = ConstVec.getOperand(0);
2213 } else {
2214 ConstVec = Op.getOperand(1);
2215 Arg = Op.getOperand(0);
2216 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2217 ConstVec = ConstVec.getOperand(0);
2218 }
2219 }
2220 }
2222 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2223 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2224 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2226 APInt APSplatBits, APSplatUndef;
2227 unsigned SplatBitSize;
2228 bool HasAnyUndefs;
2229 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2231 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2232 HasAnyUndefs, minSplatBits)
2233 && minSplatBits <= SplatBitSize) {
2234 uint64_t SplatBits = APSplatBits.getZExtValue();
2235 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2237 SmallVector<SDValue, 16> tcVec;
2238 tcVec.assign(16, tc);
2239 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2240 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2241 }
2242 }
2244 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2245 // lowered. Return the operation rather than a null SDValue.
2246 return Op;
2247 }
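//! Illustration: what LowerByteImmed rebuilds
/*!
  For example, an AND of a v16i8 value with a splatted byte comes back as
  the same AND, but with the immediate rebuilt from target constants so the
  byte-immediate patterns (ANDBI, ORBI, XORBI) can match during selection.
  A sketch, assuming X, dl and DAG in scope (names illustrative):

  \code
  SmallVector<SDValue, 16> Bytes;
  Bytes.assign(16, DAG.getTargetConstant(0x0f, MVT::i8));
  SDValue Imm = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                            &Bytes[0], Bytes.size());
  SDValue And = DAG.getNode(ISD::AND, dl, MVT::v16i8, X, Imm);
  \endcode
*/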
2249 //! Custom lowering for CTPOP (count population)
2251 Custom lowering code that counts the number of ones in the input
2252 operand. SPU has such an instruction, but it counts the number of
2253 ones per byte, which then have to be accumulated.
2255 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2256 EVT VT = Op.getValueType();
2257 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2258 VT, (128 / VT.getSizeInBits()));
2259 DebugLoc dl = Op.getDebugLoc();
2261 switch (VT.getSimpleVT().SimpleTy) {
2262 default:
2263 assert(false && "Invalid value type!");
2264 case MVT::i8: {
2265 SDValue N = Op.getOperand(0);
2266 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2268 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2269 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2271 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2272 }
2274 case MVT::i16: {
2275 MachineFunction &MF = DAG.getMachineFunction();
2276 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2278 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2280 SDValue N = Op.getOperand(0);
2281 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2282 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2283 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2285 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2286 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2288 // CNTB_result becomes the chain to which all of the virtual registers
2289 // CNTB_reg, SUM1_reg become associated:
2290 SDValue CNTB_result =
2291 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2293 SDValue CNTB_rescopy =
2294 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2296 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2298 return DAG.getNode(ISD::AND, dl, MVT::i16,
2299 DAG.getNode(ISD::ADD, dl, MVT::i16,
2300 DAG.getNode(ISD::SRL, dl, MVT::i16,
2301 Tmp1, Shift1),
2302 Tmp1),
2303 Mask0);
2304 }
2306 case MVT::i32: {
2307 MachineFunction &MF = DAG.getMachineFunction();
2308 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2310 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2311 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2313 SDValue N = Op.getOperand(0);
2314 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2315 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2316 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2317 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2319 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2320 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2322 // CNTB_result becomes the chain to which all of the virtual registers
2323 // CNTB_reg, SUM1_reg become associated:
2324 SDValue CNTB_result =
2325 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2327 SDValue CNTB_rescopy =
2328 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2330 SDValue Comp1 =
2331 DAG.getNode(ISD::SRL, dl, MVT::i32,
2332 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2333 Shift1);
2335 SDValue Sum1 =
2336 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2337 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2339 SDValue Sum1_rescopy =
2340 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2342 SDValue Comp2 =
2343 DAG.getNode(ISD::SRL, dl, MVT::i32,
2344 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2345 Shift2);
2346 SDValue Sum2 =
2347 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2348 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2350 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2351 }
2352 }
2354 return SDValue();
2355 }
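//! Illustration: the byte-count accumulation done above for i32
/*!
  CNTB produces a per-byte population count; the i32 case then folds the
  four byte counts together with two shift/add steps and masks the sum. A
  host-side sketch of the same arithmetic (illustrative; the builtin is a
  GCC/Clang extension used here only to model CNTB):

  \code
  #include <stdint.h>

  static uint32_t ctpop32ViaBytes(uint32_t x) {
    uint32_t cntb = 0;                     // per-byte counts, like CNTB
    for (unsigned i = 0; i < 32; i += 8)
      cntb |= (uint32_t)__builtin_popcount((x >> i) & 0xff) << i;
    uint32_t sum1 = cntb + (cntb >> 16);   // fold upper halfword in
    uint32_t sum2 = sum1 + (sum1 >> 8);    // fold upper byte in
    return sum2 & 0xff;
  }
  \endcode
*/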
2360 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2362 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2363 All conversions to i64 are expanded to a libcall.
2365 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2366 SPUTargetLowering &TLI) {
2367 EVT OpVT = Op.getValueType();
2368 SDValue Op0 = Op.getOperand(0);
2369 EVT Op0VT = Op0.getValueType();
2371 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2372 || OpVT == MVT::i64) {
2373 // Convert f32 / f64 to i32 / i64 via libcall.
2374 RTLIB::Libcall LC =
2375 (Op.getOpcode() == ISD::FP_TO_SINT)
2376 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2377 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2378 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2379 SDValue Dummy;
2380 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2381 }
2383 return Op;
2384 }
2386 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2388 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2389 All conversions from i64 are expanded to a libcall.
2391 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2392 SPUTargetLowering &TLI) {
2393 EVT OpVT = Op.getValueType();
2394 SDValue Op0 = Op.getOperand(0);
2395 EVT Op0VT = Op0.getValueType();
2397 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2398 || Op0VT == MVT::i64) {
2399 // Convert i32, i64 to f64 via libcall:
2400 RTLIB::Libcall LC =
2401 (Op.getOpcode() == ISD::SINT_TO_FP)
2402 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2403 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2404 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2405 SDValue Dummy;
2406 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2407 }
2409 return Op;
2410 }
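//! Illustration: which conversions become libcalls
/*!
  Only f32<->i32 is handled natively; everything else goes through
  ExpandLibCall. For example, (sint_to_fp:f64 (i32 X)) becomes a call to
  the RTLIB entry RTLIB::getSINTTOFP(MVT::i32, MVT::f64) - conventionally
  the libgcc symbol __floatsidf, assumed here. In C terms (illustrative):

  \code
  extern "C" double __floatsidf(int);  // usual libgcc name, assumed

  static double sintToDouble(int x) {
    return __floatsidf(x);             // what the expanded DAG amounts to
  }
  \endcode
*/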
2412 //! Lower ISD::SETCC
2414 This handles MVT::f64 (double floating point) condition lowering
2416 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2417 const TargetLowering &TLI) {
2418 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2419 DebugLoc dl = Op.getDebugLoc();
2420 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2422 SDValue lhs = Op.getOperand(0);
2423 SDValue rhs = Op.getOperand(1);
2424 EVT lhsVT = lhs.getValueType();
2425 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2427 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2428 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2429 EVT IntVT(MVT::i64);
2431 // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
2432 // selected to a NOP:
2433 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2434 SDValue lhsHi32 =
2435 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2436 DAG.getNode(ISD::SRL, dl, IntVT,
2437 i64lhs, DAG.getConstant(32, MVT::i32)));
2438 SDValue lhsHi32abs =
2439 DAG.getNode(ISD::AND, dl, MVT::i32,
2440 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2441 SDValue lhsLo32 =
2442 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2444 // SETO and SETUO only use the lhs operand:
2445 if (CC->get() == ISD::SETO) {
2446 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2447 // SETUO:
2448 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2449 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2450 DAG.getSetCC(dl, ccResultVT,
2451 lhs, DAG.getConstantFP(0.0, lhsVT),
2452 ISD::SETUO),
2453 DAG.getConstant(ccResultAllOnes, ccResultVT));
2454 } else if (CC->get() == ISD::SETUO) {
2455 // Evaluates to true if Op0 is [SQ]NaN
2456 return DAG.getNode(ISD::AND, dl, ccResultVT,
2457 DAG.getSetCC(dl, ccResultVT,
2458 lhsHi32abs,
2459 DAG.getConstant(0x7ff00000, MVT::i32),
2460 ISD::SETGE),
2461 DAG.getSetCC(dl, ccResultVT,
2462 lhsLo32,
2463 DAG.getConstant(0, MVT::i32),
2464 ISD::SETGT));
2465 }
2467 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2468 SDValue rhsHi32 =
2469 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2470 DAG.getNode(ISD::SRL, dl, IntVT,
2471 i64rhs, DAG.getConstant(32, MVT::i32)));
2473 // If a value is negative, subtract from the sign magnitude constant:
2474 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2476 // Convert the sign-magnitude representation into 2's complement:
2477 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2478 lhsHi32, DAG.getConstant(31, MVT::i32));
2479 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2480 SDValue lhsSelect =
2481 DAG.getNode(ISD::SELECT, dl, IntVT,
2482 lhsSelectMask, lhsSignMag2TC, i64lhs);
2484 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2485 rhsHi32, DAG.getConstant(31, MVT::i32));
2486 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2487 SDValue rhsSelect =
2488 DAG.getNode(ISD::SELECT, dl, IntVT,
2489 rhsSelectMask, rhsSignMag2TC, i64rhs);
2491 unsigned compareOp = 0;
2493 switch (CC->get()) {
2494 case ISD::SETOEQ:
2495 case ISD::SETUEQ:
2496 compareOp = ISD::SETEQ; break;
2497 case ISD::SETOGT:
2498 case ISD::SETUGT:
2499 compareOp = ISD::SETGT; break;
2500 case ISD::SETOGE:
2501 case ISD::SETUGE:
2502 compareOp = ISD::SETGE; break;
2503 case ISD::SETOLT:
2504 case ISD::SETULT:
2505 compareOp = ISD::SETLT; break;
2506 case ISD::SETOLE:
2507 case ISD::SETULE:
2508 compareOp = ISD::SETLE; break;
2509 case ISD::SETONE:
2510 case ISD::SETUNE:
2511 compareOp = ISD::SETNE; break;
2512 default:
2513 llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2514 }
2516 SDValue result =
2517 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2518 (ISD::CondCode) compareOp);
2520 if ((CC->get() & 0x8) == 0) {
2521 // Ordered comparison:
2522 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2523 lhs, DAG.getConstantFP(0.0, MVT::f64),
2524 ISD::SETO);
2525 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2526 rhs, DAG.getConstantFP(0.0, MVT::f64),
2527 ISD::SETO);
2528 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2530 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2531 }
2533 return result;
2534 }
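//! Illustration: the sign-magnitude to two's complement trick
/*!
  IEEE-754 doubles compare correctly as signed integers once negative
  values are mirrored, which is exactly what the signMag2TC subtraction
  above does. A host-side sketch (illustrative only):

  \code
  #include <stdint.h>

  static int64_t orderKeyForF64Bits(uint64_t bits) {
    const uint64_t signBit = 0x8000000000000000ULL;
    // Negative: subtract from the sign-magnitude constant; positive: as-is.
    return (bits & signBit) ? (int64_t)(signBit - bits) : (int64_t)bits;
  }
  // For non-NaN doubles a and b: a < b iff
  // orderKeyForF64Bits(bitsOf(a)) < orderKeyForF64Bits(bitsOf(b)).
  \endcode
*/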
2536 //! Lower ISD::SELECT_CC
2538 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2539 SELB instruction.
2541 \note Need to revisit this in the future: if the code path through the true
2542 and false value computations is longer than the latency of a branch (6
2543 cycles), then it would be more advantageous to branch and insert a new basic
2544 block and branch on the condition. However, this code does not make that
2545 assumption, given the simplistic uses so far.
2548 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2549 const TargetLowering &TLI) {
2550 EVT VT = Op.getValueType();
2551 SDValue lhs = Op.getOperand(0);
2552 SDValue rhs = Op.getOperand(1);
2553 SDValue trueval = Op.getOperand(2);
2554 SDValue falseval = Op.getOperand(3);
2555 SDValue condition = Op.getOperand(4);
2556 DebugLoc dl = Op.getDebugLoc();
2558 // NOTE: SELB's arguments: $rA, $rB, $mask
2560 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2561 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2562 // condition was true and 0s where the condition was false. Hence, the
2563 // arguments to SELB get reversed.
2565 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2566 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2567 // with another "cannot select select_cc" assert:
2569 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2570 TLI.getSetCCResultType(Op.getValueType()),
2571 lhs, rhs, condition);
2572 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2573 }
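//! Illustration: SELB's bitwise select
/*!
  SELB computes (mask & $rB) | (~mask & $rA); the setcc mask is all ones
  where the condition held, which is why trueval is passed in the $rB
  position above. Host-side sketch (illustrative only):

  \code
  #include <stdint.h>

  static uint32_t selb(uint32_t rA, uint32_t rB, uint32_t mask) {
    return (rB & mask) | (rA & ~mask);
  }
  \endcode
*/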
2575 //! Custom lower ISD::TRUNCATE
2576 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2577 {
2578 // Type to truncate to
2579 EVT VT = Op.getValueType();
2580 MVT simpleVT = VT.getSimpleVT();
2581 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2582 VT, (128 / VT.getSizeInBits()));
2583 DebugLoc dl = Op.getDebugLoc();
2585 // Type to truncate from
2586 SDValue Op0 = Op.getOperand(0);
2587 EVT Op0VT = Op0.getValueType();
2589 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2590 // Create shuffle mask, least significant doubleword of quadword
2591 unsigned maskHigh = 0x08090a0b;
2592 unsigned maskLow = 0x0c0d0e0f;
2593 // Use a shuffle to perform the truncation
2594 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2595 DAG.getConstant(maskHigh, MVT::i32),
2596 DAG.getConstant(maskLow, MVT::i32),
2597 DAG.getConstant(maskHigh, MVT::i32),
2598 DAG.getConstant(maskLow, MVT::i32));
2600 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2601 Op0, Op0, shufMask);
2603 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2606 return SDValue(); // Leave the truncate unmolested
2607 }
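//! Illustration: the i128 -> i64 truncation mask
/*!
  With big-endian byte numbering, bytes 8..15 of the quadword hold the
  least-significant doubleword of an i128, so the shuffle mask built above
  is just those byte indices, repeated to fill the control register:

  \code
  static const unsigned char TruncI128Mask[16] = {
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  // -> preferred slot
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f   // (repeated filler)
  };
  \endcode
*/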
2610 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2611 * algorithm is to duplicate the sign bit using rotmai to generate at
2612 * least one byte full of sign bits. Then propagate the "sign-byte" into
2613 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2615 * @param Op The sext operand
2616 * @param DAG The current DAG
2617 * @return The SDValue with the entire instruction sequence
2619 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2620 {
2621 DebugLoc dl = Op.getDebugLoc();
2623 // Type to extend to
2624 MVT OpVT = Op.getValueType().getSimpleVT();
2625 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2626 OpVT, (128 / OpVT.getSizeInBits()));
2628 // Type to extend from
2629 SDValue Op0 = Op.getOperand(0);
2630 MVT Op0VT = Op0.getValueType().getSimpleVT();
2632 // The type to extend to needs to be a i128 and
2633 // the type to extend from needs to be i64 or i32.
2634 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2635 "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2637 // Create shuffle mask
2638 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2639 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2640 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2641 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2642 DAG.getConstant(mask1, MVT::i32),
2643 DAG.getConstant(mask1, MVT::i32),
2644 DAG.getConstant(mask2, MVT::i32),
2645 DAG.getConstant(mask3, MVT::i32));
2647 // Word wise arithmetic right shift to generate at least one byte
2648 // that contains sign bits.
2649 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2650 SDValue sraVal = DAG.getNode(ISD::SRA,
2651 dl,
2652 mvt,
2653 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2654 DAG.getConstant(31, MVT::i32));
2656 // Shuffle bytes - Copy the sign bits into the upper 64 bits
2657 // and the input value into the lower 64 bits.
2658 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2659 DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2661 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2662 }
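//! Illustration: the sign-extension shuffle masks
/*!
  Control byte 0x10 selects byte 0 of the second shufb operand, i.e. the
  sign-filled word produced by the SRA, while 0x00..0x07 select the data
  bytes of the extended input. Spelled out per 32-bit mask word:

  \code
  // i64 -> i128: eight sign bytes, then the eight data bytes:
  static const unsigned SExt64Mask[4] = { 0x10101010, 0x10101010,
                                          0x00010203, 0x04050607 };
  // i32 -> i128: twelve sign bytes, then the four data bytes:
  static const unsigned SExt32Mask[4] = { 0x10101010, 0x10101010,
                                          0x10101010, 0x00010203 };
  \endcode
*/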
2664 //! Custom (target-specific) lowering entry point
2665 /*!
2666 This is where LLVM's DAG selection process calls to do target-specific
2667 lowering of nodes.
2668 */
2669 SDValue
2670 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2671 {
2672 unsigned Opc = (unsigned) Op.getOpcode();
2673 EVT VT = Op.getValueType();
2675 switch (Opc) {
2676 default: {
2678 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2679 errs() << "Op.getOpcode() = " << Opc << "\n";
2680 errs() << "*Op.getNode():\n";
2681 Op.getNode()->dump();
2683 llvm_unreachable(0);
2684 }
2685 case ISD::LOAD:
2689 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2690 case ISD::STORE:
2691 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2692 case ISD::ConstantPool:
2693 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2694 case ISD::GlobalAddress:
2695 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2696 case ISD::JumpTable:
2697 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2698 case ISD::ConstantFP:
2699 return LowerConstantFP(Op, DAG);
2701 // i8, i64 math ops:
2702 case ISD::ADD:
2703 case ISD::SUB:
2704 case ISD::ROTR:
2705 case ISD::ROTL:
2706 case ISD::SRL:
2707 case ISD::SHL:
2708 case ISD::SRA: {
2709 if (VT == MVT::i8)
2710 return LowerI8Math(Op, DAG, Opc, *this);
2711 break;
2712 }
2714 case ISD::FP_TO_SINT:
2715 case ISD::FP_TO_UINT:
2716 return LowerFP_TO_INT(Op, DAG, *this);
2718 case ISD::SINT_TO_FP:
2719 case ISD::UINT_TO_FP:
2720 return LowerINT_TO_FP(Op, DAG, *this);
2722 // Vector-related lowering.
2723 case ISD::BUILD_VECTOR:
2724 return LowerBUILD_VECTOR(Op, DAG);
2725 case ISD::SCALAR_TO_VECTOR:
2726 return LowerSCALAR_TO_VECTOR(Op, DAG);
2727 case ISD::VECTOR_SHUFFLE:
2728 return LowerVECTOR_SHUFFLE(Op, DAG);
2729 case ISD::EXTRACT_VECTOR_ELT:
2730 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2731 case ISD::INSERT_VECTOR_ELT:
2732 return LowerINSERT_VECTOR_ELT(Op, DAG);
2734 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2735 case ISD::AND:
2736 case ISD::OR:
2737 case ISD::XOR:
2738 return LowerByteImmed(Op, DAG);
2740 // Vector and i8 multiply:
2741 case ISD::MUL:
2742 if (VT == MVT::i8)
2743 return LowerI8Math(Op, DAG, Opc, *this);
2744 break;
2745 case ISD::CTPOP:
2746 return LowerCTPOP(Op, DAG);
2748 case ISD::SELECT_CC:
2749 return LowerSELECT_CC(Op, DAG, *this);
2751 case ISD::SETCC:
2752 return LowerSETCC(Op, DAG, *this);
2754 case ISD::TRUNCATE:
2755 return LowerTRUNCATE(Op, DAG);
2757 case ISD::SIGN_EXTEND:
2758 return LowerSIGN_EXTEND(Op, DAG);
2759 }
2761 return SDValue();
2762 }
2764 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2765 SmallVectorImpl<SDValue> &Results,
2766 SelectionDAG &DAG) {
2768 #if 0
2769 unsigned Opc = (unsigned) N->getOpcode();
2770 EVT OpVT = N->getValueType(0);
2772 switch (Opc) {
2773 default: {
2774 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2775 errs() << "Op.getOpcode() = " << Opc << "\n";
2776 errs() << "*Op.getNode():\n";
2784 /* Otherwise, return unchanged */
2787 //===----------------------------------------------------------------------===//
2788 // Target Optimization Hooks
2789 //===----------------------------------------------------------------------===//
2791 SDValue
2792 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2793 {
2794 #if 0
2795 TargetMachine &TM = getTargetMachine();
2796 #endif
2797 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2798 SelectionDAG &DAG = DCI.DAG;
2799 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2800 EVT NodeVT = N->getValueType(0); // The node's value type
2801 EVT Op0VT = Op0.getValueType(); // The first operand's result
2802 SDValue Result; // Initially, empty result
2803 DebugLoc dl = N->getDebugLoc();
2805 switch (N->getOpcode()) {
2806 default: break;
2807 case ISD::ADD: {
2808 SDValue Op1 = N->getOperand(1);
2810 if (Op0.getOpcode() == SPUISD::IndirectAddr
2811 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2812 // Normalize the operands to reduce repeated code
2813 SDValue IndirectArg = Op0, AddArg = Op1;
2815 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2816 IndirectArg = Op1;
2817 AddArg = Op0;
2818 }
2820 if (isa<ConstantSDNode>(AddArg)) {
2821 ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
2822 SDValue IndOp1 = IndirectArg.getOperand(1);
2824 if (CN0->isNullValue()) {
2825 // (add (SPUindirect <arg>, <arg>), 0) ->
2826 // (SPUindirect <arg>, <arg>)
2828 #if !defined(NDEBUG)
2829 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2830 errs() << "\n"
2831 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2832 << "With:    (SPUindirect <arg>, <arg>)\n";
2833 }
2834 #endif
2836 return IndirectArg;
2837 } else if (isa<ConstantSDNode>(IndOp1)) {
2838 // (add (SPUindirect <arg>, <const>), <const>) ->
2839 // (SPUindirect <arg>, <const + const>)
2840 ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
2841 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2842 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2844 #if !defined(NDEBUG)
2845 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2846 errs() << "\n"
2847 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2848 << "), " << CN0->getSExtValue() << ")\n"
2849 << "With: (SPUindirect <arg>, "
2850 << combinedConst << ")\n";
2851 }
2852 #endif
2854 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2855 IndirectArg, combinedValue);
2856 }
2857 }
2858 }
2859 break;
2860 }
2861 case ISD::SIGN_EXTEND:
2862 case ISD::ZERO_EXTEND:
2863 case ISD::ANY_EXTEND: {
2864 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2865 // (any_extend (SPUextract_elt0 <arg>)) ->
2866 // (SPUextract_elt0 <arg>)
2867 // Types must match, however...
2868 #if !defined(NDEBUG)
2869 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2870 errs() << "\nReplace: ";
2872 errs() << "\nWith: ";
2873 Op0.getNode()->dump(&DAG);
2882 case SPUISD::IndirectAddr: {
2883 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2884 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2885 if (CN != 0 && CN->getZExtValue() == 0) {
2886 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2887 // (SPUaform <addr>, 0)
2889 DEBUG(errs() << "Replace: ");
2890 DEBUG(N->dump(&DAG));
2891 DEBUG(errs() << "\nWith: ");
2892 DEBUG(Op0.getNode()->dump(&DAG));
2893 DEBUG(errs() << "\n");
2897 } else if (Op0.getOpcode() == ISD::ADD) {
2898 SDValue Op1 = N->getOperand(1);
2899 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2900 // (SPUindirect (add <arg>, <arg>), 0) ->
2901 // (SPUindirect <arg>, <arg>)
2902 if (CN1->isNullValue()) {
2904 #if !defined(NDEBUG)
2905 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2906 errs() << "\n"
2907 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2908 << "With:    (SPUindirect <arg>, <arg>)\n";
2909 }
2910 #endif
2912 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2913 Op0.getOperand(0), Op0.getOperand(1));
2919 case SPUISD::SHLQUAD_L_BITS:
2920 case SPUISD::SHLQUAD_L_BYTES:
2921 case SPUISD::ROTBYTES_LEFT: {
2922 SDValue Op1 = N->getOperand(1);
2924 // Kill degenerate vector shifts:
2925 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2926 if (CN->isNullValue()) {
2927 Result = Op0;
2928 }
2929 }
2930 break;
2931 }
2932 case SPUISD::PREFSLOT2VEC: {
2933 switch (Op0.getOpcode()) {
2934 default:
2935 break;
2936 case ISD::ANY_EXTEND:
2937 case ISD::ZERO_EXTEND:
2938 case ISD::SIGN_EXTEND: {
2939 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2941 // but only if the SPUprefslot2vec and <arg> types match.
2942 SDValue Op00 = Op0.getOperand(0);
2943 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2944 SDValue Op000 = Op00.getOperand(0);
2945 if (Op000.getValueType() == NodeVT) {
2946 Result = Op000;
2947 }
2948 }
2949 break;
2950 }
2951 case SPUISD::VEC2PREFSLOT: {
2952 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2953 // <arg>
2954 Result = Op0.getOperand(0);
2955 break;
2956 }
2957 }
2958 break;
2959 }
2960 }
2962 // Otherwise, return unchanged.
2964 if (Result.getNode()) {
2965 DEBUG(errs() << "\nReplace.SPU: ");
2966 DEBUG(N->dump(&DAG));
2967 DEBUG(errs() << "\nWith: ");
2968 DEBUG(Result.getNode()->dump(&DAG));
2969 DEBUG(errs() << "\n");
2976 //===----------------------------------------------------------------------===//
2977 // Inline Assembly Support
2978 //===----------------------------------------------------------------------===//
2980 /// getConstraintType - Given a constraint letter, return the type of
2981 /// constraint it is for this target.
2982 SPUTargetLowering::ConstraintType
2983 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2984 if (ConstraintLetter.size() == 1) {
2985 switch (ConstraintLetter[0]) {
2986 default: break;
2987 case 'b':
2988 case 'r':
2989 case 'f':
2990 case 'v':
2991 case 'y':
2992 return C_RegisterClass;
2993 }
2994 }
2995 return TargetLowering::getConstraintType(ConstraintLetter);
2998 std::pair<unsigned, const TargetRegisterClass*>
2999 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3000 EVT VT) const
3001 {
3002 if (Constraint.size() == 1) {
3003 // GCC RS6000 Constraint Letters
3004 switch (Constraint[0]) {
3005 case 'b':
3006 case 'r':
3007 if (VT == MVT::i64)
3008 return std::make_pair(0U, SPU::R64CRegisterClass);
3009 return std::make_pair(0U, SPU::R32CRegisterClass);
3010 case 'f':
3011 if (VT == MVT::f32)
3012 return std::make_pair(0U, SPU::R32FPRegisterClass);
3013 else if (VT == MVT::f64)
3014 return std::make_pair(0U, SPU::R64FPRegisterClass);
3015 break;
3016 case 'v':
3017 return std::make_pair(0U, SPU::GPRCRegisterClass);
3018 }
3019 }
3021 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3024 //! Compute used/known bits for a SPU operand
3025 void
3026 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3027 const APInt &Mask,
3028 APInt &KnownZero,
3029 APInt &KnownOne,
3030 const SelectionDAG &DAG,
3031 unsigned Depth ) const {
3032 #if 0
3033 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3035 switch (Op.getOpcode()) {
3036 default:
3037 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3038 break;
3043 case SPUISD::PREFSLOT2VEC:
3044 case SPUISD::LDRESULT:
3045 case SPUISD::VEC2PREFSLOT:
3046 case SPUISD::SHLQUAD_L_BITS:
3047 case SPUISD::SHLQUAD_L_BYTES:
3048 case SPUISD::VEC_ROTL:
3049 case SPUISD::VEC_ROTR:
3050 case SPUISD::ROTBYTES_LEFT:
3051 case SPUISD::SELECT_MASK:
3052 break;
3053 }
3054 #endif
3055 }
3057 unsigned
3058 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3059 unsigned Depth) const {
3060 switch (Op.getOpcode()) {
3061 default:
3062 return 1;
3064 case ISD::SETCC: {
3065 EVT VT = Op.getValueType();
3067 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3068 VT = MVT::i32;
3069 }
3070 return VT.getSizeInBits();
3071 }
3072 }
3073 }
3075 // LowerAsmOperandForConstraint
3076 void
3077 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3078 char ConstraintLetter,
3079 bool hasMemory,
3080 std::vector<SDValue> &Ops,
3081 SelectionDAG &DAG) const {
3082 // Default, for the time being, to the base class handler
3083 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3084 Ops, DAG);
3085 }
3087 /// isLegalAddressImmediate - Return true if the integer value can be used
3088 /// as the offset of the target addressing mode.
3089 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3090 const Type *Ty) const {
3091 // SPU's addresses span a 256K range, i.e., a signed 18-bit immediate:
3092 return (V > -(1 << 18) && V < (1 << 18) - 1);
3095 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3096 return false;
3097 }
3099 bool
3100 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3101 // The SPU target isn't yet aware of offsets.
3102 return false;
3103 }