lib/Target/PowerPC/PPCISelLowering.cpp

   1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the PPCISelLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "PPCISelLowering.h"
  15 #include "PPCMachineFunctionInfo.h"
  16 #include "PPCPredicates.h"
  17 #include "PPCTargetMachine.h"
  18 #include "PPCPerfectShuffle.h"
  19 #include "llvm/ADT/STLExtras.h"
  20 #include "llvm/ADT/VectorExtras.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/MachineFrameInfo.h"
  23 #include "llvm/CodeGen/MachineFunction.h"
  24 #include "llvm/CodeGen/MachineInstrBuilder.h"
  25 #include "llvm/CodeGen/MachineRegisterInfo.h"
  26 #include "llvm/CodeGen/PseudoSourceValue.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/CallingConv.h"
  29 #include "llvm/Constants.h"
  30 #include "llvm/Function.h"
  31 #include "llvm/Intrinsics.h"
  32 #include "llvm/Support/MathExtras.h"
  33 #include "llvm/Target/TargetOptions.h"
  34 #include "llvm/Support/CommandLine.h"
  35 #include "llvm/DerivedTypes.h"
  36 using namespace llvm;
  37
// Forward declarations of three file-local calling-convention hooks.  Only the
// prototypes are given here; all three share one signature, take every
// argument by reference and return bool.
// NOTE(review): presumably these are referenced by the generated
// calling-convention code and defined further down in this file - confirm.
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                     CCValAssign::LocInfo &LocInfo,
                                     ISD::ArgFlagsTy &ArgFlags,
                                     CCState &State);
static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State);
static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                              MVT &LocVT,
                                              CCValAssign::LocInfo &LocInfo,
                                              ISD::ArgFlagsTy &ArgFlags,
                                              CCState &State);
  52
  53 static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
  54 cl::desc("enable preincrement load/store generation on PPC (experimental)"),
  55                                      cl::Hidden);
  56
  57 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  58   : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
  59
  60   setPow2DivIsCheap();
  61
  62   // Use _setjmp/_longjmp instead of setjmp/longjmp.
  63   setUseUnderscoreSetJmp(true);
  64   setUseUnderscoreLongJmp(true);
  65
  66   // Set up the register classes.
  67   addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  68   addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  69   addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
  70
  71   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  72   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  73   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
  74
  75   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  76
  77   // PowerPC has pre-inc load and store's.
  78   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  79   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  80   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  81   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  82   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  83   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  84   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  85   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  86   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  87   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  88
  89   // This is used in the ppcf128->int sequence.  Note it has different semantics
  90   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  91   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
  92
  93   // PowerPC has no SREM/UREM instructions
  94   setOperationAction(ISD::SREM, MVT::i32, Expand);
  95   setOperationAction(ISD::UREM, MVT::i32, Expand);
  96   setOperationAction(ISD::SREM, MVT::i64, Expand);
  97   setOperationAction(ISD::UREM, MVT::i64, Expand);
  98
  99   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
 100   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 101   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 102   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 103   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 104   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 105   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 106   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
 107   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
 108
 109   // We don't support sin/cos/sqrt/fmod/pow
 110   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 111   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 112   setOperationAction(ISD::FREM , MVT::f64, Expand);
 113   setOperationAction(ISD::FPOW , MVT::f64, Expand);
 114   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 115   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 116   setOperationAction(ISD::FREM , MVT::f32, Expand);
 117   setOperationAction(ISD::FPOW , MVT::f32, Expand);
 118
 119   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 120
 121   // If we're enabling GP optimizations, use hardware square root
 122   if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
 123     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 124     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 125   }
 126
 127   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 128   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 129
 130   // PowerPC does not have BSWAP, CTPOP or CTTZ
 131   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
 132   setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
 133   setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
 134   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
 135   setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
 136   setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
 137
 138   // PowerPC does not have ROTR
 139   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
 140   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
 141
 142   // PowerPC does not have Select
 143   setOperationAction(ISD::SELECT, MVT::i32, Expand);
 144   setOperationAction(ISD::SELECT, MVT::i64, Expand);
 145   setOperationAction(ISD::SELECT, MVT::f32, Expand);
 146   setOperationAction(ISD::SELECT, MVT::f64, Expand);
 147
 148   // PowerPC wants to turn select_cc of FP into fsel when possible.
 149   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 150   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 151
 152   // PowerPC wants to optimize integer setcc a bit
 153   setOperationAction(ISD::SETCC, MVT::i32, Custom);
 154
 155   // PowerPC does not have BRCOND which requires SetCC
 156   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 157
 158   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
 159
 160   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
 161   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 162
 163   // PowerPC does not have [U|S]INT_TO_FP
 164   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
 165   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 166
 167   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
 168   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
 169   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
 170   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
 171
 172   // We cannot sextinreg(i1).  Expand to shifts.
 173   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 174
 175   // Support label based line numbers.
 176   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 177   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 178
 179   setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
 180   setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
 181   setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
 182   setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
 183
 184
 185   // We want to legalize GlobalAddress and ConstantPool nodes into the
 186   // appropriate instructions to materialize the address.
 187   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 188   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 189   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 190   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 191   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 192   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
 193   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 194   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 195
 196   // RET must be custom lowered, to meet ABI requirements.
 197   setOperationAction(ISD::RET               , MVT::Other, Custom);
 198
 199   // TRAP is legal.
 200   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 201
 202   // TRAMPOLINE is custom lowered.
 203   setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
 204
 205   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 206   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 207
 208   // VAARG is custom lowered with the SVR4 ABI
 209   if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI())
 210     setOperationAction(ISD::VAARG, MVT::Other, Custom);
 211   else
 212     setOperationAction(ISD::VAARG, MVT::Other, Expand);
 213
 214   // Use the default implementation.
 215   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 216   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 217   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 218   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
 219   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
 220   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
 221
 222   // We want to custom lower some of our intrinsics.
 223   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 224
 225   // Comparisons that require checking two conditions.
 226   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
 227   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
 228   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
 229   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
 230   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
 231   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
 232   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
 233   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
 234   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
 235   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
 236   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
 237   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
 238
 239   if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
 240     // They also have instructions for converting between i64 and fp.
 241     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 242     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 243     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 244     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
 245     // This is just the low 32 bits of a (signed) fp->i64 conversion.
 246     // We cannot do this with Promote because i64 is not a legal type.
 247     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 248
 249     // FIXME: disable this lowered code.  This generates 64-bit register values,
 250     // and we don't model the fact that the top part is clobbered by calls.  We
 251     // need to flag these together so that the value isn't live across a call.
 252     //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 253   } else {
 254     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
 255     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
 256   }
 257
 258   if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
 259     // 64-bit PowerPC implementations can support i64 types directly
 260     addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
 261     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 262     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 263     // 64-bit PowerPC wants to expand i128 shifts itself.
 264     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
 265     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
 266     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
 267   } else {
 268     // 32-bit PowerPC wants to expand i64 shifts itself.
 269     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 270     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 271     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 272   }
 273
 274   if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
 275     // First set operation action for all vector types to expand. Then we
 276     // will selectively turn on ones that can be effectively codegen'd.
 277     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 278          i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 279       MVT VT = (MVT::SimpleValueType)i;
 280
 281       // add/sub are legal for all supported vector VT's.
 282       setOperationAction(ISD::ADD , VT, Legal);
 283       setOperationAction(ISD::SUB , VT, Legal);
 284
 285       // We promote all shuffles to v16i8.
 286       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
 287       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
 288
 289       // We promote all non-typed operations to v4i32.
 290       setOperationAction(ISD::AND   , VT, Promote);
 291       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
 292       setOperationAction(ISD::OR    , VT, Promote);
 293       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
 294       setOperationAction(ISD::XOR   , VT, Promote);
 295       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
 296       setOperationAction(ISD::LOAD  , VT, Promote);
 297       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
 298       setOperationAction(ISD::SELECT, VT, Promote);
 299       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
 300       setOperationAction(ISD::STORE, VT, Promote);
 301       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
 302
 303       // No other operations are legal.
 304       setOperationAction(ISD::MUL , VT, Expand);
 305       setOperationAction(ISD::SDIV, VT, Expand);
 306       setOperationAction(ISD::SREM, VT, Expand);
 307       setOperationAction(ISD::UDIV, VT, Expand);
 308       setOperationAction(ISD::UREM, VT, Expand);
 309       setOperationAction(ISD::FDIV, VT, Expand);
 310       setOperationAction(ISD::FNEG, VT, Expand);
 311       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
 312       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
 313       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
 314       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 315       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 316       setOperationAction(ISD::UDIVREM, VT, Expand);
 317       setOperationAction(ISD::SDIVREM, VT, Expand);
 318       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
 319       setOperationAction(ISD::FPOW, VT, Expand);
 320       setOperationAction(ISD::CTPOP, VT, Expand);
 321       setOperationAction(ISD::CTLZ, VT, Expand);
 322       setOperationAction(ISD::CTTZ, VT, Expand);
 323     }
 324
 325     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
 326     // with merges, splats, etc.
 327     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
 328
 329     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
 330     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
 331     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
 332     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
 333     setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
 334     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
 335
 336     addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
 337     addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
 338     addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
 339     addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
 340
 341     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
 342     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 343     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
 344     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 345
 346     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 347     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
 348
 349     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
 350     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
 351     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
 352     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 353   }
 354
 355   setShiftAmountType(MVT::i32);
 356   setBooleanContents(ZeroOrOneBooleanContent);
 357
 358   if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
 359     setStackPointerRegisterToSaveRestore(PPC::X1);
 360     setExceptionPointerRegister(PPC::X3);
 361     setExceptionSelectorRegister(PPC::X4);
 362   } else {
 363     setStackPointerRegisterToSaveRestore(PPC::R1);
 364     setExceptionPointerRegister(PPC::R3);
 365     setExceptionSelectorRegister(PPC::R4);
 366   }
 367
 368   // We have target-specific dag combine patterns for the following nodes:
 369   setTargetDAGCombine(ISD::SINT_TO_FP);
 370   setTargetDAGCombine(ISD::STORE);
 371   setTargetDAGCombine(ISD::BR_CC);
 372   setTargetDAGCombine(ISD::BSWAP);
 373
 374   // Darwin long double math library functions have $LDBL128 appended.
 375   if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
 376     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
 377     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
 378     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
 379     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
 380     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
 381     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
 382     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
 383     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
 384     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
 385     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
 386   }
 387
 388   computeRegisterProperties();
 389 }
 390
 391 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
 392 /// function arguments in the caller parameter area.
 393 unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
 394   TargetMachine &TM = getTargetMachine();
 395   // Darwin passes everything on 4 byte boundary.
 396   if (TM.getSubtarget<PPCSubtarget>().isDarwin())
 397     return 4;
 398   // FIXME SVR4 TBD
 399   return 4;
 400 }
 401
 402 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
 403   switch (Opcode) {
 404   default: return 0;
 405   case PPCISD::FSEL:            return "PPCISD::FSEL";
 406   case PPCISD::FCFID:           return "PPCISD::FCFID";
 407   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
 408   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
 409   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
 410   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
 411   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
 412   case PPCISD::VPERM:           return "PPCISD::VPERM";
 413   case PPCISD::Hi:              return "PPCISD::Hi";
 414   case PPCISD::Lo:              return "PPCISD::Lo";
 415   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
 416   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
 417   case PPCISD::SRL:             return "PPCISD::SRL";
 418   case PPCISD::SRA:             return "PPCISD::SRA";
 419   case PPCISD::SHL:             return "PPCISD::SHL";
 420   case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
 421   case PPCISD::STD_32:          return "PPCISD::STD_32";
 422   case PPCISD::CALL_SVR4:       return "PPCISD::CALL_SVR4";
 423   case PPCISD::CALL_Darwin:     return "PPCISD::CALL_Darwin";
 424   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
 425   case PPCISD::BCTRL_Darwin:    return "PPCISD::BCTRL_Darwin";
 426   case PPCISD::BCTRL_SVR4:      return "PPCISD::BCTRL_SVR4";
 427   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
 428   case PPCISD::MFCR:            return "PPCISD::MFCR";
 429   case PPCISD::VCMP:            return "PPCISD::VCMP";
 430   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
 431   case PPCISD::LBRX:            return "PPCISD::LBRX";
 432   case PPCISD::STBRX:           return "PPCISD::STBRX";
 433   case PPCISD::LARX:            return "PPCISD::LARX";
 434   case PPCISD::STCX:            return "PPCISD::STCX";
 435   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
 436   case PPCISD::MFFS:            return "PPCISD::MFFS";
 437   case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
 438   case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
 439   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
 440   case PPCISD::MTFSF:           return "PPCISD::MTFSF";
 441   case PPCISD::TAILCALL:        return "PPCISD::TAILCALL";
 442   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
 443   }
 444 }
 445
 446 MVT PPCTargetLowering::getSetCCResultType(MVT VT) const {
 447   return MVT::i32;
 448 }
 449
 450 /// getFunctionAlignment - Return the Log2 alignment of this function.
 451 unsigned PPCTargetLowering::getFunctionAlignment(const Function *F) const {
 452   if (getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin())
 453     return F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4;
 454   else
 455     return 2;
 456 }
 457
 458 //===----------------------------------------------------------------------===//
 459 // Node matching predicates, for use by the tblgen matching code.
 460 //===----------------------------------------------------------------------===//
 461
 462 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
 463 static bool isFloatingPointZero(SDValue Op) {
 464   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
 465     return CFP->getValueAPF().isZero();
 466   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
 467     // Maybe this has already been legalized into the constant pool?
 468     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
 469       if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
 470         return CFP->getValueAPF().isZero();
 471   }
 472   return false;
 473 }
 474
 475 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
 476 /// true if Op is undef or if it matches the specified value.
 477 static bool isConstantOrUndef(int Op, int Val) {
 478   return Op < 0 || Op == Val;
 479 }
 480
 481 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
 482 /// VPKUHUM instruction.
 483 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
 484   if (!isUnary) {
 485     for (unsigned i = 0; i != 16; ++i)
 486       if (!isConstantOrUndef(N->getMaskElt(i),  i*2+1))
 487         return false;
 488   } else {
 489     for (unsigned i = 0; i != 8; ++i)
 490       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+1) ||
 491           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+1))
 492         return false;
 493   }
 494   return true;
 495 }
 496
 497 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
 498 /// VPKUWUM instruction.
 499 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
 500   if (!isUnary) {
 501     for (unsigned i = 0; i != 16; i += 2)
 502       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
 503           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
 504         return false;
 505   } else {
 506     for (unsigned i = 0; i != 8; i += 2)
 507       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
 508           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3) ||
 509           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+2) ||
 510           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+3))
 511         return false;
 512   }
 513   return true;
 514 }
 515
 516 /// isVMerge - Common function, used to match vmrg* shuffles.
 517 ///
 518 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
 519                      unsigned LHSStart, unsigned RHSStart) {
 520   assert(N->getValueType(0) == MVT::v16i8 &&
 521          "PPC only supports shuffles by bytes!");
 522   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
 523          "Unsupported merge size!");
 524
 525   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
 526     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
 527       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
 528                              LHSStart+j+i*UnitSize) ||
 529           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
 530                              RHSStart+j+i*UnitSize))
 531         return false;
 532     }
 533   return true;
 534 }
 535
 536 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
 537 /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
 538 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
 539                              bool isUnary) {
 540   if (!isUnary)
 541     return isVMerge(N, UnitSize, 8, 24);
 542   return isVMerge(N, UnitSize, 8, 8);
 543 }
 544
 545 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
 546 /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
 547 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
 548                              bool isUnary) {
 549   if (!isUnary)
 550     return isVMerge(N, UnitSize, 0, 16);
 551   return isVMerge(N, UnitSize, 0, 0);
 552 }
 553
 554
 555 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
 556 /// amount, otherwise return -1.
 557 int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
 558   assert(N->getValueType(0) == MVT::v16i8 &&
 559          "PPC only supports shuffles by bytes!");
 560
 561   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
 562
 563   // Find the first non-undef value in the shuffle mask.
 564   unsigned i;
 565   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
 566     /*search*/;
 567
 568   if (i == 16) return -1;  // all undef.
 569
 570   // Otherwise, check to see if the rest of the elements are consecutively
 571   // numbered from this value.
 572   unsigned ShiftAmt = SVOp->getMaskElt(i);
 573   if (ShiftAmt < i) return -1;
 574   ShiftAmt -= i;
 575
 576   if (!isUnary) {
 577     // Check the rest of the elements to see if they are consecutive.
 578     for (++i; i != 16; ++i)
 579       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
 580         return -1;
 581   } else {
 582     // Check the rest of the elements to see if they are consecutive.
 583     for (++i; i != 16; ++i)
 584       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
 585         return -1;
 586   }
 587   return ShiftAmt;
 588 }
 589
 590 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
 591 /// specifies a splat of a single element that is suitable for input to
 592 /// VSPLTB/VSPLTH/VSPLTW.
 593 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
 594   assert(N->getValueType(0) == MVT::v16i8 &&
 595          (EltSize == 1 || EltSize == 2 || EltSize == 4));
 596
 597   // This is a splat operation if each element of the permute is the same, and
 598   // if the value doesn't reference the second vector.
 599   unsigned ElementBase = N->getMaskElt(0);
 600
 601   // FIXME: Handle UNDEF elements too!
 602   if (ElementBase >= 16)
 603     return false;
 604
 605   // Check that the indices are consecutive, in the case of a multi-byte element
 606   // splatted with a v16i8 mask.
 607   for (unsigned i = 1; i != EltSize; ++i)
 608     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
 609       return false;
 610
 611   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
 612     if (N->getMaskElt(i) < 0) continue;
 613     for (unsigned j = 0; j != EltSize; ++j)
 614       if (N->getMaskElt(i+j) != N->getMaskElt(j))
 615         return false;
 616   }
 617   return true;
 618 }
 619
 620 /// isAllNegativeZeroVector - Returns true if all elements of build_vector
 621 /// are -0.0.
 622 bool PPC::isAllNegativeZeroVector(SDNode *N) {
 623   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
 624
 625   APInt APVal, APUndef;
 626   unsigned BitSize;
 627   bool HasAnyUndefs;
 628
 629   if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
 630     if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
 631       return CFP->getValueAPF().isNegZero();
 632
 633   return false;
 634 }
 635
 636 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
 637 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
 638 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
 639   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
 640   assert(isSplatShuffleMask(SVOp, EltSize));
 641   return SVOp->getMaskElt(0) / EltSize;
 642 }
 643
 644 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
 645 /// by using a vspltis[bhw] instruction of the specified element size, return
 646 /// the constant being splatted.  The ByteSize field indicates the number of
 647 /// bytes of each element [124] -> [bhw].
 648 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
 649   SDValue OpVal(0, 0);
 650
 651   // If ByteSize of the splat is bigger than the element size of the
 652   // build_vector, then we have a case where we are checking for a splat where
 653   // multiple elements of the buildvector are folded together into a single
 654   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
 655   unsigned EltSize = 16/N->getNumOperands();
 656   if (EltSize < ByteSize) {
 657     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
 658     SDValue UniquedVals[4];
 659     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
 660
 661     // See if all of the elements in the buildvector agree across.
 662     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
 663       if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
 664       // If the element isn't a constant, bail fully out.
 665       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
 666
 667
 668       if (UniquedVals[i&(Multiple-1)].getNode() == 0)
 669         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
 670       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
 671         return SDValue();  // no match.
 672     }
 673
 674     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
 675     // either constant or undef values that are identical for each chunk.  See
 676     // if these chunks can form into a larger vspltis*.
 677
 678     // Check to see if all of the leading entries are either 0 or -1.  If
 679     // neither, then this won't fit into the immediate field.
 680     bool LeadingZero = true;
 681     bool LeadingOnes = true;
 682     for (unsigned i = 0; i != Multiple-1; ++i) {
 683       if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.
 684
 685       LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
 686       LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
 687     }
 688     // Finally, check the least significant entry.
 689     if (LeadingZero) {
 690       if (UniquedVals[Multiple-1].getNode() == 0)
 691         return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
 692       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
 693       if (Val < 16)
 694         return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
 695     }
 696     if (LeadingOnes) {
 697       if (UniquedVals[Multiple-1].getNode() == 0)
 698         return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
 699       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
 700       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
 701         return DAG.getTargetConstant(Val, MVT::i32);
 702     }
 703
 704     return SDValue();
 705   }
 706
 707   // Check to see if this buildvec has a single non-undef value in its elements.
 708   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
 709     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
 710     if (OpVal.getNode() == 0)
 711       OpVal = N->getOperand(i);
 712     else if (OpVal != N->getOperand(i))
 713       return SDValue();
 714   }
 715
 716   if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.
 717
 718   unsigned ValSizeInBytes = EltSize;
 719   uint64_t Value = 0;
 720   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
 721     Value = CN->getZExtValue();
 722   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
 723     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
 724     Value = FloatToBits(CN->getValueAPF().convertToFloat());
 725   }
 726
 727   // If the splat value is larger than the element value, then we can never do
 728   // this splat.  The only case that we could fit the replicated bits into our
 729   // immediate field for would be zero, and we prefer to use vxor for it.
 730   if (ValSizeInBytes < ByteSize) return SDValue();
 731
 732   // If the element value is larger than the splat value, cut it in half and
 733   // check to see if the two halves are equal.  Continue doing this until we
 734   // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
 735   while (ValSizeInBytes > ByteSize) {
 736     ValSizeInBytes >>= 1;
 737
 738     // If the top half equals the bottom half, we're still ok.
 739     if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
 740          (Value                        & ((1 << (8*ValSizeInBytes))-1)))
 741       return SDValue();
 742   }
 743
 744   // Properly sign extend the value.
 745   int ShAmt = (4-ByteSize)*8;
 746   int MaskVal = ((int)Value << ShAmt) >> ShAmt;
 747
 748   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
 749   if (MaskVal == 0) return SDValue();
 750
 751   // Finally, if this value fits in a 5 bit sext field, return it
 752   if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
 753     return DAG.getTargetConstant(MaskVal, MVT::i32);
 754   return SDValue();
 755 }
 756
 757 //===----------------------------------------------------------------------===//
 758 //  Addressing Mode Selection
 759 //===----------------------------------------------------------------------===//
 760
 761 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
 762 /// or 64-bit immediate, and if the value can be accurately represented as a
 763 /// sign extension from a 16-bit value.  If so, this returns true and the
 764 /// immediate.
 765 static bool isIntS16Immediate(SDNode *N, short &Imm) {
 766   if (N->getOpcode() != ISD::Constant)
 767     return false;
 768
 769   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
 770   if (N->getValueType(0) == MVT::i32)
 771     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
 772   else
 773     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
 774 }
 775 static bool isIntS16Immediate(SDValue Op, short &Imm) {
 776   return isIntS16Immediate(Op.getNode(), Imm);
 777 }
 778
 779
 780 /// SelectAddressRegReg - Given the specified addressed, check to see if it
 781 /// can be represented as an indexed [r+r] operation.  Returns false if it
 782 /// can be more efficiently represented with [r+imm].
 783 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
 784                                             SDValue &Index,
 785                                             SelectionDAG &DAG) const {
 786   short imm = 0;
 787   if (N.getOpcode() == ISD::ADD) {
 788     if (isIntS16Immediate(N.getOperand(1), imm))
 789       return false;    // r+i
 790     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
 791       return false;    // r+i
 792
 793     Base = N.getOperand(0);
 794     Index = N.getOperand(1);
 795     return true;
 796   } else if (N.getOpcode() == ISD::OR) {
 797     if (isIntS16Immediate(N.getOperand(1), imm))
 798       return false;    // r+i can fold it if we can.
 799
 800     // If this is an or of disjoint bitfields, we can codegen this as an add
 801     // (for better address arithmetic) if the LHS and RHS of the OR are provably
 802     // disjoint.
 803     APInt LHSKnownZero, LHSKnownOne;
 804     APInt RHSKnownZero, RHSKnownOne;
 805     DAG.ComputeMaskedBits(N.getOperand(0),
 806                           APInt::getAllOnesValue(N.getOperand(0)
 807                             .getValueSizeInBits()),
 808                           LHSKnownZero, LHSKnownOne);
 809
 810     if (LHSKnownZero.getBoolValue()) {
 811       DAG.ComputeMaskedBits(N.getOperand(1),
 812                             APInt::getAllOnesValue(N.getOperand(1)
 813                               .getValueSizeInBits()),
 814                             RHSKnownZero, RHSKnownOne);
 815       // If all of the bits are known zero on the LHS or RHS, the add won't
 816       // carry.
 817       if (~(LHSKnownZero | RHSKnownZero) == 0) {
 818         Base = N.getOperand(0);
 819         Index = N.getOperand(1);
 820         return true;
 821       }
 822     }
 823   }
 824
 825   return false;
 826 }
 827
 828 /// Returns true if the address N can be represented by a base register plus
 829 /// a signed 16-bit displacement [r+imm], and if it is not better
 830 /// represented as reg+reg.
 831 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
 832                                             SDValue &Base,
 833                                             SelectionDAG &DAG) const {
 834   // FIXME dl should come from parent load or store, not from address
 835   DebugLoc dl = N.getDebugLoc();
 836   // If this can be more profitably realized as r+r, fail.
 837   if (SelectAddressRegReg(N, Disp, Base, DAG))
 838     return false;
 839
 840   if (N.getOpcode() == ISD::ADD) {
 841     short imm = 0;
 842     if (isIntS16Immediate(N.getOperand(1), imm)) {
 843       Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
 844       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
 845         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
 846       } else {
 847         Base = N.getOperand(0);
 848       }
 849       return true; // [r+i]
 850     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
 851       // Match LOAD (ADD (X, Lo(G))).
 852      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
 853              && "Cannot handle constant offsets yet!");
 854       Disp = N.getOperand(1).getOperand(0);  // The global address.
 855       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
 856              Disp.getOpcode() == ISD::TargetConstantPool ||
 857              Disp.getOpcode() == ISD::TargetJumpTable);
 858       Base = N.getOperand(0);
 859       return true;  // [&g+r]
 860     }
 861   } else if (N.getOpcode() == ISD::OR) {
 862     short imm = 0;
 863     if (isIntS16Immediate(N.getOperand(1), imm)) {
 864       // If this is an or of disjoint bitfields, we can codegen this as an add
 865       // (for better address arithmetic) if the LHS and RHS of the OR are
 866       // provably disjoint.
 867       APInt LHSKnownZero, LHSKnownOne;
 868       DAG.ComputeMaskedBits(N.getOperand(0),
 869                             APInt::getAllOnesValue(N.getOperand(0)
 870                                                    .getValueSizeInBits()),
 871                             LHSKnownZero, LHSKnownOne);
 872
 873       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
 874         // If all of the bits are known zero on the LHS or RHS, the add won't
 875         // carry.
 876         Base = N.getOperand(0);
 877         Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
 878         return true;
 879       }
 880     }
 881   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
 882     // Loading from a constant address.
 883
 884     // If this address fits entirely in a 16-bit sext immediate field, codegen
 885     // this as "d, 0"
 886     short Imm;
 887     if (isIntS16Immediate(CN, Imm)) {
 888       Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
 889       Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
 890       return true;
 891     }
 892
 893     // Handle 32-bit sext immediates with LIS + addr mode.
 894     if (CN->getValueType(0) == MVT::i32 ||
 895         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
 896       int Addr = (int)CN->getZExtValue();
 897
 898       // Otherwise, break this down into an LIS + disp.
 899       Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
 900
 901       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
 902       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
 903       Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0);
 904       return true;
 905     }
 906   }
 907
 908   Disp = DAG.getTargetConstant(0, getPointerTy());
 909   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
 910     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
 911   else
 912     Base = N;
 913   return true;      // [r+0]
 914 }
 915
 916 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
 917 /// represented as an indexed [r+r] operation.
 918 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
 919                                                 SDValue &Index,
 920                                                 SelectionDAG &DAG) const {
 921   // Check to see if we can easily represent this as an [r+r] address.  This
 922   // will fail if it thinks that the address is more profitably represented as
 923   // reg+imm, e.g. where imm = 0.
 924   if (SelectAddressRegReg(N, Base, Index, DAG))
 925     return true;
 926
 927   // If the operand is an addition, always emit this as [r+r], since this is
 928   // better (for code size, and execution, as the memop does the add for free)
 929   // than emitting an explicit add.
 930   if (N.getOpcode() == ISD::ADD) {
 931     Base = N.getOperand(0);
 932     Index = N.getOperand(1);
 933     return true;
 934   }
 935
 936   // Otherwise, do it the hard way, using R0 as the base register.
 937   Base = DAG.getRegister(PPC::R0, N.getValueType());
 938   Index = N;
 939   return true;
 940 }
 941
 942 /// SelectAddressRegImmShift - Returns true if the address N can be
 943 /// represented by a base register plus a signed 14-bit displacement
 944 /// [r+imm*4].  Suitable for use by STD and friends.
 945 bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
 946                                                  SDValue &Base,
 947                                                  SelectionDAG &DAG) const {
 948   // FIXME dl should come from the parent load or store, not the address
 949   DebugLoc dl = N.getDebugLoc();
 950   // If this can be more profitably realized as r+r, fail.
 951   if (SelectAddressRegReg(N, Disp, Base, DAG))
 952     return false;
 953
 954   if (N.getOpcode() == ISD::ADD) {
 955     short imm = 0;
 956     if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
 957       Disp =  DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
 958       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
 959         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
 960       } else {
 961         Base = N.getOperand(0);
 962       }
 963       return true; // [r+i]
 964     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
 965       // Match LOAD (ADD (X, Lo(G))).
 966      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
 967              && "Cannot handle constant offsets yet!");
 968       Disp = N.getOperand(1).getOperand(0);  // The global address.
 969       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
 970              Disp.getOpcode() == ISD::TargetConstantPool ||
 971              Disp.getOpcode() == ISD::TargetJumpTable);
 972       Base = N.getOperand(0);
 973       return true;  // [&g+r]
 974     }
 975   } else if (N.getOpcode() == ISD::OR) {
 976     short imm = 0;
 977     if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
 978       // If this is an or of disjoint bitfields, we can codegen this as an add
 979       // (for better address arithmetic) if the LHS and RHS of the OR are
 980       // provably disjoint.
 981       APInt LHSKnownZero, LHSKnownOne;
 982       DAG.ComputeMaskedBits(N.getOperand(0),
 983                             APInt::getAllOnesValue(N.getOperand(0)
 984                                                    .getValueSizeInBits()),
 985                             LHSKnownZero, LHSKnownOne);
 986       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
 987         // If all of the bits are known zero on the LHS or RHS, the add won't
 988         // carry.
 989         Base = N.getOperand(0);
 990         Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
 991         return true;
 992       }
 993     }
 994   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
 995     // Loading from a constant address.  Verify low two bits are clear.
 996     if ((CN->getZExtValue() & 3) == 0) {
 997       // If this address fits entirely in a 14-bit sext immediate field, codegen
 998       // this as "d, 0"
 999       short Imm;
1000       if (isIntS16Immediate(CN, Imm)) {
1001         Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
1002         Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
1003         return true;
1004       }
1005
1006       // Fold the low-part of 32-bit absolute addresses into addr mode.
1007       if (CN->getValueType(0) == MVT::i32 ||
1008           (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
1009         int Addr = (int)CN->getZExtValue();
1010
1011         // Otherwise, break this down into an LIS + disp.
1012         Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
1013         Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
1014         unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1015         Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base),0);
1016         return true;
1017       }
1018     }
1019   }
1020
1021   Disp = DAG.getTargetConstant(0, getPointerTy());
1022   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
1023     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1024   else
1025     Base = N;
1026   return true;      // [r+0]
1027 }
1028
1029
1030 /// getPreIndexedAddressParts - returns true by value, base pointer and
1031 /// offset pointer and addressing mode by reference if the node's address
1032 /// can be legally represented as pre-indexed load / store address.
1033 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1034                                                   SDValue &Offset,
1035                                                   ISD::MemIndexedMode &AM,
1036                                                   SelectionDAG &DAG) const {
1037   // Disabled by default for now.
1038   if (!EnablePPCPreinc) return false;
1039
1040   SDValue Ptr;
1041   MVT VT;
1042   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1043     Ptr = LD->getBasePtr();
1044     VT = LD->getMemoryVT();
1045
1046   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1047     ST = ST;
1048     Ptr = ST->getBasePtr();
1049     VT  = ST->getMemoryVT();
1050   } else
1051     return false;
1052
1053   // PowerPC doesn't have preinc load/store instructions for vectors.
1054   if (VT.isVector())
1055     return false;
1056
1057   // TODO: Check reg+reg first.
1058
1059   // LDU/STU use reg+imm*4, others use reg+imm.
1060   if (VT != MVT::i64) {
1061     // reg + imm
1062     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
1063       return false;
1064   } else {
1065     // reg + imm * 4.
1066     if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
1067       return false;
1068   }
1069
1070   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1071     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
1072     // sext i32 to i64 when addr mode is r+i.
1073     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
1074         LD->getExtensionType() == ISD::SEXTLOAD &&
1075         isa<ConstantSDNode>(Offset))
1076       return false;
1077   }
1078
1079   AM = ISD::PRE_INC;
1080   return true;
1081 }
1082
1083 //===----------------------------------------------------------------------===//
1084 //  LowerOperation implementation
1085 //===----------------------------------------------------------------------===//
1086
1087 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
1088                                              SelectionDAG &DAG) {
1089   MVT PtrVT = Op.getValueType();
1090   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1091   Constant *C = CP->getConstVal();
1092   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
1093   SDValue Zero = DAG.getConstant(0, PtrVT);
1094   // FIXME there isn't really any debug info here
1095   DebugLoc dl = Op.getDebugLoc();
1096
1097   const TargetMachine &TM = DAG.getTarget();
1098
1099   SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
1100   SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);
1101
1102   // If this is a non-darwin platform, we don't support non-static relo models
1103   // yet.
1104   if (TM.getRelocationModel() == Reloc::Static ||
1105       !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
1106     // Generate non-pic code that has direct accesses to the constant pool.
1107     // The address of the global is just (hi(&g)+lo(&g)).
1108     return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
1109   }
1110
1111   if (TM.getRelocationModel() == Reloc::PIC_) {
1112     // With PIC, the first instruction is actually "GR+hi(&G)".
1113     Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
1114                      DAG.getNode(PPCISD::GlobalBaseReg,
1115                                  DebugLoc::getUnknownLoc(), PtrVT), Hi);
1116   }
1117
1118   Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
1119   return Lo;
1120 }
1121
1122 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
1123   MVT PtrVT = Op.getValueType();
1124   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1125   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1126   SDValue Zero = DAG.getConstant(0, PtrVT);
1127   // FIXME there isn't really any debug loc here
1128   DebugLoc dl = Op.getDebugLoc();
1129
1130   const TargetMachine &TM = DAG.getTarget();
1131
1132   SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
1133   SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);
1134
1135   // If this is a non-darwin platform, we don't support non-static relo models
1136   // yet.
1137   if (TM.getRelocationModel() == Reloc::Static ||
1138       !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
1139     // Generate non-pic code that has direct accesses to the constant pool.
1140     // The address of the global is just (hi(&g)+lo(&g)).
1141     return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
1142   }
1143
1144   if (TM.getRelocationModel() == Reloc::PIC_) {
1145     // With PIC, the first instruction is actually "GR+hi(&G)".
1146     Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
1147                      DAG.getNode(PPCISD::GlobalBaseReg,
1148                                  DebugLoc::getUnknownLoc(), PtrVT), Hi);
1149   }
1150
1151   Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
1152   return Lo;
1153 }
1154
1155 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1156                                                    SelectionDAG &DAG) {
1157   assert(0 && "TLS not implemented for PPC.");
1158   return SDValue(); // Not reached
1159 }
1160
1161 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
1162                                               SelectionDAG &DAG) {
1163   MVT PtrVT = Op.getValueType();
1164   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
1165   GlobalValue *GV = GSDN->getGlobal();
1166   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
1167   SDValue Zero = DAG.getConstant(0, PtrVT);
1168   // FIXME there isn't really any debug info here
1169   DebugLoc dl = GSDN->getDebugLoc();
1170
1171   const TargetMachine &TM = DAG.getTarget();
1172
1173   SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
1174   SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
1175
1176   // If this is a non-darwin platform, we don't support non-static relo models
1177   // yet.
1178   if (TM.getRelocationModel() == Reloc::Static ||
1179       !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
1180     // Generate non-pic code that has direct accesses to globals.
1181     // The address of the global is just (hi(&g)+lo(&g)).
1182     return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
1183   }
1184
1185   if (TM.getRelocationModel() == Reloc::PIC_) {
1186     // With PIC, the first instruction is actually "GR+hi(&G)".
1187     Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
1188                      DAG.getNode(PPCISD::GlobalBaseReg,
1189                                  DebugLoc::getUnknownLoc(), PtrVT), Hi);
1190   }
1191
1192   Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
1193
1194   if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
1195     return Lo;
1196
1197   // If the global is weak or external, we have to go through the lazy
1198   // resolution stub.
1199   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0);
1200 }
1201
1202 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
1203   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1204   DebugLoc dl = Op.getDebugLoc();
1205
1206   // If we're comparing for equality to zero, expose the fact that this is
1207   // implented as a ctlz/srl pair on ppc, so that the dag combiner can
1208   // fold the new nodes.
1209   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1210     if (C->isNullValue() && CC == ISD::SETEQ) {
1211       MVT VT = Op.getOperand(0).getValueType();
1212       SDValue Zext = Op.getOperand(0);
1213       if (VT.bitsLT(MVT::i32)) {
1214         VT = MVT::i32;
1215         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1216       }
1217       unsigned Log2b = Log2_32(VT.getSizeInBits());
1218       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1219       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1220                                 DAG.getConstant(Log2b, MVT::i32));
1221       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1222     }
1223     // Leave comparisons against 0 and -1 alone for now, since they're usually
1224     // optimized.  FIXME: revisit this when we can custom lower all setcc
1225     // optimizations.
1226     if (C->isAllOnesValue() || C->isNullValue())
1227       return SDValue();
1228   }
1229
1230   // If we have an integer seteq/setne, turn it into a compare against zero
1231   // by xor'ing the rhs with the lhs, which is faster than setting a
1232   // condition register, reading it back out, and masking the correct bit.  The
1233   // normal approach here uses sub to do this instead of xor.  Using xor exposes
1234   // the result to other bit-twiddling opportunities.
1235   MVT LHSVT = Op.getOperand(0).getValueType();
1236   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1237     MVT VT = Op.getValueType();
1238     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
1239                                 Op.getOperand(1));
1240     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
1241   }
1242   return SDValue();
1243 }
1244
1245 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
1246                               int VarArgsFrameIndex,
1247                               int VarArgsStackOffset,
1248                               unsigned VarArgsNumGPR,
1249                               unsigned VarArgsNumFPR,
1250                               const PPCSubtarget &Subtarget) {
1251
1252   assert(0 && "VAARG not yet implemented for the SVR4 ABI!");
1253   return SDValue(); // Not reached
1254 }
1255
1256 SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
1257   SDValue Chain = Op.getOperand(0);
1258   SDValue Trmp = Op.getOperand(1); // trampoline
1259   SDValue FPtr = Op.getOperand(2); // nested function
1260   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
1261   DebugLoc dl = Op.getDebugLoc();
1262
1263   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1264   bool isPPC64 = (PtrVT == MVT::i64);
1265   const Type *IntPtrTy =
1266     DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType();
1267
1268   TargetLowering::ArgListTy Args;
1269   TargetLowering::ArgListEntry Entry;
1270
1271   Entry.Ty = IntPtrTy;
1272   Entry.Node = Trmp; Args.push_back(Entry);
1273
1274   // TrampSize == (isPPC64 ? 48 : 40);
1275   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
1276                                isPPC64 ? MVT::i64 : MVT::i32);
1277   Args.push_back(Entry);
1278
1279   Entry.Node = FPtr; Args.push_back(Entry);
1280   Entry.Node = Nest; Args.push_back(Entry);
1281
1282   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
1283   std::pair<SDValue, SDValue> CallResult =
1284     LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false,
1285                 false, false, 0, CallingConv::C, false,
1286                 DAG.getExternalSymbol("__trampoline_setup", PtrVT),
1287                 Args, DAG, dl);
1288
1289   SDValue Ops[] =
1290     { CallResult.first, CallResult.second };
1291
1292   return DAG.getMergeValues(Ops, 2, dl);
1293 }
1294
1295 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
1296                                         int VarArgsFrameIndex,
1297                                         int VarArgsStackOffset,
1298                                         unsigned VarArgsNumGPR,
1299                                         unsigned VarArgsNumFPR,
1300                                         const PPCSubtarget &Subtarget) {
1301   DebugLoc dl = Op.getDebugLoc();
1302
1303   if (Subtarget.isDarwinABI()) {
1304     // vastart just stores the address of the VarArgsFrameIndex slot into the
1305     // memory location argument.
1306     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1307     SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1308     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1309     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
1310   }
1311
1312   // For the SVR4 ABI we follow the layout of the va_list struct.
1313   // We suppose the given va_list is already allocated.
1314   //
1315   // typedef struct {
1316   //  char gpr;     /* index into the array of 8 GPRs
1317   //                 * stored in the register save area
1318   //                 * gpr=0 corresponds to r3,
1319   //                 * gpr=1 to r4, etc.
1320   //                 */
1321   //  char fpr;     /* index into the array of 8 FPRs
1322   //                 * stored in the register save area
1323   //                 * fpr=0 corresponds to f1,
1324   //                 * fpr=1 to f2, etc.
1325   //                 */
1326   //  char *overflow_arg_area;
1327   //                /* location on stack that holds
1328   //                 * the next overflow argument
1329   //                 */
1330   //  char *reg_save_area;
1331   //               /* where r3:r10 and f1:f8 (if saved)
1332   //                * are stored
1333   //                */
1334   // } va_list[1];
1335
1336
1337   SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32);
1338   SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32);
1339
1340
1341   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1342
1343   SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
1344   SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1345
1346   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
1347   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
1348
1349   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
1350   SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
1351
1352   uint64_t FPROffset = 1;
1353   SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
1354
1355   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1356
1357   // Store first byte : number of int regs
1358   SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
1359                                          Op.getOperand(1), SV, 0, MVT::i8);
1360   uint64_t nextOffset = FPROffset;
1361   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
1362                                   ConstFPROffset);
1363
1364   // Store second byte : number of float regs
1365   SDValue secondStore =
1366     DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8);
1367   nextOffset += StackOffset;
1368   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
1369
1370   // Store second word : arguments given on stack
1371   SDValue thirdStore =
1372     DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset);
1373   nextOffset += FrameOffset;
1374   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
1375
1376   // Store third word : arguments given in registers
1377   return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset);
1378
1379 }
1380
1381 #include "PPCGenCallingConv.inc"
1382
1383 static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
1384                                      CCValAssign::LocInfo &LocInfo,
1385                                      ISD::ArgFlagsTy &ArgFlags,
1386                                      CCState &State) {
1387   return true;
1388 }
1389
1390 static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
1391                                             MVT &LocVT,
1392                                             CCValAssign::LocInfo &LocInfo,
1393                                             ISD::ArgFlagsTy &ArgFlags,
1394                                             CCState &State) {
1395   static const unsigned ArgRegs[] = {
1396     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1397     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1398   };
1399   const unsigned NumArgRegs = array_lengthof(ArgRegs);
1400
1401   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
1402
1403   // Skip one register if the first unallocated register has an even register
1404   // number and there are still argument registers available which have not been
1405   // allocated yet. RegNum is actually an index into ArgRegs, which means we
1406   // need to skip a register if RegNum is odd.
1407   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
1408     State.AllocateReg(ArgRegs[RegNum]);
1409   }
1410
1411   // Always return false here, as this function only makes sure that the first
1412   // unallocated register has an odd register number and does not actually
1413   // allocate a register for the current argument.
1414   return false;
1415 }
1416
1417 static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
1418                                               MVT &LocVT,
1419                                               CCValAssign::LocInfo &LocInfo,
1420                                               ISD::ArgFlagsTy &ArgFlags,
1421                                               CCState &State) {
1422   static const unsigned ArgRegs[] = {
1423     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1424     PPC::F8
1425   };
1426
1427   const unsigned NumArgRegs = array_lengthof(ArgRegs);
1428
1429   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
1430
1431   // If there is only one Floating-point register left we need to put both f64
1432   // values of a split ppc_fp128 value on the stack.
1433   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
1434     State.AllocateReg(ArgRegs[RegNum]);
1435   }
1436
1437   // Always return false here, as this function only makes sure that the two f64
1438   // values a ppc_fp128 value is split into are both passed in registers or both
1439   // passed on the stack and does not actually allocate a register for the
1440   // current argument.
1441   return false;
1442 }
1443
1444 /// GetFPR - Get the set of FP registers that should be allocated for arguments,
1445 /// depending on which subtarget is selected.
1446 static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
1447   if (Subtarget.isDarwinABI()) {
1448     static const unsigned FPR[] = {
1449       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1450       PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
1451     };
1452     return FPR;
1453   }
1454
1455
1456   static const unsigned FPR[] = {
1457     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1458     PPC::F8
1459   };
1460   return FPR;
1461 }
1462
1463 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
1464 /// the stack.
1465 static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
1466                                        unsigned PtrByteSize) {
1467   MVT ArgVT = Arg.getValueType();
1468   unsigned ArgSize = ArgVT.getSizeInBits()/8;
1469   if (Flags.isByVal())
1470     ArgSize = Flags.getByValSize();
1471   ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1472
1473   return ArgSize;
1474 }
1475
1476 SDValue
1477 PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
1478                                               SelectionDAG &DAG,
1479                                               int &VarArgsFrameIndex,
1480                                               int &VarArgsStackOffset,
1481                                               unsigned &VarArgsNumGPR,
1482                                               unsigned &VarArgsNumFPR,
1483                                               const PPCSubtarget &Subtarget) {
       // Lowers the FORMAL_ARGUMENTS node Op for the SVR4 ABI (the pointer size
       // is fixed at 4 bytes below).  Each incoming argument becomes either a
       // copy from a live-in register or a load from a fixed stack object; the
       // locations are chosen by the CC_PPC_SVR4 calling convention.
       //
       // The four VarArgs* reference parameters are written only when the
       // function is variadic:
       //   VarArgsFrameIndex  - frame index of the stack object the argument
       //                        registers are saved to.
       //   VarArgsStackOffset - frame index of a fixed object created at the
       //                        next stack offset after the assigned arguments.
       //   VarArgsNumGPR      - index of the first unallocated GPR argument
       //                        register.
       //   VarArgsNumFPR      - index of the first unallocated FPR argument
       //                        register.
       //
       // Returns result number Op.getResNo() of a MERGE_VALUES node holding the
       // argument values followed by the (possibly updated) chain.
       //
1484   // SVR4 ABI Stack Frame Layout:
1485   //              +-----------------------------------+
1486   //        +-->  |            Back chain             |
1487   //        |     +-----------------------------------+
1488   //        |     | Floating-point register save area |
1489   //        |     +-----------------------------------+
1490   //        |     |    General register save area     |
1491   //        |     +-----------------------------------+
1492   //        |     |          CR save word             |
1493   //        |     +-----------------------------------+
1494   //        |     |         VRSAVE save word          |
1495   //        |     +-----------------------------------+
1496   //        |     |         Alignment padding         |
1497   //        |     +-----------------------------------+
1498   //        |     |     Vector register save area     |
1499   //        |     +-----------------------------------+
1500   //        |     |       Local variable space        |
1501   //        |     +-----------------------------------+
1502   //        |     |        Parameter list area        |
1503   //        |     +-----------------------------------+
1504   //        |     |           LR save word            |
1505   //        |     +-----------------------------------+
1506   // SP-->  +---  |            Back chain             |
1507   //              +-----------------------------------+
1508   //
1509   // Specifications:
1510   //   System V Application Binary Interface PowerPC Processor Supplement
1511   //   AltiVec Technology Programming Interface Manual
1512
1513   MachineFunction &MF = DAG.getMachineFunction();
1514   MachineFrameInfo *MFI = MF.getFrameInfo();
1515   SmallVector<SDValue, 8> ArgValues;
1516   SDValue Root = Op.getOperand(0);
1517   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
1518   DebugLoc dl = Op.getDebugLoc();
1519
1520   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1521   // Potential tail calls could cause overwriting of argument stack slots.
1522   unsigned CC = MF.getFunction()->getCallingConv();
1523   bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
1524   unsigned PtrByteSize = 4;
1525
1526   // Assign locations to all of the incoming arguments.
1527   SmallVector<CCValAssign, 16> ArgLocs;
1528   CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
1529
1530   // Reserve space for the linkage area on the stack.
1531   CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
1532
1533   CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4);
1534
       // Materialize one value per assigned location, in ArgLocs order.
1535   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1536     CCValAssign &VA = ArgLocs[i];
1537
1538     // Arguments stored in registers.
1539     if (VA.isRegLoc()) {
1540       TargetRegisterClass *RC;
1541       MVT ValVT = VA.getValVT();
1542
           // Pick the register class matching the value type.  Note that there
           // is no break after the assert, so when assertions are compiled out
           // an unsupported type falls through to the i32 case.
1543       switch (ValVT.getSimpleVT()) {
1544         default:
1545           assert(0 && "ValVT not supported by FORMAL_ARGUMENTS Lowering");
1546         case MVT::i32:
1547           RC = PPC::GPRCRegisterClass;
1548           break;
1549         case MVT::f32:
1550           RC = PPC::F4RCRegisterClass;
1551           break;
1552         case MVT::f64:
1553           RC = PPC::F8RCRegisterClass;
1554           break;
1555         case MVT::v16i8:
1556         case MVT::v8i16:
1557         case MVT::v4i32:
1558         case MVT::v4f32:
1559           RC = PPC::VRRCRegisterClass;
1560           break;
1561       }
1562
1563       // Transform the arguments stored in physical registers into virtual ones.
1564       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1565       SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, ValVT);
1566
1567       ArgValues.push_back(ArgValue);
1568     } else {
1569       // Argument stored in memory.
1570       assert(VA.isMemLoc());
1571
           // The fixed object is sized by the location type and placed at the
           // offset the calling convention assigned.  It is only marked
           // immutable when tail call optimization cannot overwrite it.
1572       unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
1573       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
1574                                       isImmutable);
1575
1576       // Create load nodes to retrieve arguments from the stack.
1577       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1578       ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
1579     }
1580   }
1581
1582   // Assign locations to all of the incoming aggregate by value arguments.
1583   // Aggregates passed by value are stored in the local variable space of the
1584   // caller's stack frame, right above the parameter list area.
       // This second analysis is only used to find the total stack size:
       // ByValArgLocs itself is not read again in this function.
1585   SmallVector<CCValAssign, 16> ByValArgLocs;
1586   CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs);
1587
1588   // Reserve stack space for the allocations in CCInfo.
1589   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
1590
1591   CCByValInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4_ByVal);
1592
1593   // Area that is at least reserved in the caller of this function.
1594   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
1595
1596   // Set the size that is at least reserved in caller of this function.  Tail
1597   // call optimized function's reserved stack space needs to be aligned so that
1598   // taking the difference between two stack areas will result in an aligned
1599   // stack.
1600   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1601
1602   MinReservedArea =
1603     std::max(MinReservedArea,
1604              PPCFrameInfo::getMinCallFrameSize(false, false));
1605
       // Round MinReservedArea up to the target stack alignment.  The mask
       // arithmetic assumes the alignment is a power of two.
1606   unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
1607     getStackAlignment();
1608   unsigned AlignMask = TargetAlign-1;
1609   MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
1610
1611   FI->setMinReservedArea(MinReservedArea);
1612
       // Stores that spill argument registers for va_start; they are joined into
       // the chain with a TokenFactor at the end.
1613   SmallVector<SDValue, 8> MemOps;
1614
1615   // If the function takes variable number of arguments, make a frame index for
1616   // the start of the first vararg value... for expansion of llvm.va_start.
1617   if (isVarArg) {
1618     static const unsigned GPArgRegs[] = {
1619       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1620       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1621     };
1622     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
1623
1624     static const unsigned FPArgRegs[] = {
1625       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1626       PPC::F8
1627     };
1628     const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
1629
         // Indices of the first argument registers not already assigned by
         // CCInfo; reported back to the caller through the reference parameters.
1630     VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs);
1631     VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs);
1632
1633     // Make room for NumGPArgRegs and NumFPArgRegs.
         // Eight pointer-sized GPR slots plus eight f64-sized FPR slots.
1634     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
1635                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
1636
         // Fixed object located at the first stack offset after the arguments
         // assigned by CCInfo.
1637     VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1638                                                 CCInfo.getNextStackOffset());
1639
         // Register save area; FIN walks through it as registers are stored,
         // GPRs first and FPRs after them.
1640     VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8);
1641     SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1642
1643     // The fixed integer arguments of a variadic function are
1644     // stored to the VarArgsFrameIndex on the stack.
         // These registers are referenced directly through getRegister nodes
         // and the stores are chained to the incoming Root.
1645     unsigned GPRIndex = 0;
1646     for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) {
1647       SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
1648       SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
1649       MemOps.push_back(Store);
1650       // Increment the address by four for the next argument to store
1651       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1652       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1653     }
1654
1655     // If this function is vararg, store any remaining integer argument regs
1656     // to their spots on the stack so that they may be loaded by dereferencing the
1657     // result of va_next.
         // The remaining registers are added as live-ins and each store is
         // chained to its own CopyFromReg (result 1 of the copy).
1658     for (; GPRIndex != NumGPArgRegs; ++GPRIndex) {
1659       unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
1660
1661       SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
1662       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
1663       MemOps.push_back(Store);
1664       // Increment the address by four for the next argument to store
1665       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1666       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1667     }
1668
1669     // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is
1670     // set.
1671
1672     // The double arguments are stored to the VarArgsFrameIndex
1673     // on the stack.
         // Same two-phase scheme as for the GPRs: already allocated FPRs first,
         // then the remaining ones as live-ins.  All are stored as f64.
1674     unsigned FPRIndex = 0;
1675     for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) {
1676       SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
1677       SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
1678       MemOps.push_back(Store);
1679       // Increment the address by eight for the next argument to store
1680       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
1681                                          PtrVT);
1682       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1683     }
1684
1685     for (; FPRIndex != NumFPArgRegs; ++FPRIndex) {
1686       unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
1687
1688       SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
1689       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
1690       MemOps.push_back(Store);
1691       // Increment the address by eight for the next argument to store
1692       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
1693                                          PtrVT);
1694       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1695     }
1696   }
1697
       // Merge all register-save stores into a single chain value.
1698   if (!MemOps.empty())
1699     Root = DAG.getNode(ISD::TokenFactor, dl,
1700                        MVT::Other, &MemOps[0], MemOps.size());
1701
1702
       // The chain is the last result of the lowered node.
1703   ArgValues.push_back(Root);
1704
1705   // Return the new list of results.
1706   return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
1707                      &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
1708 }
1709
1710 SDValue
1711 PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
1712                                                 SelectionDAG &DAG,
1713                                                 int &VarArgsFrameIndex,
1714                                                 const PPCSubtarget &Subtarget) {
       // Lowers the FORMAL_ARGUMENTS node Op for the Darwin ABI, for both
       // 32-bit and 64-bit pointers.  Arguments are walked in order while
       // ArgOffset tracks the stack offset of the current argument and
       // GPR_idx/FPR_idx/VR_idx track the next free argument register of each
       // kind.  Every argument becomes a copy from a live-in register, a load
       // from a fixed stack object, or (for byval aggregates) the address of a
       // fixed stack object.
       //
       // VarArgsFrameIndex is written only for variadic functions and receives
       // the frame index of a fixed object at the final ArgOffset.
       //
       // Returns a MERGE_VALUES node holding the argument values followed by
       // the (possibly updated) chain.
       //
1715   // TODO: add description of PPC stack frame format, or at least some docs.
1716   //
1717   MachineFunction &MF = DAG.getMachineFunction();
1718   MachineFrameInfo *MFI = MF.getFrameInfo();
1719   SmallVector<SDValue, 8> ArgValues;
1720   SDValue Root = Op.getOperand(0);
1721   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
1722   DebugLoc dl = Op.getDebugLoc();
1723
1724   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1725   bool isPPC64 = PtrVT == MVT::i64;
1726   // Potential tail calls could cause overwriting of argument stack slots.
1727   unsigned CC = MF.getFunction()->getCallingConv();
1728   bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
1729   unsigned PtrByteSize = isPPC64 ? 8 : 4;
1730
1731   unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
1732   // Area that is at least reserved in caller of this function.
1733   unsigned MinReservedArea = ArgOffset;
1734
1735   static const unsigned GPR_32[] = {           // 32-bit registers.
1736     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1737     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1738   };
1739   static const unsigned GPR_64[] = {           // 64-bit registers.
1740     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
1741     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
1742   };
1743
       // NOTE(review): FPR is a function-local static, so GetFPR(Subtarget) is
       // evaluated on the first call only and later Subtarget values are
       // ignored.  Num_FPR_Regs below is hard-coded to 13, which matches the
       // table GetFPR returns for the Darwin ABI but not its 8-entry fallback.
1744   static const unsigned *FPR = GetFPR(Subtarget);
1745
1746   static const unsigned VR[] = {
1747     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
1748     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
1749   };
1750
1751   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
1752   const unsigned Num_FPR_Regs = 13;
1753   const unsigned Num_VR_Regs  = array_lengthof( VR);
1754
1755   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
1756
1757   const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
1758
1759   // In 32-bit non-varargs functions, the stack space for vectors is after the
1760   // stack space for non-vectors.  We do not use this space unless we have
1761   // too many vectors to fit in registers, something that only occurs in
1762   // constructed examples:), but we have to walk the arglist to figure
1763   // that out...for the pathological case, compute VecArgOffset as the
1764   // start of the vector parameter area.  Computing VecArgOffset is the
1765   // entire point of the following loop.
1766   unsigned VecArgOffset = ArgOffset;
1767   if (!isVarArg && !isPPC64) {
         // The last value of the node is the chain, hence getNumValues()-1.
         // Argument flags start at operand 3.
1768     for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
1769          ++ArgNo) {
1770       MVT ObjectVT = Op.getValue(ArgNo).getValueType();
1771       unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1772       ISD::ArgFlagsTy Flags =
1773         cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
1774
1775       if (Flags.isByVal()) {
1776         // ObjSize is the true size, ArgSize rounded up to multiple of regs.
1777         ObjSize = Flags.getByValSize();
1778         unsigned ArgSize =
1779                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1780         VecArgOffset += ArgSize;
1781         continue;
1782       }
1783
1784       switch(ObjectVT.getSimpleVT()) {
1785       default: assert(0 && "Unhandled argument type!");
1786       case MVT::i32:
1787       case MVT::f32:
             // isPPC64 is known to be false inside this block, so this always
             // adds 4.
1788         VecArgOffset += isPPC64 ? 8 : 4;
1789         break;
1790       case MVT::i64:  // PPC64
1791       case MVT::f64:
1792         VecArgOffset += 8;
1793         break;
1794       case MVT::v4f32:
1795       case MVT::v4i32:
1796       case MVT::v8i16:
1797       case MVT::v16i8:
1798         // Nothing to do, we're only looking at Nonvector args here.
1799         break;
1800       }
1801     }
1802   }
1803   // We've found where the vector parameter area in memory is.  Skip the
1804   // first 12 parameters; these don't use that memory.
       // Round up to 16 bytes, then step over twelve 16-byte slots (one per
       // entry of VR).
1805   VecArgOffset = ((VecArgOffset+15)/16)*16;
1806   VecArgOffset += 12*16;
1807
1808   // Add DAG nodes to load the arguments or copy them out of registers.  On
1809   // entry to a function on PPC, the arguments start after the linkage area,
1810   // although the first ones are often in registers.
1811
1812   SmallVector<SDValue, 8> MemOps;
1813   unsigned nAltivecParamsAtEnd = 0;
1814   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
1815        ArgNo != e; ++ArgNo) {
1816     SDValue ArgVal;
1817     bool needsLoad = false;
1818     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
1819     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1820     unsigned ArgSize = ObjSize;
1821     ISD::ArgFlagsTy Flags =
1822       cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
1823
         // Stack offset of this argument before ArgOffset is advanced.
1824     unsigned CurArgOffset = ArgOffset;
1825
1826     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
1827     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
1828         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
1829       if (isVarArg || isPPC64) {
1830         MinReservedArea = ((MinReservedArea+15)/16)*16;
1831         MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
1832                                                   Flags,
1833                                                   PtrByteSize);
1834       } else  nAltivecParamsAtEnd++;
1835     } else
1836       // Calculate min reserved area.
1837       MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
1838                                                 Flags,
1839                                                 PtrByteSize);
1840
1841     // FIXME the codegen can be much improved in some cases.
1842     // We do not have to keep everything in memory.
1843     if (Flags.isByVal()) {
1844       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
1845       ObjSize = Flags.getByValSize();
1846       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1847       // Objects of size 1 and 2 are right justified, everything else is
1848       // left justified.  This means the memory address is adjusted forwards.
           // NOTE(review): the adjustment uses a literal 4 rather than
           // PtrByteSize - confirm this is intended when isPPC64 is true.
1849       if (ObjSize==1 || ObjSize==2) {
1850         CurArgOffset = CurArgOffset + (4 - ObjSize);
1851       }
1852       // The value of the object is its address.
1853       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
1854       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1855       ArgValues.push_back(FIN);
1856       if (ObjSize==1 || ObjSize==2) {
             // Small object: if a GPR is still free, the value arrives in it
             // and is written to the stack slot with a truncating store.
1857         if (GPR_idx != Num_GPR_Regs) {
1858           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
1859           SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
1860           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
1861                                NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
1862           MemOps.push_back(Store);
1863           ++GPR_idx;
1864         }
1865
1866         ArgOffset += PtrByteSize;
1867
1868         continue;
1869       }
1870       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
1871         // Store whatever pieces of the object are in registers
1872         // to memory.  ArgVal will be address of the beginning of
1873         // the object.
1874         if (GPR_idx != Num_GPR_Regs) {
1875           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
1876           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
1877           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1878           SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
1879           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
1880           MemOps.push_back(Store);
1881           ++GPR_idx;
1882           ArgOffset += PtrByteSize;
1883         } else {
               // Out of GPRs: the rest of the object is already in memory.
               // Advance ArgOffset so that it ends at CurArgOffset + ArgSize.
1884           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
1885           break;
1886         }
1887       }
1888       continue;
1889     }
1890
1891     switch (ObjectVT.getSimpleVT()) {
1892     default: assert(0 && "Unhandled argument type!");
1893     case MVT::i32:
1894       if (!isPPC64) {
1895         if (GPR_idx != Num_GPR_Regs) {
1896           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
1897           ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
1898           ++GPR_idx;
1899         } else {
1900           needsLoad = true;
1901           ArgSize = PtrByteSize;
1902         }
1903         // All int arguments reserve stack space in the Darwin ABI.
1904         ArgOffset += PtrByteSize;
1905         break;
1906       }
1907       // FALLTHROUGH
1908     case MVT::i64:  // PPC64
1909       if (GPR_idx != Num_GPR_Regs) {
1910         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
1911         ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
1912
1913         if (ObjectVT == MVT::i32) {
1914           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
1915           // value to MVT::i64 and then truncate to the correct register size.
1916           if (Flags.isSExt())
1917             ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
1918                                  DAG.getValueType(ObjectVT));
1919           else if (Flags.isZExt())
1920             ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
1921                                  DAG.getValueType(ObjectVT));
1922
1923           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
1924         }
1925
1926         ++GPR_idx;
1927       } else {
1928         needsLoad = true;
1929         ArgSize = PtrByteSize;
1930       }
1931       // All int arguments reserve stack space in the Darwin ABI.
1932       ArgOffset += 8;
1933       break;
1934
1935     case MVT::f32:
1936     case MVT::f64:
1937       // Every 4 bytes of argument space consumes one of the GPRs available for
1938       // argument passing.
           // On 32-bit targets an 8-byte value consumes a second GPR when one
           // is still available.
1939       if (GPR_idx != Num_GPR_Regs) {
1940         ++GPR_idx;
1941         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
1942           ++GPR_idx;
1943       }
1944       if (FPR_idx != Num_FPR_Regs) {
1945         unsigned VReg;
1946
1947         if (ObjectVT == MVT::f32)
1948           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
1949         else
1950           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
1951
1952         ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
1953         ++FPR_idx;
1954       } else {
1955         needsLoad = true;
1956       }
1957
1958       // All FP arguments reserve stack space in the Darwin ABI.
1959       ArgOffset += isPPC64 ? 8 : ObjSize;
1960       break;
1961     case MVT::v4f32:
1962     case MVT::v4i32:
1963     case MVT::v8i16:
1964     case MVT::v16i8:
1965       // Note that vector arguments in registers don't reserve stack space,
1966       // except in varargs functions.
1967       if (VR_idx != Num_VR_Regs) {
1968         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
1969         ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
1970         if (isVarArg) {
               // Pad ArgOffset up to a 16-byte boundary, consuming one GPR per
               // padding slot, then account for the 16-byte vector itself and
               // up to four more GPRs.
1971           while ((ArgOffset % 16) != 0) {
1972             ArgOffset += PtrByteSize;
1973             if (GPR_idx != Num_GPR_Regs)
1974               GPR_idx++;
1975           }
1976           ArgOffset += 16;
1977           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
1978         }
1979         ++VR_idx;
1980       } else {
1981         if (!isVarArg && !isPPC64) {
1982           // Vectors go after all the nonvectors.
1983           CurArgOffset = VecArgOffset;
1984           VecArgOffset += 16;
1985         } else {
1986           // Vectors are aligned.
1987           ArgOffset = ((ArgOffset+15)/16)*16;
1988           CurArgOffset = ArgOffset;
1989           ArgOffset += 16;
1990         }
1991         needsLoad = true;
1992       }
1993       break;
1994     }
1995
1996     // We need to load the argument to a virtual register if we determined above
1997     // that we ran out of physical registers of the appropriate type.
         // When ArgSize is larger than ObjSize the object is addressed at the
         // end of its slot, i.e. at CurArgOffset + (ArgSize - ObjSize).
1998     if (needsLoad) {
1999       int FI = MFI->CreateFixedObject(ObjSize,
2000                                       CurArgOffset + (ArgSize - ObjSize),
2001                                       isImmutable);
2002       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2003       ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
2004     }
2005
2006     ArgValues.push_back(ArgVal);
2007   }
2008
2009   // Set the size that is at least reserved in caller of this function.  Tail
2010   // call optimized function's reserved stack space needs to be aligned so that
2011   // taking the difference between two stack areas will result in an aligned
2012   // stack.
2013   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
2014   // Add the Altivec parameters at the end, if needed.
2015   if (nAltivecParamsAtEnd) {
2016     MinReservedArea = ((MinReservedArea+15)/16)*16;
2017     MinReservedArea += 16*nAltivecParamsAtEnd;
2018   }
2019   MinReservedArea =
2020     std::max(MinReservedArea,
2021              PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
       // Round MinReservedArea up to the target stack alignment.  The mask
       // arithmetic assumes the alignment is a power of two.
2022   unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
2023     getStackAlignment();
2024   unsigned AlignMask = TargetAlign-1;
2025   MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
2026   FI->setMinReservedArea(MinReservedArea);
2027
2028   // If the function takes variable number of arguments, make a frame index for
2029   // the start of the first vararg value... for expansion of llvm.va_start.
2030   if (isVarArg) {
2031     int Depth = ArgOffset;
2032
2033     VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2034                                                Depth);
2035     SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
2036
2037     // If this function is vararg, store any remaining integer argument regs
2038     // to their spots on the stack so that they may be loaded by dereferencing the
2039     // result of va_next.
2040     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
2041       unsigned VReg;
2042
2043       if (isPPC64)
2044         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2045       else
2046         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2047
2048       SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
2049       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
2050       MemOps.push_back(Store);
2051       // Increment the address by four for the next argument to store
           // (the step is the pointer size taken from PtrVT).
2052       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2053       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2054     }
2055   }
2056
       // Merge all stores created above into a single chain value.
2057   if (!MemOps.empty())
2058     Root = DAG.getNode(ISD::TokenFactor, dl,
2059                        MVT::Other, &MemOps[0], MemOps.size());
2060
       // The chain is the last result of the lowered node.
2061   ArgValues.push_back(Root);
2062
2063   // Return the new list of results.
2064   return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
2065                      &ArgValues[0], ArgValues.size());
2066 }
2067
2068 /// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
2069 /// linkage area for the Darwin ABI.
2070 static unsigned
2071 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
2072                                      bool isPPC64,
2073                                      bool isVarArg,
2074                                      unsigned CC,
2075                                      CallSDNode *TheCall,
2076                                      unsigned &nAltivecParamsAtEnd) {
2077   // Count how many bytes are to be pushed on the stack, including the linkage
2078   // area, and parameter passing area.  We start with 24/48 bytes, which is
2079   // prereserved space for [SP][CR][LR][3 x unused].
2080   unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true);
2081   unsigned NumOps = TheCall->getNumArgs();
2082   unsigned PtrByteSize = isPPC64 ? 8 : 4;
2083
2084   // Add up all the space actually used.
2085   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
2086   // they all go in registers, but we must reserve stack space for them for
2087   // possible use by the caller.  In varargs or 64-bit calls, parameters are
2088   // assigned stack space in order, with padding so Altivec parameters are
2089   // 16-byte aligned.
2090   nAltivecParamsAtEnd = 0;
2091   for (unsigned i = 0; i != NumOps; ++i) {
2092     SDValue Arg = TheCall->getArg(i);
2093     ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
2094     MVT ArgVT = Arg.getValueType();
2095     // Varargs Altivec parameters are padded to a 16 byte boundary.
2096     if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
2097         ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
2098       if (!isVarArg && !isPPC64) {
2099         // Non-varargs Altivec parameters go after all the non-Altivec
2100         // parameters; handle those later so we know how much padding we need.
2101         nAltivecParamsAtEnd++;
2102         continue;
2103       }
2104       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
2105       NumBytes = ((NumBytes+15)/16)*16;
2106     }
2107     NumBytes += CalculateStackSlotSize(Arg, Flags, PtrByteSize);
2108   }
2109
2110    // Allow for Altivec parameters at the end, if needed.
2111   if (nAltivecParamsAtEnd) {
2112     NumBytes = ((NumBytes+15)/16)*16;
2113     NumBytes += 16*nAltivecParamsAtEnd;
2114   }
2115
2116   // The prolog code of the callee may store up to 8 GPR argument registers to
2117   // the stack, allowing va_start to index over them in memory if its varargs.
2118   // Because we cannot tell if this is needed on the caller side, we have to
2119   // conservatively assume that it is needed.  As such, make sure we have at
2120   // least enough stack space for the caller to store the 8 GPRs.
2121   NumBytes = std::max(NumBytes,
2122                       PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
2123
2124   // Tail call needs the stack to be aligned.
2125   if (CC==CallingConv::Fast && PerformTailCallOpt) {
2126     unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
2127       getStackAlignment();
2128     unsigned AlignMask = TargetAlign-1;
2129     NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2130   }
2131
2132   return NumBytes;
2133 }
2134
2135 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
2136 /// adjusted to accomodate the arguments for the tailcall.
2137 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
2138                                    unsigned ParamSize) {
2139
2140   if (!IsTailCall) return 0;
2141
2142   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
2143   unsigned CallerMinReservedArea = FI->getMinReservedArea();
2144   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
2145   // Remember only if the new adjustement is bigger.
2146   if (SPDiff < FI->getTailCallSPDelta())
2147     FI->setTailCallSPDelta(SPDiff);
2148
2149   return SPDiff;
2150 }
2151
2152 /// IsEligibleForTailCallElimination - Check to see whether the next instruction
2153 /// following the call is a return. A function is eligible if caller/callee
2154 /// calling conventions match, currently only fastcc supports tail calls, and
2155 /// the function CALL is immediatly followed by a RET.
2156 bool
2157 PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
2158                                                      SDValue Ret,
2159                                                      SelectionDAG& DAG) const {
2160   // Variable argument functions are not supported.
2161   if (!PerformTailCallOpt || TheCall->isVarArg())
2162     return false;
2163
2164   if (CheckTailCallReturnConstraints(TheCall, Ret)) {
2165     MachineFunction &MF = DAG.getMachineFunction();
2166     unsigned CallerCC = MF.getFunction()->getCallingConv();
2167     unsigned CalleeCC = TheCall->getCallingConv();
2168     if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
2169       // Functions containing by val parameters are not supported.
2170       for (unsigned i = 0; i != TheCall->getNumArgs(); i++) {
2171          ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
2172          if (Flags.isByVal()) return false;
2173       }
2174
2175       SDValue Callee = TheCall->getCallee();
2176       // Non PIC/GOT  tail calls are supported.
2177       if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
2178         return true;
2179
2180       // At the moment we can only do local tail calls (in same module, hidden
2181       // or protected) if we are generating PIC.
2182       if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2183         return G->getGlobal()->hasHiddenVisibility()
2184             || G->getGlobal()->hasProtectedVisibility();
2185     }
2186   }
2187
2188   return false;
2189 }
2190
2191 /// isCallCompatibleAddress - Return the immediate to use if the specified
2192 /// 32-bit value is representable in the immediate field of a BxA instruction.
2193 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
2194   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
2195   if (!C) return 0;
2196
2197   int Addr = C->getZExtValue();
2198   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
2199       (Addr << 6 >> 6) != Addr)
2200     return 0;  // Top 6 bits have to be sext of immediate.
2201
2202   return DAG.getConstant((int)C->getZExtValue() >> 2,
2203                          DAG.getTargetLoweringInfo().getPointerTy()).getNode();
2204 }
2205
2206 namespace {
2207
2208 struct TailCallArgumentInfo {
2209   SDValue Arg;
2210   SDValue FrameIdxOp;
2211   int       FrameIdx;
2212
2213   TailCallArgumentInfo() : FrameIdx(0) {}
2214 };
2215
2216 }
2217
2218 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
2219 static void
2220 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
2221                                            SDValue Chain,
2222                    const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
2223                    SmallVector<SDValue, 8> &MemOpChains,
2224                    DebugLoc dl) {
2225   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
2226     SDValue Arg = TailCallArgs[i].Arg;
2227     SDValue FIN = TailCallArgs[i].FrameIdxOp;
2228     int FI = TailCallArgs[i].FrameIdx;
2229     // Store relative to framepointer.
2230     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
2231                                        PseudoSourceValue::getFixedStack(FI),
2232                                        0));
2233   }
2234 }
2235
2236 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
2237 /// the appropriate stack slot for the tail call optimized function call.
2238 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
2239                                                MachineFunction &MF,
2240                                                SDValue Chain,
2241                                                SDValue OldRetAddr,
2242                                                SDValue OldFP,
2243                                                int SPDiff,
2244                                                bool isPPC64,
2245                                                bool isDarwinABI,
2246                                                DebugLoc dl) {
2247   if (SPDiff) {
2248     // Calculate the new stack slot for the return address.
2249     int SlotSize = isPPC64 ? 8 : 4;
2250     int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
2251                                                                    isDarwinABI);
2252     int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
2253                                                           NewRetAddrLoc);
2254     MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
2255     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
2256     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
2257                          PseudoSourceValue::getFixedStack(NewRetAddr), 0);
2258
2259     // When using the SVR4 ABI there is no need to move the FP stack slot
2260     // as the FP is never overwritten.
2261     if (isDarwinABI) {
2262       int NewFPLoc =
2263         SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
2264       int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
2265       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
2266       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
2267                            PseudoSourceValue::getFixedStack(NewFPIdx), 0);
2268     }
2269   }
2270   return Chain;
2271 }
2272
2273 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
2274 /// the position of the argument.
2275 static void
2276 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
2277                          SDValue Arg, int SPDiff, unsigned ArgOffset,
2278                       SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
2279   int Offset = ArgOffset + SPDiff;
2280   uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
2281   int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
2282   MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
2283   SDValue FIN = DAG.getFrameIndex(FI, VT);
2284   TailCallArgumentInfo Info;
2285   Info.Arg = Arg;
2286   Info.FrameIdxOp = FIN;
2287   Info.FrameIdx = FI;
2288   TailCallArguments.push_back(Info);
2289 }
2290
2291 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
2292 /// stack slot. Returns the chain as result and the loaded frame pointers in
2293 /// LROpOut/FPOpout. Used when tail calling.
2294 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
2295                                                         int SPDiff,
2296                                                         SDValue Chain,
2297                                                         SDValue &LROpOut,
2298                                                         SDValue &FPOpOut,
2299                                                         bool isDarwinABI,
2300                                                         DebugLoc dl) {
2301   if (SPDiff) {
2302     // Load the LR and FP stack slot for later adjusting.
2303     MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
2304     LROpOut = getReturnAddrFrameIndex(DAG);
2305     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
2306     Chain = SDValue(LROpOut.getNode(), 1);
2307
2308     // When using the SVR4 ABI there is no need to load the FP stack slot
2309     // as the FP is never overwritten.
2310     if (isDarwinABI) {
2311       FPOpOut = getFramePointerFrameIndex(DAG);
2312       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
2313       Chain = SDValue(FPOpOut.getNode(), 1);
2314     }
2315   }
2316   return Chain;
2317 }
2318
2319 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
2320 /// by "Src" to address "Dst" of size "Size".  Alignment information is
2321 /// specified by the specific parameter attribute. The copy will be passed as
2322 /// a byval function parameter.
2323 /// Sometimes what we are copying is the end of a larger object, the part that
2324 /// does not fit in registers.
2325 static SDValue
2326 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2327                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2328                           DebugLoc dl) {
2329   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2330   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2331                        false, NULL, 0, NULL, 0);
2332 }
2333
2334 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
2335 /// tail calls.
2336 static void
2337 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
2338                  SDValue Arg, SDValue PtrOff, int SPDiff,
2339                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
2340                  bool isVector, SmallVector<SDValue, 8> &MemOpChains,
2341                  SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
2342                  DebugLoc dl) {
2343   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2344   if (!isTailCall) {
2345     if (isVector) {
2346       SDValue StackPtr;
2347       if (isPPC64)
2348         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
2349       else
2350         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
2351       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
2352                            DAG.getConstant(ArgOffset, PtrVT));
2353     }
2354     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
2355   // Calculate and remember argument location.
2356   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
2357                                   TailCallArguments);
2358 }
2359
2360 static
2361 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
2362                      DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
2363                      SDValue LROp, SDValue FPOp, bool isDarwinABI,
2364                      SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
2365   MachineFunction &MF = DAG.getMachineFunction();
2366
2367   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
2368   // might overwrite each other in case of tail call optimization.
2369   SmallVector<SDValue, 8> MemOpChains2;
2370   // Do not flag preceeding copytoreg stuff together with the following stuff.
2371   InFlag = SDValue();
2372   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
2373                                     MemOpChains2, dl);
2374   if (!MemOpChains2.empty())
2375     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2376                         &MemOpChains2[0], MemOpChains2.size());
2377
2378   // Store the return address to the appropriate stack slot.
2379   Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
2380                                         isPPC64, isDarwinABI, dl);
2381
2382   // Emit callseq_end just before tailcall node.
2383   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
2384                              DAG.getIntPtrConstant(0, true), InFlag);
2385   InFlag = Chain.getValue(1);
2386 }
2387
2388 static
2389 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
2390                      SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
2391                      SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
2392                      SmallVector<SDValue, 8> &Ops, std::vector<MVT> &NodeTys,
2393                      bool isSVR4ABI) {
2394   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2395   NodeTys.push_back(MVT::Other);   // Returns a chain
2396   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
2397
2398   unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
2399
2400   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2401   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2402   // node so that legalize doesn't hack it.
2403   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2404     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
2405   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
2406     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
2407   else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
2408     // If this is an absolute destination address, use the munged value.
2409     Callee = SDValue(Dest, 0);
2410   else {
2411     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
2412     // to do the call, we can't use PPCISD::CALL.
2413     SDValue MTCTROps[] = {Chain, Callee, InFlag};
2414     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
2415                         2 + (InFlag.getNode() != 0));
2416     InFlag = Chain.getValue(1);
2417
2418     NodeTys.clear();
2419     NodeTys.push_back(MVT::Other);
2420     NodeTys.push_back(MVT::Flag);
2421     Ops.push_back(Chain);
2422     CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
2423     Callee.setNode(0);
2424     // Add CTR register as callee so a bctr can be emitted later.
2425     if (isTailCall)
2426       Ops.push_back(DAG.getRegister(PPC::CTR, PtrVT));
2427   }
2428
2429   // If this is a direct call, pass the chain and the callee.
2430   if (Callee.getNode()) {
2431     Ops.push_back(Chain);
2432     Ops.push_back(Callee);
2433   }
2434   // If this is a tail call add stack pointer delta.
2435   if (isTailCall)
2436     Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
2437
2438   // Add argument registers to the end of the list so that they are known live
2439   // into the call.
2440   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2441     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2442                                   RegsToPass[i].second.getValueType()));
2443
2444   return CallOpc;
2445 }
2446
2447 static SDValue LowerCallReturn(SDValue Op, SelectionDAG &DAG, TargetMachine &TM,
2448                                CallSDNode *TheCall, SDValue Chain,
2449                                SDValue InFlag) {
2450   bool isVarArg = TheCall->isVarArg();
2451   DebugLoc dl = TheCall->getDebugLoc();
2452   SmallVector<SDValue, 16> ResultVals;
2453   SmallVector<CCValAssign, 16> RVLocs;
2454   unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
2455   CCState CCRetInfo(CallerCC, isVarArg, TM, RVLocs);
2456   CCRetInfo.AnalyzeCallResult(TheCall, RetCC_PPC);
2457
2458   // Copy all of the result registers out of their specified physreg.
2459   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2460     CCValAssign &VA = RVLocs[i];
2461     MVT VT = VA.getValVT();
2462     assert(VA.isRegLoc() && "Can only return in registers!");
2463     Chain = DAG.getCopyFromReg(Chain, dl,
2464                                VA.getLocReg(), VT, InFlag).getValue(1);
2465     ResultVals.push_back(Chain.getValue(0));
2466     InFlag = Chain.getValue(2);
2467   }
2468
2469   // If the function returns void, just return the chain.
2470   if (RVLocs.empty())
2471     return Chain;
2472
2473   // Otherwise, merge everything together with a MERGE_VALUES node.
2474   ResultVals.push_back(Chain);
2475   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
2476                             &ResultVals[0], ResultVals.size());
2477   return Res.getValue(Op.getResNo());
2478 }
2479
2480 static
2481 SDValue FinishCall(SelectionDAG &DAG, CallSDNode *TheCall, TargetMachine &TM,
2482                    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
2483                    SDValue Op, SDValue InFlag, SDValue Chain, SDValue &Callee,
2484                    int SPDiff, unsigned NumBytes) {
2485   unsigned CC = TheCall->getCallingConv();
2486   DebugLoc dl = TheCall->getDebugLoc();
2487   bool isTailCall = TheCall->isTailCall()
2488                  && CC == CallingConv::Fast && PerformTailCallOpt;
2489
2490   std::vector<MVT> NodeTys;
2491   SmallVector<SDValue, 8> Ops;
2492   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
2493                                  isTailCall, RegsToPass, Ops, NodeTys,
2494                                  TM.getSubtarget<PPCSubtarget>().isSVR4ABI());
2495
2496   // When performing tail call optimization the callee pops its arguments off
2497   // the stack. Account for this here so these bytes can be pushed back on in
2498   // PPCRegisterInfo::eliminateCallFramePseudoInstr.
2499   int BytesCalleePops =
2500     (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
2501
2502   if (InFlag.getNode())
2503     Ops.push_back(InFlag);
2504
2505   // Emit tail call.
2506   if (isTailCall) {
2507     assert(InFlag.getNode() &&
2508            "Flag must be set. Depend on flag being set in LowerRET");
2509     Chain = DAG.getNode(PPCISD::TAILCALL, dl,
2510                         TheCall->getVTList(), &Ops[0], Ops.size());
2511     return SDValue(Chain.getNode(), Op.getResNo());
2512   }
2513
2514   Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
2515   InFlag = Chain.getValue(1);
2516
2517   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
2518                              DAG.getIntPtrConstant(BytesCalleePops, true),
2519                              InFlag);
2520   if (TheCall->getValueType(0) != MVT::Other)
2521     InFlag = Chain.getValue(1);
2522
2523   return LowerCallReturn(Op, DAG, TM, TheCall, Chain, InFlag);
2524 }
2525
2526 SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
2527                                           const PPCSubtarget &Subtarget,
2528                                           TargetMachine &TM) {
2529   // See PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4() for a description
2530   // of the SVR4 ABI stack frame layout.
2531   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
2532   SDValue Chain  = TheCall->getChain();
2533   bool isVarArg   = TheCall->isVarArg();
2534   unsigned CC     = TheCall->getCallingConv();
2535   assert((CC == CallingConv::C ||
2536           CC == CallingConv::Fast) && "Unknown calling convention!");
2537   bool isTailCall = TheCall->isTailCall()
2538                  && CC == CallingConv::Fast && PerformTailCallOpt;
2539   SDValue Callee = TheCall->getCallee();
2540   DebugLoc dl = TheCall->getDebugLoc();
2541
2542   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2543   unsigned PtrByteSize = 4;
2544
2545   MachineFunction &MF = DAG.getMachineFunction();
2546
2547   // Mark this function as potentially containing a function that contains a
2548   // tail call. As a consequence the frame pointer will be used for dynamicalloc
2549   // and restoring the callers stack pointer in this functions epilog. This is
2550   // done because by tail calling the called function might overwrite the value
2551   // in this function's (MF) stack pointer stack slot 0(SP).
2552   if (PerformTailCallOpt && CC==CallingConv::Fast)
2553     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
2554
2555   // Count how many bytes are to be pushed on the stack, including the linkage
2556   // area, parameter list area and the part of the local variable space which
2557   // contains copies of aggregates which are passed by value.
2558
2559   // Assign locations to all of the outgoing arguments.
2560   SmallVector<CCValAssign, 16> ArgLocs;
2561   CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
2562
2563   // Reserve space for the linkage area on the stack.
2564   CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
2565
2566   if (isVarArg) {
2567     // Handle fixed and variable vector arguments differently.
2568     // Fixed vector arguments go into registers as long as registers are
2569     // available. Variable vector arguments always go into memory.
2570     unsigned NumArgs = TheCall->getNumArgs();
2571     unsigned NumFixedArgs = TheCall->getNumFixedArgs();
2572
2573     for (unsigned i = 0; i != NumArgs; ++i) {
2574       MVT ArgVT = TheCall->getArg(i).getValueType();
2575       ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i);
2576       bool Result;
2577
2578       if (i < NumFixedArgs) {
2579         Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
2580                              CCInfo);
2581       } else {
2582         Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
2583                                     ArgFlags, CCInfo);
2584       }
2585
2586       if (Result) {
2587         cerr << "Call operand #" << i << " has unhandled type "
2588              << ArgVT.getMVTString() << "\n";
2589         abort();
2590       }
2591     }
2592   } else {
2593     // All arguments are treated the same.
2594     CCInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4);
2595   }
2596
2597   // Assign locations to all of the outgoing aggregate by value arguments.
2598   SmallVector<CCValAssign, 16> ByValArgLocs;
2599   CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs);
2600
2601   // Reserve stack space for the allocations in CCInfo.
2602   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2603
2604   CCByValInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4_ByVal);
2605
2606   // Size of the linkage area, parameter list area and the part of the local
2607   // space variable where copies of aggregates which are passed by value are
2608   // stored.
2609   unsigned NumBytes = CCByValInfo.getNextStackOffset();
2610
2611   // Calculate by how many bytes the stack has to be adjusted in case of tail
2612   // call optimization.
2613   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
2614
2615   // Adjust the stack pointer for the new arguments...
2616   // These operations are automatically eliminated by the prolog/epilog pass
2617   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
2618   SDValue CallSeqStart = Chain;
2619
2620   // Load the return address and frame pointer so it can be moved somewhere else
2621   // later.
2622   SDValue LROp, FPOp;
2623   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
2624                                        dl);
2625
2626   // Set up a copy of the stack pointer for use loading and storing any
2627   // arguments that may not fit in the registers available for argument
2628   // passing.
2629   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
2630
2631   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2632   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
2633   SmallVector<SDValue, 8> MemOpChains;
2634
2635   // Walk the register/memloc assignments, inserting copies/loads.
2636   for (unsigned i = 0, j = 0, e = ArgLocs.size();
2637        i != e;
2638        ++i) {
2639     CCValAssign &VA = ArgLocs[i];
2640     SDValue Arg = TheCall->getArg(i);
2641     ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
2642
2643     if (Flags.isByVal()) {
2644       // Argument is an aggregate which is passed by value, thus we need to
2645       // create a copy of it in the local variable space of the current stack
2646       // frame (which is the stack frame of the caller) and pass the address of
2647       // this copy to the callee.
2648       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
2649       CCValAssign &ByValVA = ByValArgLocs[j++];
2650       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
2651
2652       // Memory reserved in the local variable space of the callers stack frame.
2653       unsigned LocMemOffset = ByValVA.getLocMemOffset();
2654
2655       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2656       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
2657
2658       // Create a copy of the argument in the local area of the current
2659       // stack frame.
2660       SDValue MemcpyCall =
2661         CreateCopyOfByValArgument(Arg, PtrOff,
2662                                   CallSeqStart.getNode()->getOperand(0),
2663                                   Flags, DAG, dl);
2664
2665       // This must go outside the CALLSEQ_START..END.
2666       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
2667                            CallSeqStart.getNode()->getOperand(1));
2668       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
2669                              NewCallSeqStart.getNode());
2670       Chain = CallSeqStart = NewCallSeqStart;
2671
2672       // Pass the address of the aggregate copy on the stack either in a
2673       // physical register or in the parameter list area of the current stack
2674       // frame to the callee.
2675       Arg = PtrOff;
2676     }
2677
2678     if (VA.isRegLoc()) {
2679       // Put argument in a physical register.
2680       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2681     } else {
2682       // Put argument in the parameter list area of the current stack frame.
2683       assert(VA.isMemLoc());
2684       unsigned LocMemOffset = VA.getLocMemOffset();
2685
2686       if (!isTailCall) {
2687         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2688         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
2689
2690         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
2691                               PseudoSourceValue::getStack(), LocMemOffset));
2692       } else {
2693         // Calculate and remember argument location.
2694         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
2695                                  TailCallArguments);
2696       }
2697     }
2698   }
2699
2700   if (!MemOpChains.empty())
2701     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2702                         &MemOpChains[0], MemOpChains.size());
2703
2704   // Build a sequence of copy-to-reg nodes chained together with token chain
2705   // and flag operands which copy the outgoing args into the appropriate regs.
2706   SDValue InFlag;
2707   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2708     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2709                              RegsToPass[i].second, InFlag);
2710     InFlag = Chain.getValue(1);
2711   }
2712
2713   // Set CR6 to true if this is a vararg call.
2714   if (isVarArg) {
2715     SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
2716     Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
2717     InFlag = Chain.getValue(1);
2718   }
2719
2720   if (isTailCall) {
2721     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
2722                     false, TailCallArguments);
2723   }
2724
2725   return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee,
2726                     SPDiff, NumBytes);
2727 }
2728
2729 SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
2730                                             const PPCSubtarget &Subtarget,
2731                                             TargetMachine &TM) {
2732   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
2733   SDValue Chain  = TheCall->getChain();
2734   bool isVarArg   = TheCall->isVarArg();
2735   unsigned CC     = TheCall->getCallingConv();
2736   bool isTailCall = TheCall->isTailCall()
2737                  && CC == CallingConv::Fast && PerformTailCallOpt;
2738   SDValue Callee = TheCall->getCallee();
2739   unsigned NumOps  = TheCall->getNumArgs();
2740   DebugLoc dl = TheCall->getDebugLoc();
2741
2742   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2743   bool isPPC64 = PtrVT == MVT::i64;
2744   unsigned PtrByteSize = isPPC64 ? 8 : 4;
2745
2746   MachineFunction &MF = DAG.getMachineFunction();
2747
2748   // Mark this function as potentially containing a function that contains a
2749   // tail call. As a consequence the frame pointer will be used for dynamicalloc
2750   // and restoring the callers stack pointer in this functions epilog. This is
2751   // done because by tail calling the called function might overwrite the value
2752   // in this function's (MF) stack pointer stack slot 0(SP).
2753   if (PerformTailCallOpt && CC==CallingConv::Fast)
2754     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
2755
2756   unsigned nAltivecParamsAtEnd = 0;
2757
2758   // Count how many bytes are to be pushed on the stack, including the linkage
2759   // area, and parameter passing area.  We start with 24/48 bytes, which is
2760   // prereserved space for [SP][CR][LR][3 x unused].
2761   unsigned NumBytes =
2762     CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CC, TheCall,
2763                                          nAltivecParamsAtEnd);
2764
2765   // Calculate by how many bytes the stack has to be adjusted in case of tail
2766   // call optimization.
2767   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
2768
2769   // Adjust the stack pointer for the new arguments...
2770   // These operations are automatically eliminated by the prolog/epilog pass
2771   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
2772   SDValue CallSeqStart = Chain;
2773
2774   // Load the return address and frame pointer so it can be move somewhere else
2775   // later.
2776   SDValue LROp, FPOp;
2777   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
2778                                        dl);
2779
2780   // Set up a copy of the stack pointer for use loading and storing any
2781   // arguments that may not fit in the registers available for argument
2782   // passing.
2783   SDValue StackPtr;
2784   if (isPPC64)
2785     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
2786   else
2787     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
2788
2789   // Figure out which arguments are going to go in registers, and which in
2790   // memory.  Also, if this is a vararg function, floating point operations
2791   // must be stored to our stack, and loaded into integer regs as well, if
2792   // any integer regs are available for argument passing.
2793   unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
2794   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2795
2796   static const unsigned GPR_32[] = {           // 32-bit registers.
2797     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2798     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2799   };
2800   static const unsigned GPR_64[] = {           // 64-bit registers.
2801     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2802     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2803   };
2804   static const unsigned *FPR = GetFPR(Subtarget);
2805
2806   static const unsigned VR[] = {
2807     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2808     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2809   };
2810   const unsigned NumGPRs = array_lengthof(GPR_32);
2811   const unsigned NumFPRs = 13;
2812   const unsigned NumVRs  = array_lengthof(VR);
2813
2814   const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
2815
2816   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2817   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
2818
2819   SmallVector<SDValue, 8> MemOpChains;
2820   for (unsigned i = 0; i != NumOps; ++i) {
2821     bool inMem = false;
2822     SDValue Arg = TheCall->getArg(i);
2823     ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
2824
2825     // PtrOff will be used to store the current argument to the stack if a
2826     // register cannot be found for it.
2827     SDValue PtrOff;
2828
2829     PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
2830
2831     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
2832
2833     // On PPC64, promote integers to 64-bit values.
2834     if (isPPC64 && Arg.getValueType() == MVT::i32) {
2835       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
2836       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2837       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
2838     }
2839
2840     // FIXME memcpy is used way more than necessary.  Correctness first.
2841     if (Flags.isByVal()) {
2842       unsigned Size = Flags.getByValSize();
2843       if (Size==1 || Size==2) {
2844         // Very small objects are passed right-justified.
2845         // Everything else is passed left-justified.
2846         MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
2847         if (GPR_idx != NumGPRs) {
2848           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
2849                                           NULL, 0, VT);
2850           MemOpChains.push_back(Load.getValue(1));
2851           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
2852
2853           ArgOffset += PtrByteSize;
2854         } else {
2855           SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
2856           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
2857           SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
2858                                 CallSeqStart.getNode()->getOperand(0),
2859                                 Flags, DAG, dl);
2860           // This must go outside the CALLSEQ_START..END.
2861           SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
2862                                CallSeqStart.getNode()->getOperand(1));
2863           DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
2864                                  NewCallSeqStart.getNode());
2865           Chain = CallSeqStart = NewCallSeqStart;
2866           ArgOffset += PtrByteSize;
2867         }
2868         continue;
2869       }
2870       // Copy entire object into memory.  There are cases where gcc-generated
2871       // code assumes it is there, even if it could be put entirely into
2872       // registers.  (This is not what the doc says.)
2873       SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
2874                             CallSeqStart.getNode()->getOperand(0),
2875                             Flags, DAG, dl);
2876       // This must go outside the CALLSEQ_START..END.
2877       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
2878                            CallSeqStart.getNode()->getOperand(1));
2879       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
2880       Chain = CallSeqStart = NewCallSeqStart;
2881       // And copy the pieces of it that fit into registers.
2882       for (unsigned j=0; j<Size; j+=PtrByteSize) {
2883         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
2884         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2885         if (GPR_idx != NumGPRs) {
2886           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0);
2887           MemOpChains.push_back(Load.getValue(1));
2888           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
2889           ArgOffset += PtrByteSize;
2890         } else {
2891           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
2892           break;
2893         }
2894       }
2895       continue;
2896     }
2897
2898     switch (Arg.getValueType().getSimpleVT()) {
2899     default: assert(0 && "Unexpected ValueType for argument!");
2900     case MVT::i32:
2901     case MVT::i64:
2902       if (GPR_idx != NumGPRs) {
2903         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
2904       } else {
2905         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
2906                          isPPC64, isTailCall, false, MemOpChains,
2907                          TailCallArguments, dl);
2908         inMem = true;
2909       }
2910       ArgOffset += PtrByteSize;
2911       break;
2912     case MVT::f32:
2913     case MVT::f64:
2914       if (FPR_idx != NumFPRs) {
2915         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
2916
2917         if (isVarArg) {
2918           SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
2919           MemOpChains.push_back(Store);
2920
2921           // Float varargs are always shadowed in available integer registers
2922           if (GPR_idx != NumGPRs) {
2923             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
2924             MemOpChains.push_back(Load.getValue(1));
2925             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
2926           }
2927           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
2928             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
2929             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
2930             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
2931             MemOpChains.push_back(Load.getValue(1));
2932             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
2933           }
2934         } else {
2935           // If we have any FPRs remaining, we may also have GPRs remaining.
2936           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
2937           // GPRs.
2938           if (GPR_idx != NumGPRs)
2939             ++GPR_idx;
2940           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
2941               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
2942             ++GPR_idx;
2943         }
2944       } else {
2945         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
2946                          isPPC64, isTailCall, false, MemOpChains,
2947                          TailCallArguments, dl);
2948         inMem = true;
2949       }
2950       if (isPPC64)
2951         ArgOffset += 8;
2952       else
2953         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
2954       break;
2955     case MVT::v4f32:
2956     case MVT::v4i32:
2957     case MVT::v8i16:
2958     case MVT::v16i8:
2959       if (isVarArg) {
2960         // These go aligned on the stack, or in the corresponding R registers
2961         // when within range.  The Darwin PPC ABI doc claims they also go in
2962         // V registers; in fact gcc does this only for arguments that are
2963         // prototyped, not for those that match the ...  We do it for all
2964         // arguments, seems to work.
2965         while (ArgOffset % 16 !=0) {
2966           ArgOffset += PtrByteSize;
2967           if (GPR_idx != NumGPRs)
2968             GPR_idx++;
2969         }
2970         // We could elide this store in the case where the object fits
2971         // entirely in R registers.  Maybe later.
2972         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
2973                             DAG.getConstant(ArgOffset, PtrVT));
2974         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
2975         MemOpChains.push_back(Store);
2976         if (VR_idx != NumVRs) {
2977           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0);
2978           MemOpChains.push_back(Load.getValue(1));
2979           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
2980         }
2981         ArgOffset += 16;
2982         for (unsigned i=0; i<16; i+=PtrByteSize) {
2983           if (GPR_idx == NumGPRs)
2984             break;
2985           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
2986                                   DAG.getConstant(i, PtrVT));
2987           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0);
2988           MemOpChains.push_back(Load.getValue(1));
2989           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
2990         }
2991         break;
2992       }
2993
2994       // Non-varargs Altivec params generally go in registers, but have
2995       // stack space allocated at the end.
2996       if (VR_idx != NumVRs) {
2997         // Doesn't have GPR space allocated.
2998         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
2999       } else if (nAltivecParamsAtEnd==0) {
3000         // We are emitting Altivec params in order.
3001         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3002                          isPPC64, isTailCall, true, MemOpChains,
3003                          TailCallArguments, dl);
3004         ArgOffset += 16;
3005       }
3006       break;
3007     }
3008   }
3009   // If all Altivec parameters fit in registers, as they usually do,
3010   // they get stack space following the non-Altivec parameters.  We
3011   // don't track this here because nobody below needs it.
3012   // If there are more Altivec parameters than fit in registers emit
3013   // the stores here.
3014   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
3015     unsigned j = 0;
3016     // Offset is aligned; skip 1st 12 params which go in V registers.
3017     ArgOffset = ((ArgOffset+15)/16)*16;
3018     ArgOffset += 12*16;
3019     for (unsigned i = 0; i != NumOps; ++i) {
3020       SDValue Arg = TheCall->getArg(i);
3021       MVT ArgType = Arg.getValueType();
3022       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
3023           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
3024         if (++j > NumVRs) {
3025           SDValue PtrOff;
3026           // We are emitting Altivec params in order.
3027           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3028                            isPPC64, isTailCall, true, MemOpChains,
3029                            TailCallArguments, dl);
3030           ArgOffset += 16;
3031         }
3032       }
3033     }
3034   }
3035
3036   if (!MemOpChains.empty())
3037     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3038                         &MemOpChains[0], MemOpChains.size());
3039
3040   // Build a sequence of copy-to-reg nodes chained together with token chain
3041   // and flag operands which copy the outgoing args into the appropriate regs.
3042   SDValue InFlag;
3043   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3044     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3045                              RegsToPass[i].second, InFlag);
3046     InFlag = Chain.getValue(1);
3047   }
3048
3049   if (isTailCall) {
3050     PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
3051                     FPOp, true, TailCallArguments);
3052   }
3053
3054   return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee,
3055                     SPDiff, NumBytes);
3056 }
3057
3058 SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
3059                                       TargetMachine &TM) {
3060   SmallVector<CCValAssign, 16> RVLocs;
3061   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
3062   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
3063   DebugLoc dl = Op.getDebugLoc();
3064   CCState CCInfo(CC, isVarArg, TM, RVLocs);
3065   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC);
3066
3067   // If this is the first return lowered for this function, add the regs to the
3068   // liveout set for the function.
3069   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
3070     for (unsigned i = 0; i != RVLocs.size(); ++i)
3071       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
3072   }
3073
3074   SDValue Chain = Op.getOperand(0);
3075
3076   Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
3077   if (Chain.getOpcode() == PPCISD::TAILCALL) {
3078     SDValue TailCall = Chain;
3079     SDValue TargetAddress = TailCall.getOperand(1);
3080     SDValue StackAdjustment = TailCall.getOperand(2);
3081
3082     assert(((TargetAddress.getOpcode() == ISD::Register &&
3083              cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
3084             TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
3085             TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
3086             isa<ConstantSDNode>(TargetAddress)) &&
3087     "Expecting an global address, external symbol, absolute value or register");
3088
3089     assert(StackAdjustment.getOpcode() == ISD::Constant &&
3090            "Expecting a const value");
3091
3092     SmallVector<SDValue,8> Operands;
3093     Operands.push_back(Chain.getOperand(0));
3094     Operands.push_back(TargetAddress);
3095     Operands.push_back(StackAdjustment);
3096     // Copy registers used by the call. Last operand is a flag so it is not
3097     // copied.
3098     for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
3099       Operands.push_back(Chain.getOperand(i));
3100     }
3101     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0],
3102                        Operands.size());
3103   }
3104
3105   SDValue Flag;
3106
3107   // Copy the result values into the output registers.
3108   for (unsigned i = 0; i != RVLocs.size(); ++i) {
3109     CCValAssign &VA = RVLocs[i];
3110     assert(VA.isRegLoc() && "Can only return in registers!");
3111     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3112                              Op.getOperand(i*2+1), Flag);
3113     Flag = Chain.getValue(1);
3114   }
3115
3116   if (Flag.getNode())
3117     return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
3118   else
3119     return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
3120 }
3121
3122 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
3123                                    const PPCSubtarget &Subtarget) {
3124   // When we pop the dynamic allocation we need to restore the SP link.
3125   DebugLoc dl = Op.getDebugLoc();
3126
3127   // Get the corect type for pointers.
3128   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3129
3130   // Construct the stack pointer operand.
3131   bool IsPPC64 = Subtarget.isPPC64();
3132   unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
3133   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
3134
3135   // Get the operands for the STACKRESTORE.
3136   SDValue Chain = Op.getOperand(0);
3137   SDValue SaveSP = Op.getOperand(1);
3138
3139   // Load the old link SP.
3140   SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0);
3141
3142   // Restore the stack pointer.
3143   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
3144
3145   // Store the old link SP.
3146   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0);
3147 }
3148
3149
3150
3151 SDValue
3152 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
3153   MachineFunction &MF = DAG.getMachineFunction();
3154   bool IsPPC64 = PPCSubTarget.isPPC64();
3155   bool isDarwinABI = PPCSubTarget.isDarwinABI();
3156   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3157
3158   // Get current frame pointer save index.  The users of this index will be
3159   // primarily DYNALLOC instructions.
3160   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
3161   int RASI = FI->getReturnAddrSaveIndex();
3162
3163   // If the frame pointer save index hasn't been defined yet.
3164   if (!RASI) {
3165     // Find out what the fix offset of the frame pointer save area.
3166     int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
3167     // Allocate the frame index for frame pointer save area.
3168     RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
3169     // Save the result.
3170     FI->setReturnAddrSaveIndex(RASI);
3171   }
3172   return DAG.getFrameIndex(RASI, PtrVT);
3173 }
3174
3175 SDValue
3176 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
3177   MachineFunction &MF = DAG.getMachineFunction();
3178   bool IsPPC64 = PPCSubTarget.isPPC64();
3179   bool isDarwinABI = PPCSubTarget.isDarwinABI();
3180   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3181
3182   // Get current frame pointer save index.  The users of this index will be
3183   // primarily DYNALLOC instructions.
3184   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
3185   int FPSI = FI->getFramePointerSaveIndex();
3186
3187   // If the frame pointer save index hasn't been defined yet.
3188   if (!FPSI) {
3189     // Find out what the fix offset of the frame pointer save area.
3190     int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
3191                                                            isDarwinABI);
3192
3193     // Allocate the frame index for frame pointer save area.
3194     FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
3195     // Save the result.
3196     FI->setFramePointerSaveIndex(FPSI);
3197   }
3198   return DAG.getFrameIndex(FPSI, PtrVT);
3199 }
3200
3201 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
3202                                          SelectionDAG &DAG,
3203                                          const PPCSubtarget &Subtarget) {
3204   // Get the inputs.
3205   SDValue Chain = Op.getOperand(0);
3206   SDValue Size  = Op.getOperand(1);
3207   DebugLoc dl = Op.getDebugLoc();
3208
3209   // Get the corect type for pointers.
3210   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3211   // Negate the size.
3212   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
3213                                   DAG.getConstant(0, PtrVT), Size);
3214   // Construct a node for the frame pointer save index.
3215   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
3216   // Build a DYNALLOC node.
3217   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
3218   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
3219   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
3220 }
3221
3222 /// LowerSELECT_CC - Lower a floating point select_cc to a PPCISD::FSEL node
3223 /// when possible; in every case not handled here Op is returned unchanged.
3224 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
3225   // Not FP? Not a fsel.  Both the compared and the selected values must be FP.
3226   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
3227       !Op.getOperand(2).getValueType().isFloatingPoint())
3228     return Op;
3229   // Operand 4 carries the condition code.
3230   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3231
3232   // Cannot handle SETEQ/SETNE.
3233   if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
3234   // Operands 0/1 are the compared values, 2/3 the true/false results.
3235   MVT ResVT = Op.getValueType();
3236   MVT CmpVT = Op.getOperand(0).getValueType();
3237   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
3238   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
3239   DebugLoc dl = Op.getDebugLoc();
3240
3241   // If the RHS of the comparison is a 0.0, we don't need to do the
3242   // subtraction at all: LHS (or its negation) is tested directly.
3243   if (isFloatingPointZero(RHS))
3244     switch (CC) {
3245     default: break;       // SETUO etc aren't handled by fsel.
3246     case ISD::SETULT:
3247     case ISD::SETLT:
3248       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
3249     case ISD::SETOGE:     // (the swap above falls through to here)
3250     case ISD::SETGE:
3251       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
3252         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
3253       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
3254     case ISD::SETUGT:
3255     case ISD::SETGT:
3256       std::swap(TV, FV);  // -LHS >= 0 is setle, so swap operands for setgt
3257     case ISD::SETOLE:     // (the swap above falls through to here)
3258     case ISD::SETLE:
3259       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
3260         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
3261       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
3262                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
3263     }
3264   // General case: fsel on a difference of the operands; other CCs return Op.
3265   SDValue Cmp;
3266   switch (CC) {
3267   default: break;       // SETUO etc aren't handled by fsel.
3268   case ISD::SETULT:
3269   case ISD::SETLT:
3270     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
3271     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
3272       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
3273       return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); // unconditional
3274   case ISD::SETOGE:
3275   case ISD::SETGE:
3276     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
3277     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
3278       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
3279       return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); // unconditional
3280   case ISD::SETUGT:
3281   case ISD::SETGT:
3282     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
3283     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
3284       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
3285       return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); // unconditional
3286   case ISD::SETOLE:
3287   case ISD::SETLE:
3288     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
3289     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
3290       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
3291       return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); // unconditional
3292   }
3293   return Op;
3294 }
3295
3296 // FIXME: Split this code up when LegalizeDAGTypes lands.
3297 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
3298                                            DebugLoc dl) {
3299   assert(Op.getOperand(0).getValueType().isFloatingPoint());
3300   SDValue Src = Op.getOperand(0);
3301   if (Src.getValueType() == MVT::f32)
3302     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
3303
3304   SDValue Tmp;
3305   switch (Op.getValueType().getSimpleVT()) {
3306   default: assert(0 && "Unhandled FP_TO_INT type in custom expander!");
3307   case MVT::i32:
3308     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
3309                                                          PPCISD::FCTIDZ,
3310                       dl, MVT::f64, Src);
3311     break;
3312   case MVT::i64:
3313     Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
3314     break;
3315   }
3316
3317   // Convert the FP value to an int value through memory.
3318   SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
3319
3320   // Emit a store to the stack slot.
3321   SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0);
3322
3323   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
3324   // add in a bias.
3325   if (Op.getValueType() == MVT::i32)
3326     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
3327                         DAG.getConstant(4, FIPtr.getValueType()));
3328   return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0);
3329 }
3330
3331 SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
3332   DebugLoc dl = Op.getDebugLoc();
3333   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
3334   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
3335     return SDValue();
3336
3337   if (Op.getOperand(0).getValueType() == MVT::i64) {
3338     SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl,
3339                                MVT::f64, Op.getOperand(0));
3340     SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
3341     if (Op.getValueType() == MVT::f32)
3342       FP = DAG.getNode(ISD::FP_ROUND, dl,
3343                        MVT::f32, FP, DAG.getIntPtrConstant(0));
3344     return FP;
3345   }
3346
3347   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
3348          "Unhandled SINT_TO_FP type in custom expander!");
3349   // Since we only generate this in 64-bit mode, we can take advantage of
3350   // 64-bit registers.  In particular, sign extend the input value into the
3351   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
3352   // then lfd it and fcfid it.
3353   MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
3354   int FrameIdx = FrameInfo->CreateStackObject(8, 8);
3355   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3356   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
3357
3358   SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
3359                                 Op.getOperand(0));
3360
3361   // STD the extended value into the stack slot.
3362   MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx),
3363                        MachineMemOperand::MOStore, 0, 8, 8);
3364   SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other,
3365                                 DAG.getEntryNode(), Ext64, FIdx,
3366                                 DAG.getMemOperand(MO));
3367   // Load the value as a double.
3368   SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);
3369
3370   // FCFID it and return it.
3371   SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
3372   if (Op.getValueType() == MVT::f32)
3373     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
3374   return FP;
3375 }
3376
3377 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
3378   DebugLoc dl = Op.getDebugLoc();
3379   /*
3380    The rounding mode is in bits 30:31 of FPSR, and has the following
3381    settings:
3382      00 Round to nearest
3383      01 Round to 0
3384      10 Round to +inf
3385      11 Round to -inf
3386
3387   FLT_ROUNDS, on the other hand, expects the following:
3388     -1 Undefined
3389      0 Round to 0
3390      1 Round to nearest
3391      2 Round to +inf
3392      3 Round to -inf
3393
3394   To perform the conversion, we do:
3395     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
3396   */
3397
3398   MachineFunction &MF = DAG.getMachineFunction();
3399   MVT VT = Op.getValueType();
3400   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3401   std::vector<MVT> NodeTys;
3402   SDValue MFFSreg, InFlag;
3403
3404   // Save FP Control Word to register
3405   NodeTys.push_back(MVT::f64);    // return register
3406   NodeTys.push_back(MVT::Flag);   // unused in this context
3407   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
3408
3409   // Save FP register to stack slot
3410   int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
3411   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
3412   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
3413                                  StackSlot, NULL, 0);
3414
3415   // Load FP Control Word from low 32 bits of stack slot.
3416   SDValue Four = DAG.getConstant(4, PtrVT);
3417   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
3418   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0);
3419
3420   // Transform as necessary
3421   SDValue CWD1 =
3422     DAG.getNode(ISD::AND, dl, MVT::i32,
3423                 CWD, DAG.getConstant(3, MVT::i32));
3424   SDValue CWD2 =
3425     DAG.getNode(ISD::SRL, dl, MVT::i32,
3426                 DAG.getNode(ISD::AND, dl, MVT::i32,
3427                             DAG.getNode(ISD::XOR, dl, MVT::i32,
3428                                         CWD, DAG.getConstant(3, MVT::i32)),
3429                             DAG.getConstant(3, MVT::i32)),
3430                 DAG.getConstant(1, MVT::i32));
3431
3432   SDValue RetVal =
3433     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
3434
3435   return DAG.getNode((VT.getSizeInBits() < 16 ?
3436                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
3437 }
3438
3439 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
3440   MVT VT = Op.getValueType();
3441   unsigned BitWidth = VT.getSizeInBits();
3442   DebugLoc dl = Op.getDebugLoc();
3443   assert(Op.getNumOperands() == 3 &&
3444          VT == Op.getOperand(1).getValueType() &&
3445          "Unexpected SHL!");
3446
3447   // Expand into a bunch of logical ops.  Note that these ops
3448   // depend on the PPC behavior for oversized shift amounts.
3449   SDValue Lo = Op.getOperand(0);
3450   SDValue Hi = Op.getOperand(1);
3451   SDValue Amt = Op.getOperand(2);
3452   MVT AmtVT = Amt.getValueType();
3453
3454   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3455                              DAG.getConstant(BitWidth, AmtVT), Amt);
3456   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
3457   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
3458   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
3459   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3460                              DAG.getConstant(-BitWidth, AmtVT));
3461   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
3462   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
3463   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
3464   SDValue OutOps[] = { OutLo, OutHi };
3465   return DAG.getMergeValues(OutOps, 2, dl);
3466 }
3467
3468 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
3469   MVT VT = Op.getValueType();
3470   DebugLoc dl = Op.getDebugLoc();
3471   unsigned BitWidth = VT.getSizeInBits();
3472   assert(Op.getNumOperands() == 3 &&
3473          VT == Op.getOperand(1).getValueType() &&
3474          "Unexpected SRL!");
3475
3476   // Expand into a bunch of logical ops.  Note that these ops
3477   // depend on the PPC behavior for oversized shift amounts.
3478   SDValue Lo = Op.getOperand(0);
3479   SDValue Hi = Op.getOperand(1);
3480   SDValue Amt = Op.getOperand(2);
3481   MVT AmtVT = Amt.getValueType();
3482
3483   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3484                              DAG.getConstant(BitWidth, AmtVT), Amt);
3485   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
3486   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
3487   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
3488   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3489                              DAG.getConstant(-BitWidth, AmtVT));
3490   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
3491   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
3492   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
3493   SDValue OutOps[] = { OutLo, OutHi };
3494   return DAG.getMergeValues(OutOps, 2, dl);
3495 }
3496
3497 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
3498   DebugLoc dl = Op.getDebugLoc();
3499   MVT VT = Op.getValueType();
3500   unsigned BitWidth = VT.getSizeInBits();
3501   assert(Op.getNumOperands() == 3 &&
3502          VT == Op.getOperand(1).getValueType() &&
3503          "Unexpected SRA!");
3504
3505   // Expand into a bunch of logical ops, followed by a select_cc.
3506   SDValue Lo = Op.getOperand(0);
3507   SDValue Hi = Op.getOperand(1);
3508   SDValue Amt = Op.getOperand(2);
3509   MVT AmtVT = Amt.getValueType();
3510
3511   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3512                              DAG.getConstant(BitWidth, AmtVT), Amt);
3513   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
3514   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
3515   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
3516   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3517                              DAG.getConstant(-BitWidth, AmtVT));
3518   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
3519   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
3520   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
3521                                   Tmp4, Tmp6, ISD::SETLE);
3522   SDValue OutOps[] = { OutLo, OutHi };
3523   return DAG.getMergeValues(OutOps, 2, dl);
3524 }
3525
3526 //===----------------------------------------------------------------------===//
3527 // Vector related lowering.
3528 //
3529
3530 /// BuildSplatI - Build a canonical splati of Val with an element size of
3531 /// SplatSize.  Cast the result to VT.
3532 static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT,
3533                              SelectionDAG &DAG, DebugLoc dl) {
3534   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
3535
3536   static const MVT VTys[] = { // canonical VT to use for each size.
3537     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
3538   };
3539
3540   MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
3541
3542   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
3543   if (Val == -1)
3544     SplatSize = 1;
3545
3546   MVT CanonicalVT = VTys[SplatSize-1];
3547
3548   // Build a canonical splat for this value.
3549   SDValue Elt = DAG.getConstant(Val, MVT::i32);
3550   SmallVector<SDValue, 8> Ops;
3551   Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
3552   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
3553                               &Ops[0], Ops.size());
3554   return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
3555 }
3556
3557 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
3558 /// specified intrinsic ID.
3559 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
3560                                 SelectionDAG &DAG, DebugLoc dl,
3561                                 MVT DestVT = MVT::Other) {
3562   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
3563   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
3564                      DAG.getConstant(IID, MVT::i32), LHS, RHS);
3565 }
3566
3567 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
3568 /// specified intrinsic ID.
3569 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
3570                                 SDValue Op2, SelectionDAG &DAG,
3571                                 DebugLoc dl, MVT DestVT = MVT::Other) {
3572   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
3573   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
3574                      DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
3575 }
3576
3577
3578 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
3579 /// amount.  The result has the specified value type.
3580 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
3581                              MVT VT, SelectionDAG &DAG, DebugLoc dl) {
3582   // Force LHS/RHS to be the right type.
3583   LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
3584   RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
3585
3586   int Ops[16];
3587   for (unsigned i = 0; i != 16; ++i)
3588     Ops[i] = i + Amt;
3589   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
3590   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
3591 }
3592
3593 // If this is a case we can't handle, return null and let the default
3594 // expansion code take care of it.  If we CAN select this case, and if it
3595 // selects to a single instruction, return Op.  Otherwise, if we can codegen
3596 // this case more efficiently than a constant pool load, lower it to the
3597 // sequence of ops that should be used.
3598 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
3599   DebugLoc dl = Op.getDebugLoc();
3600   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
3601   assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
3602
3603   // Check if this is a splat of a constant value.
3604   APInt APSplatBits, APSplatUndef;
3605   unsigned SplatBitSize;
3606   bool HasAnyUndefs;
3607   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
3608                              HasAnyUndefs) || SplatBitSize > 32)
3609     return SDValue();
3610
3611   unsigned SplatBits = APSplatBits.getZExtValue();
3612   unsigned SplatUndef = APSplatUndef.getZExtValue();
3613   unsigned SplatSize = SplatBitSize / 8;
3614
3615   // First, handle single instruction cases.
3616
3617   // All zeros?
3618   if (SplatBits == 0) {
3619     // Canonicalize all zero vectors to be v4i32.
3620     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
3621       SDValue Z = DAG.getConstant(0, MVT::i32);
3622       Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
3623       Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
3624     }
3625     return Op;
3626   }
3627
3628   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
3629   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
3630                     (32-SplatBitSize));
3631   if (SextVal >= -16 && SextVal <= 15)
3632     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
3633
3634
3635   // Two instruction sequences.
3636
3637   // If this value is in the range [-32,30] and is even, use:
3638   //    tmp = VSPLTI[bhw], result = add tmp, tmp
3639   if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
3640     SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
3641     Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
3642     return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3643   }
3644
3645   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
3646   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
3647   // for fneg/fabs.
3648   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
3649     // Make -1 and vspltisw -1:
3650     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
3651
3652     // Make the VSLW intrinsic, computing 0x8000_0000.
3653     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
3654                                    OnesV, DAG, dl);
3655
3656     // xor by OnesV to invert it.
3657     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
3658     return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3659   }
3660
3661   // Check to see if this is a wide variety of vsplti*, binop self cases.
3662   static const signed char SplatCsts[] = {
3663     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
3664     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
3665   };
3666
3667   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
3668     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
3669     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
3670     int i = SplatCsts[idx];
3671
3672     // Figure out what shift amount will be used by altivec if shifted by i in
3673     // this splat size.
3674     unsigned TypeShiftAmt = i & (SplatBitSize-1);
3675
3676     // vsplti + shl self.
3677     if (SextVal == (i << (int)TypeShiftAmt)) {
3678       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
3679       static const unsigned IIDs[] = { // Intrinsic to use for each size.
3680         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
3681         Intrinsic::ppc_altivec_vslw
3682       };
3683       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
3684       return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3685     }
3686
3687     // vsplti + srl self.
3688     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
3689       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
3690       static const unsigned IIDs[] = { // Intrinsic to use for each size.
3691         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
3692         Intrinsic::ppc_altivec_vsrw
3693       };
3694       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
3695       return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3696     }
3697
3698     // vsplti + sra self.
3699     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
3700       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
3701       static const unsigned IIDs[] = { // Intrinsic to use for each size.
3702         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
3703         Intrinsic::ppc_altivec_vsraw
3704       };
3705       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
3706       return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3707     }
3708
3709     // vsplti + rol self.
3710     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
3711                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
3712       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
3713       static const unsigned IIDs[] = { // Intrinsic to use for each size.
3714         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
3715         Intrinsic::ppc_altivec_vrlw
3716       };
3717       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
3718       return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3719     }
3720
3721     // t = vsplti c, result = vsldoi t, t, 1
3722     if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
3723       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
3724       return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
3725     }
3726     // t = vsplti c, result = vsldoi t, t, 2
3727     if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
3728       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
3729       return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
3730     }
3731     // t = vsplti c, result = vsldoi t, t, 3
3732     if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
3733       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
3734       return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
3735     }
3736   }
3737
3738   // Three instruction sequences.
3739
3740   // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
3741   if (SextVal >= 0 && SextVal <= 31) {
3742     SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
3743     SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
3744     LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
3745     return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
3746   }
3747   // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
3748   if (SextVal >= -31 && SextVal <= 0) {
3749     SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
3750     SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
3751     LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
3752     return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
3753   }
3754
3755   return SDValue();
3756 }
3757
3758 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3759 /// the specified operations to build the shuffle.
3760 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
3761                                       SDValue RHS, SelectionDAG &DAG,
3762                                       DebugLoc dl) {
3763   unsigned OpNum = (PFEntry >> 26) & 0x0F;
3764   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
3765   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
3766
3767   enum {
3768     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
3769     OP_VMRGHW,
3770     OP_VMRGLW,
3771     OP_VSPLTISW0,
3772     OP_VSPLTISW1,
3773     OP_VSPLTISW2,
3774     OP_VSPLTISW3,
3775     OP_VSLDOI4,
3776     OP_VSLDOI8,
3777     OP_VSLDOI12
3778   };
3779
3780   if (OpNum == OP_COPY) {
3781     if (LHSID == (1*9+2)*9+3) return LHS;
3782     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
3783     return RHS;
3784   }
3785
3786   SDValue OpLHS, OpRHS;
3787   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
3788   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
3789
3790   int ShufIdxs[16];
3791   switch (OpNum) {
3792   default: assert(0 && "Unknown i32 permute!");
3793   case OP_VMRGHW:
3794     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
3795     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
3796     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
3797     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
3798     break;
3799   case OP_VMRGLW:
3800     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
3801     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
3802     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
3803     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
3804     break;
3805   case OP_VSPLTISW0:
3806     for (unsigned i = 0; i != 16; ++i)
3807       ShufIdxs[i] = (i&3)+0;
3808     break;
3809   case OP_VSPLTISW1:
3810     for (unsigned i = 0; i != 16; ++i)
3811       ShufIdxs[i] = (i&3)+4;
3812     break;
3813   case OP_VSPLTISW2:
3814     for (unsigned i = 0; i != 16; ++i)
3815       ShufIdxs[i] = (i&3)+8;
3816     break;
3817   case OP_VSPLTISW3:
3818     for (unsigned i = 0; i != 16; ++i)
3819       ShufIdxs[i] = (i&3)+12;
3820     break;
3821   case OP_VSLDOI4:
3822     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
3823   case OP_VSLDOI8:
3824     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
3825   case OP_VSLDOI12:
3826     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
3827   }
3828   MVT VT = OpLHS.getValueType();
3829   OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
3830   OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
3831   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
3832   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
3833 }
3834
3835 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
3836 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
3837 /// return the code it can be lowered into.  Worst case, it can always be
3838 /// lowered into a vperm.
3839 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
3840                                                SelectionDAG &DAG) {
3841   DebugLoc dl = Op.getDebugLoc();
3842   SDValue V1 = Op.getOperand(0);
3843   SDValue V2 = Op.getOperand(1);
3844   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
3845   MVT VT = Op.getValueType();
3846
3847   // Cases that are handled by instructions that take permute immediates
3848   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
3849   // selected by the instruction selector.
3850   if (V2.getOpcode() == ISD::UNDEF) {
3851     if (PPC::isSplatShuffleMask(SVOp, 1) ||
3852         PPC::isSplatShuffleMask(SVOp, 2) ||
3853         PPC::isSplatShuffleMask(SVOp, 4) ||
3854         PPC::isVPKUWUMShuffleMask(SVOp, true) ||
3855         PPC::isVPKUHUMShuffleMask(SVOp, true) ||
3856         PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
3857         PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
3858         PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
3859         PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
3860         PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
3861         PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
3862         PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
3863       return Op;
3864     }
3865   }
3866
3867   // Altivec has a variety of "shuffle immediates" that take two vector inputs
3868   // and produce a fixed permutation.  If any of these match, do not lower to
3869   // VPERM.
3870   if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
3871       PPC::isVPKUHUMShuffleMask(SVOp, false) ||
3872       PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
3873       PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
3874       PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
3875       PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
3876       PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
3877       PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
3878       PPC::isVMRGHShuffleMask(SVOp, 4, false))
3879     return Op;
3880
3881   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
3882   // perfect shuffle table to emit an optimal matching sequence.
3883   SmallVector<int, 16> PermMask;
3884   SVOp->getMask(PermMask);
3885
3886   unsigned PFIndexes[4];
3887   bool isFourElementShuffle = true;
3888   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
3889     unsigned EltNo = 8;   // Start out undef.
3890     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
3891       if (PermMask[i*4+j] < 0)
3892         continue;   // Undef, ignore it.
3893
3894       unsigned ByteSource = PermMask[i*4+j];
3895       if ((ByteSource & 3) != j) {
3896         isFourElementShuffle = false;
3897         break;
3898       }
3899
3900       if (EltNo == 8) {
3901         EltNo = ByteSource/4;
3902       } else if (EltNo != ByteSource/4) {
3903         isFourElementShuffle = false;
3904         break;
3905       }
3906     }
3907     PFIndexes[i] = EltNo;
3908   }
3909
3910   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
3911   // perfect shuffle vector to determine if it is cost effective to do this as
3912   // discrete instructions, or whether we should use a vperm.
3913   if (isFourElementShuffle) {
3914     // Compute the index in the perfect shuffle table.
3915     unsigned PFTableIndex =
3916       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3917
3918     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3919     unsigned Cost  = (PFEntry >> 30);
3920
3921     // Determining when to avoid vperm is tricky.  Many things affect the cost
3922     // of vperm, particularly how many times the perm mask needs to be computed.
3923     // For example, if the perm mask can be hoisted out of a loop or is already
3924     // used (perhaps because there are multiple permutes with the same shuffle
3925     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
3926     // the loop requires an extra register.
3927     //
3928     // As a compromise, we only emit discrete instructions if the shuffle can be
3929     // generated in 3 or fewer operations.  When we have loop information
3930     // available, if this block is within a loop, we should avoid using vperm
3931     // for 3-operation perms and use a constant pool load instead.
3932     if (Cost < 3)
3933       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3934   }
3935
3936   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
3937   // vector that will get spilled to the constant pool.
3938   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
3939
3940   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
3941   // that it is in input element units, not in bytes.  Convert now.
3942   MVT EltVT = V1.getValueType().getVectorElementType();
3943   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
3944
3945   SmallVector<SDValue, 16> ResultMask;
3946   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
3947     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
3948
3949     for (unsigned j = 0; j != BytesPerElement; ++j)
3950       ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
3951                                            MVT::i32));
3952   }
3953
3954   SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
3955                                     &ResultMask[0], ResultMask.size());
3956   return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
3957 }
3958
3959 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
3960 /// altivec comparison.  If it is, return true and fill in Opc/isDot with
3961 /// information about the intrinsic.
3962 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
3963                                   bool &isDot) {
3964   unsigned IntrinsicID =
3965     cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
3966   CompareOpc = -1;
3967   isDot = false;
3968   switch (IntrinsicID) {
3969   default: return false;
3970     // Comparison predicates.
3971   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
3972   case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
3973   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
3974   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
3975   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
3976   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
3977   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
3978   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
3979   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
3980   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
3981   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
3982   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
3983   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
3984
3985     // Normal Comparisons.
3986   case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
3987   case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
3988   case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
3989   case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
3990   case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
3991   case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
3992   case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
3993   case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
3994   case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
3995   case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
3996   case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
3997   case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
3998   case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
3999   }
4000   return true;
4001 }
4002
4003 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
4004 /// lower, do it, otherwise return null.
4005 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4006                                                      SelectionDAG &DAG) {
4007   // If this is a lowered altivec predicate compare, CompareOpc is set to the
4008   // opcode number of the comparison.
4009   DebugLoc dl = Op.getDebugLoc();
4010   int CompareOpc;
4011   bool isDot;
4012   if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
4013     return SDValue();    // Don't custom lower most intrinsics.
4014
4015   // If this is a non-dot comparison, make the VCMP node and we are done.
4016   if (!isDot) {
4017     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
4018                                 Op.getOperand(1), Op.getOperand(2),
4019                                 DAG.getConstant(CompareOpc, MVT::i32));
4020     return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
4021   }
4022
4023   // Create the PPCISD altivec 'dot' comparison node.
4024   SDValue Ops[] = {
4025     Op.getOperand(2),  // LHS
4026     Op.getOperand(3),  // RHS
4027     DAG.getConstant(CompareOpc, MVT::i32)
4028   };
4029   std::vector<MVT> VTs;
4030   VTs.push_back(Op.getOperand(2).getValueType());
4031   VTs.push_back(MVT::Flag);
4032   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
4033
4034   // Now that we have the comparison, emit a copy from the CR to a GPR.
4035   // This is flagged to the above dot comparison.
4036   SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
4037                                 DAG.getRegister(PPC::CR6, MVT::i32),
4038                                 CompNode.getValue(1));
4039
4040   // Unpack the result based on how the target uses it.
4041   unsigned BitNo;   // Bit # of CR6.
4042   bool InvertBit;   // Invert result?
4043   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
4044   default:  // Can't happen, don't crash on invalid number though.
4045   case 0:   // Return the value of the EQ bit of CR6.
4046     BitNo = 0; InvertBit = false;
4047     break;
4048   case 1:   // Return the inverted value of the EQ bit of CR6.
4049     BitNo = 0; InvertBit = true;
4050     break;
4051   case 2:   // Return the value of the LT bit of CR6.
4052     BitNo = 2; InvertBit = false;
4053     break;
4054   case 3:   // Return the inverted value of the LT bit of CR6.
4055     BitNo = 2; InvertBit = true;
4056     break;
4057   }
4058
4059   // Shift the bit into the low position.
4060   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
4061                       DAG.getConstant(8-(3-BitNo), MVT::i32));
4062   // Isolate the bit.
4063   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
4064                       DAG.getConstant(1, MVT::i32));
4065
4066   // If we are supposed to, toggle the bit.
4067   if (InvertBit)
4068     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
4069                         DAG.getConstant(1, MVT::i32));
4070   return Flags;
4071 }
4072
4073 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
4074                                                    SelectionDAG &DAG) {
4075   DebugLoc dl = Op.getDebugLoc();
4076   // Create a stack slot that is 16-byte aligned.
4077   MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
4078   int FrameIdx = FrameInfo->CreateStackObject(16, 16);
4079   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4080   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
4081
4082   // Store the input value into Value#0 of the stack slot.
4083   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
4084                                  Op.getOperand(0), FIdx, NULL, 0);
4085   // Load it out.
4086   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0);
4087 }
4088
4089 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
4090   DebugLoc dl = Op.getDebugLoc();
4091   if (Op.getValueType() == MVT::v4i32) {
4092     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4093
4094     SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
4095     SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
4096
4097     SDValue RHSSwap =   // = vrlw RHS, 16
4098       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
4099
4100     // Shrinkify inputs to v8i16.
4101     LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS);
4102     RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS);
4103     RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap);
4104
4105     // Low parts multiplied together, generating 32-bit results (we ignore the
4106     // top parts).
4107     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
4108                                         LHS, RHS, DAG, dl, MVT::v4i32);
4109
4110     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
4111                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
4112     // Shift the high parts up 16 bits.
4113     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
4114                               Neg16, DAG, dl);
4115     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
4116   } else if (Op.getValueType() == MVT::v8i16) {
4117     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4118
4119     SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
4120
4121     return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
4122                             LHS, RHS, Zero, DAG, dl);
4123   } else if (Op.getValueType() == MVT::v16i8) {
4124     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4125
4126     // Multiply the even 8-bit parts, producing 16-bit sums.
4127     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
4128                                            LHS, RHS, DAG, dl, MVT::v8i16);
4129     EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts);
4130
4131     // Multiply the odd 8-bit parts, producing 16-bit sums.
4132     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
4133                                           LHS, RHS, DAG, dl, MVT::v8i16);
4134     OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
4135
4136     // Merge the results together.
4137     int Ops[16];
4138     for (unsigned i = 0; i != 8; ++i) {
4139       Ops[i*2  ] = 2*i+1;
4140       Ops[i*2+1] = 2*i+1+16;
4141     }
4142     return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
4143   } else {
4144     assert(0 && "Unknown mul to lower!");
4145     abort();
4146   }
4147 }
4148
4149 /// LowerOperation - Provide custom lowering hooks for some operations.
4150 ///
4151 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
4152   switch (Op.getOpcode()) {
4153   default: assert(0 && "Wasn't expecting to be able to lower this!");
4154   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
4155   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
4156   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
4157   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
4158   case ISD::SETCC:              return LowerSETCC(Op, DAG);
4159   case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
4160   case ISD::VASTART:
4161     return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
4162                         VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
4163
4164   case ISD::VAARG:
4165     return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
4166                       VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
4167
4168   case ISD::FORMAL_ARGUMENTS:
4169     if (PPCSubTarget.isSVR4ABI()) {
4170       return LowerFORMAL_ARGUMENTS_SVR4(Op, DAG, VarArgsFrameIndex,
4171                                         VarArgsStackOffset, VarArgsNumGPR,
4172                                         VarArgsNumFPR, PPCSubTarget);
4173     } else {
4174       return LowerFORMAL_ARGUMENTS_Darwin(Op, DAG, VarArgsFrameIndex,
4175                                           PPCSubTarget);
4176     }
4177
4178   case ISD::CALL:
4179     if (PPCSubTarget.isSVR4ABI()) {
4180       return LowerCALL_SVR4(Op, DAG, PPCSubTarget, getTargetMachine());
4181     } else {
4182       return LowerCALL_Darwin(Op, DAG, PPCSubTarget, getTargetMachine());
4183     }
4184
4185   case ISD::RET:                return LowerRET(Op, DAG, getTargetMachine());
4186   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
4187   case ISD::DYNAMIC_STACKALLOC:
4188     return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
4189
4190   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
4191   case ISD::FP_TO_UINT:
4192   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
4193                                                        Op.getDebugLoc());
4194   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
4195   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
4196
4197   // Lower 64-bit shifts.
4198   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
4199   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
4200   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
4201
4202   // Vector-related lowering.
4203   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
4204   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
4205   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4206   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
4207   case ISD::MUL:                return LowerMUL(Op, DAG);
4208
4209   // Frame & Return address.
4210   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
4211   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
4212   }
4213   return SDValue();
4214 }
4215
4216 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
4217                                            SmallVectorImpl<SDValue>&Results,
4218                                            SelectionDAG &DAG) {
4219   DebugLoc dl = N->getDebugLoc();
4220   switch (N->getOpcode()) {
4221   default:
4222     assert(false && "Do not know how to custom type legalize this operation!");
4223     return;
4224   case ISD::FP_ROUND_INREG: {
4225     assert(N->getValueType(0) == MVT::ppcf128);
4226     assert(N->getOperand(0).getValueType() == MVT::ppcf128);
4227     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
4228                              MVT::f64, N->getOperand(0),
4229                              DAG.getIntPtrConstant(0));
4230     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
4231                              MVT::f64, N->getOperand(0),
4232                              DAG.getIntPtrConstant(1));
4233
4234     // This sequence changes FPSCR to do round-to-zero, adds the two halves
4235     // of the long double, and puts FPSCR back the way it was.  We do not
4236     // actually model FPSCR.
4237     std::vector<MVT> NodeTys;
4238     SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
4239
4240     NodeTys.push_back(MVT::f64);   // Return register
4241     NodeTys.push_back(MVT::Flag);    // Returns a flag for later insns
4242     Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
4243     MFFSreg = Result.getValue(0);
4244     InFlag = Result.getValue(1);
4245
4246     NodeTys.clear();
4247     NodeTys.push_back(MVT::Flag);   // Returns a flag
4248     Ops[0] = DAG.getConstant(31, MVT::i32);
4249     Ops[1] = InFlag;
4250     Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
4251     InFlag = Result.getValue(0);
4252
4253     NodeTys.clear();
4254     NodeTys.push_back(MVT::Flag);   // Returns a flag
4255     Ops[0] = DAG.getConstant(30, MVT::i32);
4256     Ops[1] = InFlag;
4257     Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
4258     InFlag = Result.getValue(0);
4259
4260     NodeTys.clear();
4261     NodeTys.push_back(MVT::f64);    // result of add
4262     NodeTys.push_back(MVT::Flag);   // Returns a flag
4263     Ops[0] = Lo;
4264     Ops[1] = Hi;
4265     Ops[2] = InFlag;
4266     Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
4267     FPreg = Result.getValue(0);
4268     InFlag = Result.getValue(1);
4269
4270     NodeTys.clear();
4271     NodeTys.push_back(MVT::f64);
4272     Ops[0] = DAG.getConstant(1, MVT::i32);
4273     Ops[1] = MFFSreg;
4274     Ops[2] = FPreg;
4275     Ops[3] = InFlag;
4276     Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
4277     FPreg = Result.getValue(0);
4278
4279     // We know the low half is about to be thrown away, so just use something
4280     // convenient.
4281     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
4282                                 FPreg, FPreg));
4283     return;
4284   }
4285   case ISD::FP_TO_SINT:
4286     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
4287     return;
4288   }
4289 }
4290
4291
4292 //===----------------------------------------------------------------------===//
4293 //  Other Lowering Code
4294 //===----------------------------------------------------------------------===//
4295
4296 MachineBasicBlock *
4297 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
4298                                     bool is64bit, unsigned BinOpcode) const {
4299   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
4300   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4301
4302   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4303   MachineFunction *F = BB->getParent();
4304   MachineFunction::iterator It = BB;
4305   ++It;
4306
4307   unsigned dest = MI->getOperand(0).getReg();
4308   unsigned ptrA = MI->getOperand(1).getReg();
4309   unsigned ptrB = MI->getOperand(2).getReg();
4310   unsigned incr = MI->getOperand(3).getReg();
4311   DebugLoc dl = MI->getDebugLoc();
4312
4313   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
4314   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
4315   F->insert(It, loopMBB);
4316   F->insert(It, exitMBB);
4317   exitMBB->transferSuccessors(BB);
4318
4319   MachineRegisterInfo &RegInfo = F->getRegInfo();
4320   unsigned TmpReg = (!BinOpcode) ? incr :
4321     RegInfo.createVirtualRegister(
4322        is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
4323                  (const TargetRegisterClass *) &PPC::GPRCRegClass);
4324
4325   //  thisMBB:
4326   //   ...
4327   //   fallthrough --> loopMBB
4328   BB->addSuccessor(loopMBB);
4329
4330   //  loopMBB:
4331   //   l[wd]arx dest, ptr
4332   //   add r0, dest, incr
4333   //   st[wd]cx. r0, ptr
4334   //   bne- loopMBB
4335   //   fallthrough --> exitMBB
4336   BB = loopMBB;
4337   BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
4338     .addReg(ptrA).addReg(ptrB);
4339   if (BinOpcode)
4340     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
4341   BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
4342     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
4343   BuildMI(BB, dl, TII->get(PPC::BCC))
4344     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
4345   BB->addSuccessor(loopMBB);
4346   BB->addSuccessor(exitMBB);
4347
4348   //  exitMBB:
4349   //   ...
4350   BB = exitMBB;
4351   return BB;
4352 }
4353
4354 MachineBasicBlock *
4355 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
4356                                             MachineBasicBlock *BB,
4357                                             bool is8bit,    // operation
4358                                             unsigned BinOpcode) const {
4359   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
4360   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4361   // In 64 bit mode we have to use 64 bits for addresses, even though the
4362   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
4363   // registers without caring whether they're 32 or 64, but here we're
4364   // doing actual arithmetic on the addresses.
4365   bool is64bit = PPCSubTarget.isPPC64();
4366
4367   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4368   MachineFunction *F = BB->getParent();
4369   MachineFunction::iterator It = BB;
4370   ++It;
4371
4372   unsigned dest = MI->getOperand(0).getReg();
4373   unsigned ptrA = MI->getOperand(1).getReg();
4374   unsigned ptrB = MI->getOperand(2).getReg();
4375   unsigned incr = MI->getOperand(3).getReg();
4376   DebugLoc dl = MI->getDebugLoc();
4377
4378   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
4379   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
4380   F->insert(It, loopMBB);
4381   F->insert(It, exitMBB);
4382   exitMBB->transferSuccessors(BB);
4383
4384   MachineRegisterInfo &RegInfo = F->getRegInfo();
4385   const TargetRegisterClass *RC =
4386     is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
4387               (const TargetRegisterClass *) &PPC::GPRCRegClass;
4388   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
4389   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
4390   unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
4391   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
4392   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
4393   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
4394   unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
4395   unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
4396   unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
4397   unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
4398   unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
4399   unsigned Ptr1Reg;
4400   unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
4401
4402   //  thisMBB:
4403   //   ...
4404   //   fallthrough --> loopMBB
4405   BB->addSuccessor(loopMBB);
4406
4407   // The 4-byte load must be aligned, while a char or short may be
4408   // anywhere in the word.  Hence all this nasty bookkeeping code.
4409   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
4410   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
4411   //   xori shift, shift1, 24 [16]
4412   //   rlwinm ptr, ptr1, 0, 0, 29
4413   //   slw incr2, incr, shift
4414   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
4415   //   slw mask, mask2, shift
4416   //  loopMBB:
4417   //   lwarx tmpDest, ptr
4418   //   add tmp, tmpDest, incr2
4419   //   andc tmp2, tmpDest, mask
4420   //   and tmp3, tmp, mask
4421   //   or tmp4, tmp3, tmp2
4422   //   stwcx. tmp4, ptr
4423   //   bne- loopMBB
4424   //   fallthrough --> exitMBB
4425   //   srw dest, tmpDest, shift
4426
4427   if (ptrA!=PPC::R0) {
4428     Ptr1Reg = RegInfo.createVirtualRegister(RC);
4429     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
4430       .addReg(ptrA).addReg(ptrB);
4431   } else {
4432     Ptr1Reg = ptrB;
4433   }
4434   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
4435       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
4436   BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
4437       .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
4438   if (is64bit)
4439     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
4440       .addReg(Ptr1Reg).addImm(0).addImm(61);
4441   else
4442     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
4443       .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
4444   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
4445       .addReg(incr).addReg(ShiftReg);
4446   if (is8bit)
4447     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
4448   else {
4449     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
4450     BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
4451   }
4452   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
4453       .addReg(Mask2Reg).addReg(ShiftReg);
4454
4455   BB = loopMBB;
4456   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
4457     .addReg(PPC::R0).addReg(PtrReg);
4458   if (BinOpcode)
4459     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
4460       .addReg(Incr2Reg).addReg(TmpDestReg);
4461   BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
4462     .addReg(TmpDestReg).addReg(MaskReg);
4463   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
4464     .addReg(TmpReg).addReg(MaskReg);
4465   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
4466     .addReg(Tmp3Reg).addReg(Tmp2Reg);
4467   BuildMI(BB, dl, TII->get(PPC::STWCX))
4468     .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
4469   BuildMI(BB, dl, TII->get(PPC::BCC))
4470     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
4471   BB->addSuccessor(loopMBB);
4472   BB->addSuccessor(exitMBB);
4473
4474   //  exitMBB:
4475   //   ...
4476   BB = exitMBB;
4477   BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
4478   return BB;
4479 }
4480
4481 MachineBasicBlock *
4482 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
4483                                                MachineBasicBlock *BB) const {
4484   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4485
4486   // To "insert" these instructions we actually have to insert their
4487   // control-flow patterns.
4488   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4489   MachineFunction::iterator It = BB;
4490   ++It;
4491
4492   MachineFunction *F = BB->getParent();
4493
4494   if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
4495       MI->getOpcode() == PPC::SELECT_CC_I8 ||
4496       MI->getOpcode() == PPC::SELECT_CC_F4 ||
4497       MI->getOpcode() == PPC::SELECT_CC_F8 ||
4498       MI->getOpcode() == PPC::SELECT_CC_VRRC) {
4499
4500     // The incoming instruction knows the destination vreg to set, the
4501     // condition code register to branch on, the true/false values to
4502     // select between, and a branch opcode to use.
4503
4504     //  thisMBB:
4505     //  ...
4506     //   TrueVal = ...
4507     //   cmpTY ccX, r1, r2
4508     //   bCC copy1MBB
4509     //   fallthrough --> copy0MBB
4510     MachineBasicBlock *thisMBB = BB;
4511     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
4512     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
4513     unsigned SelectPred = MI->getOperand(4).getImm();
4514     DebugLoc dl = MI->getDebugLoc();
4515     BuildMI(BB, dl, TII->get(PPC::BCC))
4516       .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
4517     F->insert(It, copy0MBB);
4518     F->insert(It, sinkMBB);
4519     // Update machine-CFG edges by transferring all successors of the current
4520     // block to the new block which will contain the Phi node for the select.
4521     sinkMBB->transferSuccessors(BB);
4522     // Next, add the true and fallthrough blocks as its successors.
4523     BB->addSuccessor(copy0MBB);
4524     BB->addSuccessor(sinkMBB);
4525
4526     //  copy0MBB:
4527     //   %FalseValue = ...
4528     //   # fallthrough to sinkMBB
4529     BB = copy0MBB;
4530
4531     // Update machine-CFG edges
4532     BB->addSuccessor(sinkMBB);
4533
4534     //  sinkMBB:
4535     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4536     //  ...
4537     BB = sinkMBB;
4538     BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg())
4539       .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
4540       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4541   }
4542   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
4543     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
4544   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
4545     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
4546   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
4547     BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
4548   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
4549     BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
4550
4551   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
4552     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
4553   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
4554     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
4555   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
4556     BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
4557   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
4558     BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
4559
4560   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
4561     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
4562   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
4563     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
4564   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
4565     BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
4566   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
4567     BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
4568
4569   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
4570     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
4571   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
4572     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
4573   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
4574     BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
4575   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
4576     BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
4577
4578   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
4579     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
4580   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
4581     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
4582   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
4583     BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
4584   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
4585     BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
4586
4587   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
4588     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
4589   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
4590     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
4591   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
4592     BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
4593   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
4594     BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
4595
4596   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
4597     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
4598   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
4599     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
4600   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
4601     BB = EmitAtomicBinary(MI, BB, false, 0);
4602   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
4603     BB = EmitAtomicBinary(MI, BB, true, 0);
4604
4605   else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
4606            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
4607     bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
4608
4609     unsigned dest   = MI->getOperand(0).getReg();
4610     unsigned ptrA   = MI->getOperand(1).getReg();
4611     unsigned ptrB   = MI->getOperand(2).getReg();
4612     unsigned oldval = MI->getOperand(3).getReg();
4613     unsigned newval = MI->getOperand(4).getReg();
4614     DebugLoc dl     = MI->getDebugLoc();
4615
4616     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
4617     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
4618     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
4619     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
4620     F->insert(It, loop1MBB);
4621     F->insert(It, loop2MBB);
4622     F->insert(It, midMBB);
4623     F->insert(It, exitMBB);
4624     exitMBB->transferSuccessors(BB);
4625
4626     //  thisMBB:
4627     //   ...
4628     //   fallthrough --> loopMBB
4629     BB->addSuccessor(loop1MBB);
4630
4631     // loop1MBB:
4632     //   l[wd]arx dest, ptr
4633     //   cmp[wd] dest, oldval
4634     //   bne- midMBB
4635     // loop2MBB:
4636     //   st[wd]cx. newval, ptr
4637     //   bne- loopMBB
4638     //   b exitBB
4639     // midMBB:
4640     //   st[wd]cx. dest, ptr
4641     // exitBB:
4642     BB = loop1MBB;
4643     BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
4644       .addReg(ptrA).addReg(ptrB);
4645     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
4646       .addReg(oldval).addReg(dest);
4647     BuildMI(BB, dl, TII->get(PPC::BCC))
4648       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
4649     BB->addSuccessor(loop2MBB);
4650     BB->addSuccessor(midMBB);
4651
4652     BB = loop2MBB;
4653     BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
4654       .addReg(newval).addReg(ptrA).addReg(ptrB);
4655     BuildMI(BB, dl, TII->get(PPC::BCC))
4656       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
4657     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
4658     BB->addSuccessor(loop1MBB);
4659     BB->addSuccessor(exitMBB);
4660
4661     BB = midMBB;
4662     BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
4663       .addReg(dest).addReg(ptrA).addReg(ptrB);
4664     BB->addSuccessor(exitMBB);
4665
4666     //  exitMBB:
4667     //   ...
4668     BB = exitMBB;
4669   } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
4670              MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
4671     // We must use 64-bit registers for addresses when targeting 64-bit,
4672     // since we're actually doing arithmetic on them.  Other registers
4673     // can be 32-bit.
4674     bool is64bit = PPCSubTarget.isPPC64();
4675     bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
4676
4677     unsigned dest   = MI->getOperand(0).getReg();
4678     unsigned ptrA   = MI->getOperand(1).getReg();
4679     unsigned ptrB   = MI->getOperand(2).getReg();
4680     unsigned oldval = MI->getOperand(3).getReg();
4681     unsigned newval = MI->getOperand(4).getReg();
4682     DebugLoc dl     = MI->getDebugLoc();
4683
4684     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
4685     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
4686     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
4687     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
4688     F->insert(It, loop1MBB);
4689     F->insert(It, loop2MBB);
4690     F->insert(It, midMBB);
4691     F->insert(It, exitMBB);
4692     exitMBB->transferSuccessors(BB);
4693
4694     MachineRegisterInfo &RegInfo = F->getRegInfo();
4695     const TargetRegisterClass *RC =
4696       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
4697                 (const TargetRegisterClass *) &PPC::GPRCRegClass;
4698     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
4699     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
4700     unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
4701     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
4702     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
4703     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
4704     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
4705     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
4706     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
4707     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
4708     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
4709     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
4710     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
4711     unsigned Ptr1Reg;
4712     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
4713     //  thisMBB:
4714     //   ...
4715     //   fallthrough --> loopMBB
4716     BB->addSuccessor(loop1MBB);
4717
4718     // The 4-byte load must be aligned, while a char or short may be
4719     // anywhere in the word.  Hence all this nasty bookkeeping code.
4720     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
4721     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
4722     //   xori shift, shift1, 24 [16]
4723     //   rlwinm ptr, ptr1, 0, 0, 29
4724     //   slw newval2, newval, shift
4725     //   slw oldval2, oldval,shift
4726     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
4727     //   slw mask, mask2, shift
4728     //   and newval3, newval2, mask
4729     //   and oldval3, oldval2, mask
4730     // loop1MBB:
4731     //   lwarx tmpDest, ptr
4732     //   and tmp, tmpDest, mask
4733     //   cmpw tmp, oldval3
4734     //   bne- midMBB
4735     // loop2MBB:
4736     //   andc tmp2, tmpDest, mask
4737     //   or tmp4, tmp2, newval3
4738     //   stwcx. tmp4, ptr
4739     //   bne- loop1MBB
4740     //   b exitBB
4741     // midMBB:
4742     //   stwcx. tmpDest, ptr
4743     // exitBB:
4744     //   srw dest, tmpDest, shift
4745     if (ptrA!=PPC::R0) {
4746       Ptr1Reg = RegInfo.createVirtualRegister(RC);
4747       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
4748         .addReg(ptrA).addReg(ptrB);
4749     } else {
4750       Ptr1Reg = ptrB;
4751     }
4752     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
4753         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
4754     BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
4755         .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
4756     if (is64bit)
4757       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
4758         .addReg(Ptr1Reg).addImm(0).addImm(61);
4759     else
4760       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
4761         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
4762     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
4763         .addReg(newval).addReg(ShiftReg);
4764     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
4765         .addReg(oldval).addReg(ShiftReg);
4766     if (is8bit)
4767       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
4768     else {
4769       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
4770       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
4771         .addReg(Mask3Reg).addImm(65535);
4772     }
4773     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
4774         .addReg(Mask2Reg).addReg(ShiftReg);
4775     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
4776         .addReg(NewVal2Reg).addReg(MaskReg);
4777     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
4778         .addReg(OldVal2Reg).addReg(MaskReg);
4779
4780     BB = loop1MBB;
4781     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
4782         .addReg(PPC::R0).addReg(PtrReg);
4783     BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
4784         .addReg(TmpDestReg).addReg(MaskReg);
4785     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
4786         .addReg(TmpReg).addReg(OldVal3Reg);
4787     BuildMI(BB, dl, TII->get(PPC::BCC))
4788         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
4789     BB->addSuccessor(loop2MBB);
4790     BB->addSuccessor(midMBB);
4791
4792     BB = loop2MBB;
4793     BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
4794         .addReg(TmpDestReg).addReg(MaskReg);
4795     BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
4796         .addReg(Tmp2Reg).addReg(NewVal3Reg);
4797     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
4798         .addReg(PPC::R0).addReg(PtrReg);
4799     BuildMI(BB, dl, TII->get(PPC::BCC))
4800       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
4801     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
4802     BB->addSuccessor(loop1MBB);
4803     BB->addSuccessor(exitMBB);
4804
4805     BB = midMBB;
4806     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
4807       .addReg(PPC::R0).addReg(PtrReg);
4808     BB->addSuccessor(exitMBB);
4809
4810     //  exitMBB:
4811     //   ...
4812     BB = exitMBB;
4813     BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg);
4814   } else {
4815     assert(0 && "Unexpected instr type to insert");
4816   }
4817
4818   F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
4819   return BB;
4820 }
4821
4822 //===----------------------------------------------------------------------===//
4823 // Target Optimization Hooks
4824 //===----------------------------------------------------------------------===//
4825
4826 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
4827                                              DAGCombinerInfo &DCI) const {
4828   TargetMachine &TM = getTargetMachine();
4829   SelectionDAG &DAG = DCI.DAG;
4830   DebugLoc dl = N->getDebugLoc();
4831   switch (N->getOpcode()) {
4832   default: break;
4833   case PPCISD::SHL:
4834     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
4835       if (C->getZExtValue() == 0)   // 0 << V -> 0.
4836         return N->getOperand(0);
4837     }
4838     break;
4839   case PPCISD::SRL:
4840     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
4841       if (C->getZExtValue() == 0)   // 0 >>u V -> 0.
4842         return N->getOperand(0);
4843     }
4844     break;
4845   case PPCISD::SRA:
4846     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
4847       if (C->getZExtValue() == 0 ||   //  0 >>s V -> 0.
4848           C->isAllOnesValue())    // -1 >>s V -> -1.
4849         return N->getOperand(0);
4850     }
4851     break;
4852
4853   case ISD::SINT_TO_FP:
4854     if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
4855       if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
4856         // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
4857         // We allow the src/dst to be either f32/f64, but the intermediate
4858         // type must be i64.
4859         if (N->getOperand(0).getValueType() == MVT::i64 &&
4860             N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
4861           SDValue Val = N->getOperand(0).getOperand(0);
4862           if (Val.getValueType() == MVT::f32) {
4863             Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
4864             DCI.AddToWorklist(Val.getNode());
4865           }
4866
4867           Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
4868           DCI.AddToWorklist(Val.getNode());
4869           Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
4870           DCI.AddToWorklist(Val.getNode());
4871           if (N->getValueType(0) == MVT::f32) {
4872             Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
4873                               DAG.getIntPtrConstant(0));
4874             DCI.AddToWorklist(Val.getNode());
4875           }
4876           return Val;
4877         } else if (N->getOperand(0).getValueType() == MVT::i32) {
4878           // If the intermediate type is i32, we can avoid the load/store here
4879           // too.
4880         }
4881       }
4882     }
4883     break;
4884   case ISD::STORE:
4885     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
4886     if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
4887         !cast<StoreSDNode>(N)->isTruncatingStore() &&
4888         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
4889         N->getOperand(1).getValueType() == MVT::i32 &&
4890         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
4891       SDValue Val = N->getOperand(1).getOperand(0);
4892       if (Val.getValueType() == MVT::f32) {
4893         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
4894         DCI.AddToWorklist(Val.getNode());
4895       }
4896       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
4897       DCI.AddToWorklist(Val.getNode());
4898
4899       Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
4900                         N->getOperand(2), N->getOperand(3));
4901       DCI.AddToWorklist(Val.getNode());
4902       return Val;
4903     }
4904
4905     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
4906     if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
4907         N->getOperand(1).getNode()->hasOneUse() &&
4908         (N->getOperand(1).getValueType() == MVT::i32 ||
4909          N->getOperand(1).getValueType() == MVT::i16)) {
4910       SDValue BSwapOp = N->getOperand(1).getOperand(0);
4911       // Do an any-extend to 32-bits if this is a half-word input.
4912       if (BSwapOp.getValueType() == MVT::i16)
4913         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
4914
4915       return DAG.getNode(PPCISD::STBRX, dl, MVT::Other, N->getOperand(0),
4916                          BSwapOp, N->getOperand(2), N->getOperand(3),
4917                          DAG.getValueType(N->getOperand(1).getValueType()));
4918     }
4919     break;
4920   case ISD::BSWAP:
4921     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
4922     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
4923         N->getOperand(0).hasOneUse() &&
4924         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
4925       SDValue Load = N->getOperand(0);
4926       LoadSDNode *LD = cast<LoadSDNode>(Load);
4927       // Create the byte-swapping load.
4928       std::vector<MVT> VTs;
4929       VTs.push_back(MVT::i32);
4930       VTs.push_back(MVT::Other);
4931       SDValue MO = DAG.getMemOperand(LD->getMemOperand());
4932       SDValue Ops[] = {
4933         LD->getChain(),    // Chain
4934         LD->getBasePtr(),  // Ptr
4935         MO,                // MemOperand
4936         DAG.getValueType(N->getValueType(0)) // VT
4937       };
4938       SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4);
4939
4940       // If this is an i16 load, insert the truncate.
4941       SDValue ResVal = BSLoad;
4942       if (N->getValueType(0) == MVT::i16)
4943         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
4944
4945       // First, combine the bswap away.  This makes the value produced by the
4946       // load dead.
4947       DCI.CombineTo(N, ResVal);
4948
4949       // Next, combine the load away, we give it a bogus result value but a real
4950       // chain result.  The result value is dead because the bswap is dead.
4951       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
4952
4953       // Return N so it doesn't get rechecked!
4954       return SDValue(N, 0);
4955     }
4956
4957     break;
4958   case PPCISD::VCMP: {
4959     // If a VCMPo node already exists with exactly the same operands as this
4960     // node, use its result instead of this node (VCMPo computes both a CR6 and
4961     // a normal output).
4962     //
4963     if (!N->getOperand(0).hasOneUse() &&
4964         !N->getOperand(1).hasOneUse() &&
4965         !N->getOperand(2).hasOneUse()) {
4966
4967       // Scan all of the users of the LHS, looking for VCMPo's that match.
4968       SDNode *VCMPoNode = 0;
4969
4970       SDNode *LHSN = N->getOperand(0).getNode();
4971       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
4972            UI != E; ++UI)
4973         if (UI->getOpcode() == PPCISD::VCMPo &&
4974             UI->getOperand(1) == N->getOperand(1) &&
4975             UI->getOperand(2) == N->getOperand(2) &&
4976             UI->getOperand(0) == N->getOperand(0)) {
4977           VCMPoNode = *UI;
4978           break;
4979         }
4980
4981       // If there is no VCMPo node, or if the flag value has a single use, don't
4982       // transform this.
4983       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
4984         break;
4985
4986       // Look at the (necessarily single) use of the flag value.  If it has a
4987       // chain, this transformation is more complex.  Note that multiple things
4988       // could use the value result, which we should ignore.
4989       SDNode *FlagUser = 0;
4990       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
4991            FlagUser == 0; ++UI) {
4992         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
4993         SDNode *User = *UI;
4994         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
4995           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
4996             FlagUser = User;
4997             break;
4998           }
4999         }
5000       }
5001
5002       // If the user is a MFCR instruction, we know this is safe.  Otherwise we
5003       // give up for right now.
5004       if (FlagUser->getOpcode() == PPCISD::MFCR)
5005         return SDValue(VCMPoNode, 0);
5006     }
5007     break;
5008   }
5009   case ISD::BR_CC: {
5010     // If this is a branch on an altivec predicate comparison, lower this so
5011     // that we don't have to do a MFCR: instead, branch directly on CR6.  This
5012     // lowering is done pre-legalize, because the legalizer lowers the predicate
5013     // compare down to code that is difficult to reassemble.
5014     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
5015     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
5016     int CompareOpc;
5017     bool isDot;
5018
5019     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
5020         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
5021         getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
5022       assert(isDot && "Can't compare against a vector result!");
5023
5024       // If this is a comparison against something other than 0/1, then we know
5025       // that the condition is never/always true.
5026       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
5027       if (Val != 0 && Val != 1) {
5028         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
5029           return N->getOperand(0);
5030         // Always !=, turn it into an unconditional branch.
5031         return DAG.getNode(ISD::BR, dl, MVT::Other,
5032                            N->getOperand(0), N->getOperand(4));
5033       }
5034
5035       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
5036
5037       // Create the PPCISD altivec 'dot' comparison node.
5038       std::vector<MVT> VTs;
5039       SDValue Ops[] = {
5040         LHS.getOperand(2),  // LHS of compare
5041         LHS.getOperand(3),  // RHS of compare
5042         DAG.getConstant(CompareOpc, MVT::i32)
5043       };
5044       VTs.push_back(LHS.getOperand(2).getValueType());
5045       VTs.push_back(MVT::Flag);
5046       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
5047
5048       // Unpack the result based on how the target uses it.
5049       PPC::Predicate CompOpc;
5050       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
5051       default:  // Can't happen, don't crash on invalid number though.
5052       case 0:   // Branch on the value of the EQ bit of CR6.
5053         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
5054         break;
5055       case 1:   // Branch on the inverted value of the EQ bit of CR6.
5056         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
5057         break;
5058       case 2:   // Branch on the value of the LT bit of CR6.
5059         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
5060         break;
5061       case 3:   // Branch on the inverted value of the LT bit of CR6.
5062         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
5063         break;
5064       }
5065
5066       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
5067                          DAG.getConstant(CompOpc, MVT::i32),
5068                          DAG.getRegister(PPC::CR6, MVT::i32),
5069                          N->getOperand(4), CompNode.getValue(1));
5070     }
5071     break;
5072   }
5073   }
5074
5075   return SDValue();
5076 }
5077
5078 //===----------------------------------------------------------------------===//
5079 // Inline Assembly Support
5080 //===----------------------------------------------------------------------===//
5081
5082 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
5083                                                        const APInt &Mask,
5084                                                        APInt &KnownZero,
5085                                                        APInt &KnownOne,
5086                                                        const SelectionDAG &DAG,
5087                                                        unsigned Depth) const {
5088   KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
5089   switch (Op.getOpcode()) {
5090   default: break;
5091   case PPCISD::LBRX: {
5092     // lhbrx is known to have the top bits cleared out.
5093     if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
5094       KnownZero = 0xFFFF0000;
5095     break;
5096   }
5097   case ISD::INTRINSIC_WO_CHAIN: {
5098     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
5099     default: break;
5100     case Intrinsic::ppc_altivec_vcmpbfp_p:
5101     case Intrinsic::ppc_altivec_vcmpeqfp_p:
5102     case Intrinsic::ppc_altivec_vcmpequb_p:
5103     case Intrinsic::ppc_altivec_vcmpequh_p:
5104     case Intrinsic::ppc_altivec_vcmpequw_p:
5105     case Intrinsic::ppc_altivec_vcmpgefp_p:
5106     case Intrinsic::ppc_altivec_vcmpgtfp_p:
5107     case Intrinsic::ppc_altivec_vcmpgtsb_p:
5108     case Intrinsic::ppc_altivec_vcmpgtsh_p:
5109     case Intrinsic::ppc_altivec_vcmpgtsw_p:
5110     case Intrinsic::ppc_altivec_vcmpgtub_p:
5111     case Intrinsic::ppc_altivec_vcmpgtuh_p:
5112     case Intrinsic::ppc_altivec_vcmpgtuw_p:
5113       KnownZero = ~1U;  // All bits but the low one are known to be zero.
5114       break;
5115     }
5116   }
5117   }
5118 }
5119
5120
5121 /// getConstraintType - Given a constraint, return the type of
5122 /// constraint it is for this target.
5123 PPCTargetLowering::ConstraintType
5124 PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
5125   if (Constraint.size() == 1) {
5126     switch (Constraint[0]) {
5127     default: break;
5128     case 'b':
5129     case 'r':
5130     case 'f':
5131     case 'v':
5132     case 'y':
5133       return C_RegisterClass;
5134     }
5135   }
5136   return TargetLowering::getConstraintType(Constraint);
5137 }
5138
5139 std::pair<unsigned, const TargetRegisterClass*>
5140 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
5141                                                 MVT VT) const {
5142   if (Constraint.size() == 1) {
5143     // GCC RS6000 Constraint Letters
5144     switch (Constraint[0]) {
5145     case 'b':   // R1-R31
5146     case 'r':   // R0-R31
5147       if (VT == MVT::i64 && PPCSubTarget.isPPC64())
5148         return std::make_pair(0U, PPC::G8RCRegisterClass);
5149       return std::make_pair(0U, PPC::GPRCRegisterClass);
5150     case 'f':
5151       if (VT == MVT::f32)
5152         return std::make_pair(0U, PPC::F4RCRegisterClass);
5153       else if (VT == MVT::f64)
5154         return std::make_pair(0U, PPC::F8RCRegisterClass);
5155       break;
5156     case 'v':
5157       return std::make_pair(0U, PPC::VRRCRegisterClass);
5158     case 'y':   // crrc
5159       return std::make_pair(0U, PPC::CRRCRegisterClass);
5160     }
5161   }
5162
5163   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
5164 }
5165
5166
5167 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5168 /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is true
5169 /// it means one of the asm constraint of the inline asm instruction being
5170 /// processed is 'm'.
5171 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
5172                                                      bool hasMemory,
5173                                                      std::vector<SDValue>&Ops,
5174                                                      SelectionDAG &DAG) const {
5175   SDValue Result(0,0);
5176   switch (Letter) {
5177   default: break;
5178   case 'I':
5179   case 'J':
5180   case 'K':
5181   case 'L':
5182   case 'M':
5183   case 'N':
5184   case 'O':
5185   case 'P': {
5186     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
5187     if (!CST) return; // Must be an immediate to match.
5188     unsigned Value = CST->getZExtValue();
5189     switch (Letter) {
5190     default: assert(0 && "Unknown constraint letter!");
5191     case 'I':  // "I" is a signed 16-bit constant.
5192       if ((short)Value == (int)Value)
5193         Result = DAG.getTargetConstant(Value, Op.getValueType());
5194       break;
5195     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
5196     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
5197       if ((short)Value == 0)
5198         Result = DAG.getTargetConstant(Value, Op.getValueType());
5199       break;
5200     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
5201       if ((Value >> 16) == 0)
5202         Result = DAG.getTargetConstant(Value, Op.getValueType());
5203       break;
5204     case 'M':  // "M" is a constant that is greater than 31.
5205       if (Value > 31)
5206         Result = DAG.getTargetConstant(Value, Op.getValueType());
5207       break;
5208     case 'N':  // "N" is a positive constant that is an exact power of two.
5209       if ((int)Value > 0 && isPowerOf2_32(Value))
5210         Result = DAG.getTargetConstant(Value, Op.getValueType());
5211       break;
5212     case 'O':  // "O" is the constant zero.
5213       if (Value == 0)
5214         Result = DAG.getTargetConstant(Value, Op.getValueType());
5215       break;
5216     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
5217       if ((short)-Value == (int)-Value)
5218         Result = DAG.getTargetConstant(Value, Op.getValueType());
5219       break;
5220     }
5221     break;
5222   }
5223   }
5224
5225   if (Result.getNode()) {
5226     Ops.push_back(Result);
5227     return;
5228   }
5229
5230   // Handle standard constraint letters.
5231   TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
5232 }
5233
5234 // isLegalAddressingMode - Return true if the addressing mode represented
5235 // by AM is legal for this target, for a load/store of the specified type.
5236 bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
5237                                               const Type *Ty) const {
5238   // FIXME: PPC does not allow r+i addressing modes for vectors!
5239
5240   // PPC allows a sign-extended 16-bit immediate field.
5241   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
5242     return false;
5243
5244   // No global is ever allowed as a base.
5245   if (AM.BaseGV)
5246     return false;
5247
5248   // PPC only support r+r,
5249   switch (AM.Scale) {
5250   case 0:  // "r+i" or just "i", depending on HasBaseReg.
5251     break;
5252   case 1:
5253     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
5254       return false;
5255     // Otherwise we have r+r or r+i.
5256     break;
5257   case 2:
5258     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
5259       return false;
5260     // Allow 2*r as r+r.
5261     break;
5262   default:
5263     // No other scales are supported.
5264     return false;
5265   }
5266
5267   return true;
5268 }
5269
5270 /// isLegalAddressImmediate - Return true if the integer value can be used
5271 /// as the offset of the target addressing mode for load / store of the
5272 /// given type.
5273 bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
5274   // PPC allows a sign-extended 16-bit immediate field.
5275   return (V > -(1 << 16) && V < (1 << 16)-1);
5276 }
5277
/// isLegalAddressImmediate - GlobalValue overload.  Always returns false, so
/// no global value is accepted as an address immediate; GV is not inspected.
bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}
5281
5282 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
5283   DebugLoc dl = Op.getDebugLoc();
5284   // Depths > 0 not supported yet!
5285   if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
5286     return SDValue();
5287
5288   MachineFunction &MF = DAG.getMachineFunction();
5289   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
5290
5291   // Just load the return address off the stack.
5292   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
5293
5294   // Make sure the function really does not optimize away the store of the RA
5295   // to the stack.
5296   FuncInfo->setLRStoreRequired();
5297   return DAG.getLoad(getPointerTy(), dl,
5298                      DAG.getEntryNode(), RetAddrFI, NULL, 0);
5299 }
5300
5301 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
5302   DebugLoc dl = Op.getDebugLoc();
5303   // Depths > 0 not supported yet!
5304   if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
5305     return SDValue();
5306
5307   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5308   bool isPPC64 = PtrVT == MVT::i64;
5309
5310   MachineFunction &MF = DAG.getMachineFunction();
5311   MachineFrameInfo *MFI = MF.getFrameInfo();
5312   bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())
5313                   && MFI->getStackSize();
5314
5315   if (isPPC64)
5316     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1,
5317       MVT::i64);
5318   else
5319     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1,
5320       MVT::i32);
5321 }
5322
/// isOffsetFoldingLegal - Always returns false: folding an offset into a
/// global address node is never reported as legal here.  GA is not inspected.
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}
5328
5329 MVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
5330                                            bool isSrcConst, bool isSrcStr,
5331                                            SelectionDAG &DAG) const {
5332   if (this->PPCSubTarget.isPPC64()) {
5333     return MVT::i64;
5334   } else {
5335     return MVT::i32;
5336   }
5337 }